From 6129e9e635d2a3bc4194f37f2d51ae4209f01191 Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Wed, 31 Jan 2024 08:51:24 +0100 Subject: [PATCH 1/5] log1p: fix input layer argument being unused. (#678) * log1p: fix input layer argument being unused. * Add CHANGELOG entry --- CHANGELOG.md | 6 ++++ src/transform/log1p/run_test.py | 57 +++++++++++++++++++++++++++------ src/transform/log1p/script.py | 25 +++++++++++---- 3 files changed, 72 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 915c0178b35..0f7a08c43b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# openpipelines 0.12.4 + +## BUG FIXES + +* `transform/log1p`: fix `--input_layer` argument not functioning (PR #678). + # openpipelines 0.12.3 ## BUG FIXES diff --git a/src/transform/log1p/run_test.py b/src/transform/log1p/run_test.py index 1de97d01bde..d8f416bf48f 100644 --- a/src/transform/log1p/run_test.py +++ b/src/transform/log1p/run_test.py @@ -1,16 +1,19 @@ from os import path import mudata as mu import numpy as np +import scanpy as sc +import pandas as pd import sys import pytest import sys +import uuid from operator import attrgetter ## VIASH START meta = { 'functionality_name': 'lognorm', 'resources_dir': 'resources_test/', - 'config': '/home/di/code/openpipeline/src/transform/log1p/config.vsh.yaml', + 'config': './src/transform/log1p/config.vsh.yaml', 'executable': "../../target/docker/transform/log1p/log1p" } @@ -18,12 +21,29 @@ ## VIASH END @pytest.fixture -def input_path(): - return f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" +def input_data(): + return mu.read_h5mu(f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu").copy() + +@pytest.fixture +def random_h5mu_path(tmp_path): + def wrapper(): + unique_filename = f"{str(uuid.uuid4())}.h5mu" + temp_file = tmp_path / unique_filename + return temp_file + return 
wrapper @pytest.mark.parametrize("output_layer", [None, "log_normalized"]) -def test_1logp(run_component, input_path, output_layer): - output = "output.h5mu" +@pytest.mark.parametrize("input_layer", [None, "normalized"]) +def test_1logp(run_component, input_data, output_layer, input_layer, random_h5mu_path): + output = random_h5mu_path() + if input_layer: + mod = input_data.mod["rna"] + mod.layers[input_layer] = mod.X.copy() + # Overwrite the original layer to make sure + # it is not accidentally used as input layer. + mod.X[:] = 0 + input_path = random_h5mu_path() + input_data.write(input_path) run_args = [ "--input", input_path, "--output", output, @@ -31,6 +51,8 @@ def test_1logp(run_component, input_path, output_layer): ] if output_layer: run_args.extend(["--output_layer", output_layer]) + if input_layer: + run_args.extend(["--input_layer", input_layer]) run_component(run_args) get_output_layer = attrgetter("X") if not output_layer else lambda x: getattr(x, 'layers')[output_layer] @@ -49,16 +71,31 @@ def test_1logp(run_component, input_path, output_layer): assert rna_in.shape == rna_out.shape, "Should have same shape as before" assert prot_in.shape == prot_out.shape, "Should have same shape as before" + input_layer_data = rna_in.X if not input_layer else rna_in.layers[input_layer] + assert np.mean(input_layer_data) != np.mean(get_output_layer(rna_out)), "Expression should have changed" - assert np.mean(rna_in.X) != np.mean(get_output_layer(rna_out)), "Expression should have changed" - - nz_row, nz_col = rna_in.X.nonzero() - row_corr = np.corrcoef(rna_in.X[nz_row[0],:].toarray().flatten(), get_output_layer(rna_out)[nz_row[0],:].toarray().flatten())[0,1] - col_corr = np.corrcoef(rna_in.X[:,nz_col[0]].toarray().flatten(), get_output_layer(rna_out)[:,nz_col[0]].toarray().flatten())[0,1] + nz_row, nz_col = input_layer_data.nonzero() + row_corr = np.corrcoef(input_layer_data[nz_row[0],:].toarray().flatten(), + 
get_output_layer(rna_out)[nz_row[0],:].toarray().flatten())[0,1] + col_corr = np.corrcoef(input_layer_data[:,nz_col[0]].toarray().flatten(), + get_output_layer(rna_out)[:,nz_col[0]].toarray().flatten())[0,1] assert row_corr > .1 assert col_corr > .1 assert 'log1p' in rna_out.uns + # Make sure that the original input layer has not been overwritten + layers_to_test = [None] + list(rna_in.layers.keys()) + for layer in layers_to_test: + if layer != output_layer: + in_data = sc.get.var_df(rna_in, + keys=rna_in.obs_names.to_list(), + layer=layer) + out_data = sc.get.var_df(rna_out, + keys=rna_in.obs_names.to_list(), + layer=layer) + pd.testing.assert_frame_equal(in_data, out_data) + + if __name__ == '__main__': sys.exit(pytest.main([__file__])) \ No newline at end of file diff --git a/src/transform/log1p/script.py b/src/transform/log1p/script.py index 44e53caa159..69c10ae400f 100644 --- a/src/transform/log1p/script.py +++ b/src/transform/log1p/script.py @@ -1,5 +1,6 @@ import scanpy as sc import mudata as mu +import anndata as ad import sys ## VIASH START @@ -39,12 +40,24 @@ def setup_logger(): mod = par["modality"] logger.info("Performing log transformation on modality %s", mod) data = mdata.mod[mod] -new_layer = sc.pp.log1p(data, - base=par["base"], - copy=True if par['output_layer'] else False) -if new_layer: - data.layers[par['output_layer']] = new_layer.X - data.uns['log1p'] = new_layer.uns['log1p'] + +# Make our own copy with not a lot of data +# this avoids excessive memory usage and accidental overwrites +input_layer = data.layers[par["input_layer"]] \ + if par["input_layer"] else data.X +data_for_scanpy = ad.AnnData(X=input_layer.copy()) +sc.pp.log1p(data_for_scanpy, + base=par["base"], + layer=None, # use X + copy=False) # allow overwrites in the copy that was made + +# Scanpy will overwrite the input layer. 
+# So fetch input layer from the copy and use it to populate the output slot +if par["output_layer"]: + data.layers[par["output_layer"]] = data_for_scanpy.X +else: + data.X = data_for_scanpy.X +data.uns['log1p'] = data_for_scanpy.uns['log1p'].copy() logger.info("Writing to file %s", par["output"]) mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) From 64a9ed5bfbbc7f8c638be2d1f1d54171f460807f Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:03:32 +0100 Subject: [PATCH 2/5] Remove target folder --- target/docker/annotate/popv/.config.vsh.yaml | 346 - target/docker/annotate/popv/popv | 1401 ---- target/docker/annotate/popv/setup_logger.py | 12 - target/docker/cluster/leiden/.config.vsh.yaml | 219 - target/docker/cluster/leiden/leiden | 1115 --- target/docker/cluster/leiden/setup_logger.py | 12 - .../compress_h5mu/.config.vsh.yaml | 167 - .../compression/compress_h5mu/compress_h5mu | 1008 --- .../compress_h5mu/compress_h5mu.py | 49 - .../compression/tar_extract/.config.vsh.yaml | 106 - .../compression/tar_extract/tar_extract | 978 --- .../from_10xh5_to_h5mu/.config.vsh.yaml | 272 - .../from_10xh5_to_h5mu/from_10xh5_to_h5mu | 1117 --- .../from_10xh5_to_h5mu/setup_logger.py | 12 - .../from_10xmtx_to_h5mu/.config.vsh.yaml | 166 - .../from_10xmtx_to_h5mu/from_10xmtx_to_h5mu | 989 --- .../from_10xmtx_to_h5mu/setup_logger.py | 12 - .../.config.vsh.yaml | 159 - .../from_bd_to_10x_molecular_barcode_tags | 1030 --- .../from_bdrhap_to_h5mu/.config.vsh.yaml | 181 - .../from_bdrhap_to_h5mu/from_bdrhap_to_h5mu | 1215 --- .../.config.vsh.yaml | 190 - .../from_cellranger_multi_to_h5mu | 1166 --- .../setup_logger.py | 12 - .../from_h5ad_to_h5mu/.config.vsh.yaml | 177 - .../from_h5ad_to_h5mu/from_h5ad_to_h5mu | 1056 --- .../convert/from_h5ad_to_h5mu/setup_logger.py | 12 - .../from_h5mu_to_h5ad/.config.vsh.yaml | 182 - .../from_h5mu_to_h5ad/from_h5mu_to_h5ad | 1009 --- 
.../convert/from_h5mu_to_h5ad/setup_logger.py | 12 - .../convert/velocyto_to_h5mu/.config.vsh.yaml | 255 - .../convert/velocyto_to_h5mu/velocyto_to_h5mu | 1086 --- .../.config.vsh.yaml | 637 -- .../cellbender_remove_background | 2153 ------ .../setup_logger.py | 12 - .../.config.vsh.yaml | 406 - .../cellbender_remove_background_v0_2 | 1629 ----- .../helper.py | 143 - .../setup_logger.py | 12 - .../docker/dataflow/concat/.config.vsh.yaml | 222 - target/docker/dataflow/concat/concat | 1386 ---- target/docker/dataflow/concat/setup_logger.py | 12 - target/docker/dataflow/merge/.config.vsh.yaml | 175 - target/docker/dataflow/merge/merge | 1051 --- target/docker/dataflow/merge/setup_logger.py | 12 - .../split_modalities/.config.vsh.yaml | 214 - .../dataflow/split_modalities/setup_logger.py | 12 - .../split_modalities/split_modalities | 1065 --- .../docker/demux/bcl2fastq/.config.vsh.yaml | 169 - target/docker/demux/bcl2fastq/bcl2fastq | 1028 --- .../docker/demux/bcl_convert/.config.vsh.yaml | 189 - target/docker/demux/bcl_convert/bcl_convert | 1033 --- .../demux/cellranger_mkfastq/.config.vsh.yaml | 207 - .../cellranger_mkfastq/cellranger_mkfastq | 1026 --- .../demux/cellranger_mkfastq/setup_logger.py | 12 - target/docker/dimred/pca/.config.vsh.yaml | 253 - target/docker/dimred/pca/pca | 1188 --- target/docker/dimred/pca/setup_logger.py | 12 - target/docker/dimred/umap/.config.vsh.yaml | 312 - target/docker/dimred/umap/setup_logger.py | 12 - target/docker/dimred/umap/umap | 1306 ---- .../download/download_file/.config.vsh.yaml | 138 - .../download/download_file/download_file | 931 --- .../sync_test_resources/.config.vsh.yaml | 170 - .../sync_test_resources/sync_test_resources | 1018 --- .../docker/files/make_params/.config.vsh.yaml | 220 - target/docker/files/make_params/make_params | 1100 --- .../filter/delimit_fraction/.config.vsh.yaml | 241 - .../filter/delimit_fraction/delimit_fraction | 1207 --- .../filter/delimit_fraction/setup_logger.py | 12 - 
.../docker/filter/do_filter/.config.vsh.yaml | 202 - target/docker/filter/do_filter/do_filter | 1056 --- .../docker/filter/do_filter/setup_logger.py | 12 - .../filter_with_counts/.config.vsh.yaml | 295 - .../filter_with_counts/filter_with_counts | 1241 ---- .../filter/filter_with_counts/setup_logger.py | 12 - .../filter/filter_with_hvg/.config.vsh.yaml | 352 - .../filter/filter_with_hvg/filter_with_hvg | 1407 ---- .../filter/filter_with_hvg/setup_logger.py | 12 - .../filter_with_scrublet/.config.vsh.yaml | 304 - .../filter_with_scrublet/filter_with_scrublet | 1260 ---- .../filter_with_scrublet/setup_logger.py | 12 - .../filter/remove_modality/.config.vsh.yaml | 171 - .../filter/remove_modality/remove_modality | 972 --- .../filter/subset_h5mu/.config.vsh.yaml | 187 - .../docker/filter/subset_h5mu/setup_logger.py | 12 - target/docker/filter/subset_h5mu/subset_h5mu | 994 --- .../integrate/harmonypy/.config.vsh.yaml | 240 - target/docker/integrate/harmonypy/harmonypy | 1099 --- .../integrate/scanorama/.config.vsh.yaml | 283 - target/docker/integrate/scanorama/scanorama | 1187 --- .../integrate/scarches/.config.vsh.yaml | 331 - target/docker/integrate/scarches/scarches | 1568 ---- .../docker/integrate/scarches/setup_logger.py | 12 - target/docker/integrate/scvi/.config.vsh.yaml | 591 -- target/docker/integrate/scvi/scvi | 1912 ----- target/docker/integrate/scvi/subset_vars.py | 16 - .../docker/integrate/totalvi/.config.vsh.yaml | 348 - .../docker/integrate/totalvi/setup_logger.py | 12 - target/docker/integrate/totalvi/totalvi | 1479 ---- .../run_cellxgene/.config.vsh.yaml | 83 - .../interactive/run_cellxgene/run_cellxgene | 901 --- .../run_cirrocumulus/.config.vsh.yaml | 85 - .../run_cirrocumulus/run_cirrocumulus | 901 --- .../docker/interpret/lianapy/.config.vsh.yaml | 313 - target/docker/interpret/lianapy/lianapy | 1259 ---- .../labels_transfer/knn/.config.vsh.yaml | 379 - target/docker/labels_transfer/knn/helper.py | 32 - target/docker/labels_transfer/knn/knn | 1258 
---- .../labels_transfer/knn/setup_logger.py | 12 - .../labels_transfer/xgboost/.config.vsh.yaml | 594 -- .../docker/labels_transfer/xgboost/helper.py | 32 - .../labels_transfer/xgboost/setup_logger.py | 12 - target/docker/labels_transfer/xgboost/xgboost | 2013 ----- .../mapping/bd_rhapsody/.config.vsh.yaml | 417 -- target/docker/mapping/bd_rhapsody/bd_rhapsody | 1975 ----- .../rhapsody_targeted_1.10.1_nodocker.cwl | 5159 ------------- .../rhapsody_wta_1.10.1_nodocker.cwl | 5204 ------------- .../mapping/bd_rhapsody/setup_logger.py | 12 - .../mapping/cellranger_count/.config.vsh.yaml | 266 - .../mapping/cellranger_count/cellranger_count | 1206 --- .../cellranger_count_split/.config.vsh.yaml | 218 - .../cellranger_count_split | 1090 --- .../mapping/cellranger_multi/.config.vsh.yaml | 423 -- .../mapping/cellranger_multi/cellranger_multi | 1681 ----- .../mapping/cellranger_multi/setup_logger.py | 12 - .../mapping/htseq_count/.config.vsh.yaml | 418 -- target/docker/mapping/htseq_count/htseq_count | 1608 ---- .../htseq_count_to_h5mu/.config.vsh.yaml | 209 - .../htseq_count_to_h5mu/htseq_count_to_h5mu | 1151 --- .../mapping/multi_star/.config.vsh.yaml | 3080 -------- target/docker/mapping/multi_star/multi_star | 6362 ---------------- .../multi_star_to_h5mu/.config.vsh.yaml | 179 - .../multi_star_to_h5mu/multi_star_to_h5mu | 1017 --- .../mapping/samtools_sort/.config.vsh.yaml | 270 - .../mapping/samtools_sort/samtools_sort | 1185 --- .../mapping/star_align/.config.vsh.yaml | 2535 ------- .../docker/mapping/star_align/setup_logger.py | 12 - target/docker/mapping/star_align/star_align | 5713 --------------- .../mapping/star_align_v273a/.config.vsh.yaml | 2535 ------- .../mapping/star_align_v273a/setup_logger.py | 12 - .../mapping/star_align_v273a/star_align_v273a | 5713 --------------- .../star_build_reference/.config.vsh.yaml | 190 - .../star_build_reference/star_build_reference | 1175 --- .../docker/metadata/add_id/.config.vsh.yaml | 197 - 
target/docker/metadata/add_id/add_id | 1064 --- target/docker/metadata/add_id/setup_logger.py | 12 - .../grep_annotation_column/.config.vsh.yaml | 244 - .../grep_annotation_column | 1148 --- .../docker/metadata/join_csv/.config.vsh.yaml | 229 - target/docker/metadata/join_csv/join_csv | 1119 --- .../docker/metadata/join_csv/setup_logger.py | 12 - .../metadata/join_uns_to_obs/.config.vsh.yaml | 171 - .../metadata/join_uns_to_obs/join_uns_to_obs | 1035 --- .../metadata/join_uns_to_obs/setup_logger.py | 12 - .../move_obsm_to_obs/.config.vsh.yaml | 192 - .../move_obsm_to_obs/move_obsm_to_obs | 1054 --- .../metadata/move_obsm_to_obs/setup_logger.py | 12 - .../docker/neighbors/bbknn/.config.vsh.yaml | 289 - target/docker/neighbors/bbknn/bbknn | 1184 --- .../neighbors/find_neighbors/.config.vsh.yaml | 309 - .../neighbors/find_neighbors/find_neighbors | 1208 --- .../neighbors/find_neighbors/setup_logger.py | 12 - .../filter_10xh5/.config.vsh.yaml | 195 - .../process_10xh5/filter_10xh5/filter_10xh5 | 1089 --- .../qc/calculate_qc_metrics/.config.vsh.yaml | 235 - .../calculate_qc_metrics/calculate_qc_metrics | 1211 --- .../qc/calculate_qc_metrics/setup_logger.py | 12 - target/docker/qc/fastqc/.config.vsh.yaml | 156 - target/docker/qc/fastqc/fastqc | 994 --- target/docker/qc/multiqc/.config.vsh.yaml | 140 - target/docker/qc/multiqc/multiqc | 959 --- .../query/cellxgene_census/.config.vsh.yaml | 260 - .../query/cellxgene_census/cellxgene_census | 1223 ---- .../query/cellxgene_census/setup_logger.py | 12 - .../build_bdrhap_reference/.config.vsh.yaml | 186 - .../build_bdrhap_reference | 972 --- .../.config.vsh.yaml | 187 - .../build_cellranger_reference | 977 --- .../reference/make_reference/.config.vsh.yaml | 212 - .../reference/make_reference/make_reference | 1076 --- target/docker/report/mermaid/.config.vsh.yaml | 185 - target/docker/report/mermaid/mermaid | 1029 --- .../report/mermaid/puppeteer-config.json | 6 - .../docker/transfer/publish/.config.vsh.yaml | 125 - 
target/docker/transfer/publish/publish | 919 --- target/docker/transform/clr/.config.vsh.yaml | 188 - target/docker/transform/clr/clr | 1005 --- .../transform/delete_layer/.config.vsh.yaml | 196 - .../transform/delete_layer/compress_h5mu.py | 49 - .../transform/delete_layer/delete_layer | 1122 --- .../transform/delete_layer/setup_logger.py | 12 - .../docker/transform/log1p/.config.vsh.yaml | 225 - target/docker/transform/log1p/log1p | 1068 --- target/docker/transform/log1p/setup_logger.py | 12 - .../normalize_total/.config.vsh.yaml | 242 - .../transform/normalize_total/normalize_total | 1108 --- .../transform/normalize_total/setup_logger.py | 12 - .../transform/regress_out/.config.vsh.yaml | 195 - .../docker/transform/regress_out/regress_out | 1039 --- .../transform/regress_out/setup_logger.py | 12 - .../docker/transform/scale/.config.vsh.yaml | 205 - target/docker/transform/scale/scale | 1063 --- target/docker/transform/scale/setup_logger.py | 12 - .../docker/velocity/scvelo/.config.vsh.yaml | 276 - target/docker/velocity/scvelo/scvelo | 1272 ---- target/docker/velocity/scvelo/setup_logger.py | 12 - .../docker/velocity/velocyto/.config.vsh.yaml | 225 - target/docker/velocity/velocyto/velocyto | 1097 --- .../compress_h5mu/.config.vsh.yaml | 167 - .../compression/compress_h5mu/compress_h5mu | 537 -- .../compress_h5mu/compress_h5mu.py | 49 - .../compression/tar_extract/.config.vsh.yaml | 106 - .../compression/tar_extract/tar_extract | 514 -- .../native/dataflow/concat/.config.vsh.yaml | 222 - target/native/dataflow/concat/concat | 898 --- target/native/dataflow/concat/setup_logger.py | 12 - target/native/dataflow/merge/.config.vsh.yaml | 175 - target/native/dataflow/merge/merge | 563 -- target/native/dataflow/merge/setup_logger.py | 12 - .../split_modalities/.config.vsh.yaml | 214 - .../dataflow/split_modalities/setup_logger.py | 12 - .../split_modalities/split_modalities | 586 -- .../sync_test_resources/.config.vsh.yaml | 170 - 
.../sync_test_resources/sync_test_resources | 557 -- .../integrate/scarches/.config.vsh.yaml | 331 - target/native/integrate/scarches/scarches | 1086 --- .../native/integrate/scarches/setup_logger.py | 12 - .../native/integrate/totalvi/.config.vsh.yaml | 348 - .../native/integrate/totalvi/setup_logger.py | 12 - target/native/integrate/totalvi/totalvi | 985 --- .../labels_transfer/knn/.config.vsh.yaml | 379 - target/native/labels_transfer/knn/helper.py | 32 - target/native/labels_transfer/knn/knn | 773 -- .../labels_transfer/knn/setup_logger.py | 12 - .../labels_transfer/xgboost/.config.vsh.yaml | 594 -- .../native/labels_transfer/xgboost/helper.py | 32 - .../labels_transfer/xgboost/setup_logger.py | 12 - target/native/labels_transfer/xgboost/xgboost | 1520 ---- .../native/metadata/add_id/.config.vsh.yaml | 197 - target/native/metadata/add_id/add_id | 593 -- target/native/metadata/add_id/setup_logger.py | 12 - .../grep_annotation_column/.config.vsh.yaml | 244 - .../grep_annotation_column | 677 -- .../native/transform/scale/.config.vsh.yaml | 205 - target/native/transform/scale/scale | 592 -- target/native/transform/scale/setup_logger.py | 12 - .../native/velocity/scvelo/.config.vsh.yaml | 276 - target/native/velocity/scvelo/scvelo | 801 -- target/native/velocity/scvelo/setup_logger.py | 12 - .../native/velocity/velocyto/.config.vsh.yaml | 225 - target/native/velocity/velocyto/velocyto | 605 -- .../nextflow/annotate/popv/.config.vsh.yaml | 346 - target/nextflow/annotate/popv/main.nf | 2958 -------- target/nextflow/annotate/popv/nextflow.config | 108 - .../annotate/popv/nextflow_params.yaml | 25 - .../annotate/popv/nextflow_schema.json | 171 - target/nextflow/annotate/popv/setup_logger.py | 12 - .../nextflow/cluster/leiden/.config.vsh.yaml | 219 - target/nextflow/cluster/leiden/main.nf | 2631 ------- .../nextflow/cluster/leiden/nextflow.config | 108 - .../cluster/leiden/nextflow_params.yaml | 12 - .../cluster/leiden/nextflow_schema.json | 94 - 
.../nextflow/cluster/leiden/setup_logger.py | 12 - .../compress_h5mu/.config.vsh.yaml | 167 - .../compress_h5mu/compress_h5mu.py | 49 - .../compression/compress_h5mu/main.nf | 2596 ------- .../compression/compress_h5mu/nextflow.config | 108 - .../compress_h5mu/nextflow_params.yaml | 8 - .../compress_h5mu/nextflow_schema.json | 67 - .../from_10xh5_to_h5mu/.config.vsh.yaml | 272 - .../convert/from_10xh5_to_h5mu/main.nf | 2767 ------- .../from_10xh5_to_h5mu/nextflow.config | 108 - .../from_10xh5_to_h5mu/nextflow_params.yaml | 16 - .../from_10xh5_to_h5mu/nextflow_schema.json | 113 - .../from_10xh5_to_h5mu/setup_logger.py | 12 - .../from_10xmtx_to_h5mu/.config.vsh.yaml | 166 - .../convert/from_10xmtx_to_h5mu/main.nf | 2577 ------- .../from_10xmtx_to_h5mu/nextflow.config | 108 - .../from_10xmtx_to_h5mu/nextflow_params.yaml | 8 - .../from_10xmtx_to_h5mu/nextflow_schema.json | 66 - .../from_10xmtx_to_h5mu/setup_logger.py | 12 - .../.config.vsh.yaml | 159 - .../main.nf | 2586 ------- .../nextflow.config | 108 - .../nextflow_params.yaml | 9 - .../nextflow_schema.json | 71 - .../from_bdrhap_to_h5mu/.config.vsh.yaml | 181 - .../convert/from_bdrhap_to_h5mu/main.nf | 2801 ------- .../from_bdrhap_to_h5mu/nextflow.config | 108 - .../from_bdrhap_to_h5mu/nextflow_params.yaml | 11 - .../from_bdrhap_to_h5mu/nextflow_schema.json | 83 - .../.config.vsh.yaml | 190 - .../from_cellranger_multi_to_h5mu/main.nf | 2744 ------- .../nextflow.config | 108 - .../nextflow_params.yaml | 9 - .../nextflow_schema.json | 73 - .../setup_logger.py | 12 - .../from_h5ad_to_h5mu/.config.vsh.yaml | 177 - .../convert/from_h5ad_to_h5mu/main.nf | 2596 ------- .../convert/from_h5ad_to_h5mu/nextflow.config | 108 - .../from_h5ad_to_h5mu/nextflow_params.yaml | 9 - .../from_h5ad_to_h5mu/nextflow_schema.json | 74 - .../convert/from_h5ad_to_h5mu/setup_logger.py | 12 - .../from_h5mu_to_h5ad/.config.vsh.yaml | 182 - .../convert/from_h5mu_to_h5ad/main.nf | 2592 ------- .../convert/from_h5mu_to_h5ad/nextflow.config | 108 
- .../from_h5mu_to_h5ad/nextflow_params.yaml | 9 - .../from_h5mu_to_h5ad/nextflow_schema.json | 75 - .../convert/from_h5mu_to_h5ad/setup_logger.py | 12 - .../convert/velocyto_to_h5mu/.config.vsh.yaml | 255 - .../nextflow/convert/velocyto_to_h5mu/main.nf | 2693 ------- .../convert/velocyto_to_h5mu/nextflow.config | 108 - .../velocyto_to_h5mu/nextflow_params.yaml | 15 - .../velocyto_to_h5mu/nextflow_schema.json | 111 - .../.config.vsh.yaml | 637 -- .../cellbender_remove_background/main.nf | 3212 -------- .../nextflow.config | 107 - .../nextflow_params.yaml | 51 - .../nextflow_schema.json | 355 - .../setup_logger.py | 12 - .../.config.vsh.yaml | 406 - .../helper.py | 143 - .../cellbender_remove_background_v0_2/main.nf | 2946 -------- .../nextflow.config | 107 - .../nextflow_params.yaml | 33 - .../nextflow_schema.json | 234 - .../setup_logger.py | 12 - .../nextflow/dataflow/concat/.config.vsh.yaml | 222 - target/nextflow/dataflow/concat/main.nf | 2911 -------- .../nextflow/dataflow/concat/nextflow.config | 108 - .../dataflow/concat/nextflow_params.yaml | 11 - .../dataflow/concat/nextflow_schema.json | 88 - .../nextflow/dataflow/concat/setup_logger.py | 12 - .../nextflow/dataflow/merge/.config.vsh.yaml | 175 - target/nextflow/dataflow/merge/main.nf | 2614 ------- .../nextflow/dataflow/merge/nextflow.config | 108 - .../dataflow/merge/nextflow_params.yaml | 8 - .../dataflow/merge/nextflow_schema.json | 67 - .../nextflow/dataflow/merge/setup_logger.py | 12 - .../split_modalities/.config.vsh.yaml | 214 - .../dataflow/split_modalities/main.nf | 2655 ------- .../dataflow/split_modalities/nextflow.config | 108 - .../split_modalities/nextflow_params.yaml | 10 - .../split_modalities/nextflow_schema.json | 81 - .../dataflow/split_modalities/setup_logger.py | 12 - .../nextflow/demux/bcl2fastq/.config.vsh.yaml | 169 - target/nextflow/demux/bcl2fastq/main.nf | 2548 ------- .../nextflow/demux/bcl2fastq/nextflow.config | 108 - .../demux/bcl2fastq/nextflow_params.yaml | 10 - 
.../demux/bcl2fastq/nextflow_schema.json | 78 - .../demux/bcl_convert/.config.vsh.yaml | 189 - target/nextflow/demux/bcl_convert/main.nf | 2574 ------- .../demux/bcl_convert/nextflow.config | 108 - .../demux/bcl_convert/nextflow_params.yaml | 10 - .../demux/bcl_convert/nextflow_schema.json | 78 - .../demux/cellranger_mkfastq/.config.vsh.yaml | 207 - .../nextflow/demux/cellranger_mkfastq/main.nf | 2646 ------- .../demux/cellranger_mkfastq/nextflow.config | 108 - .../cellranger_mkfastq/nextflow_params.yaml | 9 - .../cellranger_mkfastq/nextflow_schema.json | 71 - .../demux/cellranger_mkfastq/setup_logger.py | 12 - target/nextflow/dimred/pca/.config.vsh.yaml | 253 - target/nextflow/dimred/pca/main.nf | 2718 ------- target/nextflow/dimred/pca/nextflow.config | 108 - .../nextflow/dimred/pca/nextflow_params.yaml | 16 - .../nextflow/dimred/pca/nextflow_schema.json | 119 - target/nextflow/dimred/pca/setup_logger.py | 12 - target/nextflow/dimred/umap/.config.vsh.yaml | 312 - target/nextflow/dimred/umap/main.nf | 2778 ------- target/nextflow/dimred/umap/nextflow.config | 108 - .../nextflow/dimred/umap/nextflow_params.yaml | 23 - .../nextflow/dimred/umap/nextflow_schema.json | 164 - target/nextflow/dimred/umap/setup_logger.py | 12 - .../download/download_file/.config.vsh.yaml | 138 - .../nextflow/download/download_file/main.nf | 2492 ------- .../download/download_file/nextflow.config | 108 - .../download_file/nextflow_params.yaml | 8 - .../download_file/nextflow_schema.json | 65 - .../sync_test_resources/.config.vsh.yaml | 170 - .../download/sync_test_resources/main.nf | 2554 ------- .../sync_test_resources/nextflow.config | 108 - .../sync_test_resources/nextflow_params.yaml | 11 - .../sync_test_resources/nextflow_schema.json | 86 - .../files/make_params/.config.vsh.yaml | 220 - target/nextflow/files/make_params/main.nf | 2663 ------- .../files/make_params/nextflow.config | 108 - .../files/make_params/nextflow_params.yaml | 13 - .../files/make_params/nextflow_schema.json | 98 
- .../filter/delimit_fraction/.config.vsh.yaml | 241 - .../nextflow/filter/delimit_fraction/main.nf | 2713 ------- .../filter/delimit_fraction/nextflow.config | 108 - .../delimit_fraction/nextflow_params.yaml | 18 - .../delimit_fraction/nextflow_schema.json | 127 - .../filter/delimit_fraction/setup_logger.py | 12 - .../filter/do_filter/.config.vsh.yaml | 202 - target/nextflow/filter/do_filter/main.nf | 2634 ------- .../nextflow/filter/do_filter/nextflow.config | 108 - .../filter/do_filter/nextflow_params.yaml | 11 - .../filter/do_filter/nextflow_schema.json | 85 - .../nextflow/filter/do_filter/setup_logger.py | 12 - .../filter_with_counts/.config.vsh.yaml | 295 - .../filter/filter_with_counts/main.nf | 2796 ------- .../filter/filter_with_counts/nextflow.config | 108 - .../filter_with_counts/nextflow_params.yaml | 22 - .../filter_with_counts/nextflow_schema.json | 152 - .../filter/filter_with_counts/setup_logger.py | 12 - .../filter/filter_with_hvg/.config.vsh.yaml | 352 - .../nextflow/filter/filter_with_hvg/main.nf | 2856 -------- .../filter/filter_with_hvg/nextflow.config | 108 - .../filter_with_hvg/nextflow_params.yaml | 22 - .../filter_with_hvg/nextflow_schema.json | 162 - .../filter/filter_with_hvg/setup_logger.py | 12 - .../filter_with_scrublet/.config.vsh.yaml | 304 - .../filter/filter_with_scrublet/main.nf | 2769 ------- .../filter_with_scrublet/nextflow.config | 108 - .../filter_with_scrublet/nextflow_params.yaml | 18 - .../filter_with_scrublet/nextflow_schema.json | 136 - .../filter_with_scrublet/setup_logger.py | 12 - .../filter/remove_modality/.config.vsh.yaml | 171 - .../nextflow/filter/remove_modality/main.nf | 2550 ------- .../filter/remove_modality/nextflow.config | 108 - .../remove_modality/nextflow_params.yaml | 9 - .../remove_modality/nextflow_schema.json | 72 - .../filter/subset_h5mu/.config.vsh.yaml | 187 - target/nextflow/filter/subset_h5mu/main.nf | 2575 ------- .../filter/subset_h5mu/nextflow.config | 108 - 
.../filter/subset_h5mu/nextflow_params.yaml | 10 - .../filter/subset_h5mu/nextflow_schema.json | 79 - .../filter/subset_h5mu/setup_logger.py | 12 - .../integrate/harmonypy/.config.vsh.yaml | 240 - target/nextflow/integrate/harmonypy/main.nf | 2646 ------- .../integrate/harmonypy/nextflow.config | 108 - .../integrate/harmonypy/nextflow_params.yaml | 13 - .../integrate/harmonypy/nextflow_schema.json | 100 - .../integrate/scanorama/.config.vsh.yaml | 283 - target/nextflow/integrate/scanorama/main.nf | 2702 ------- .../integrate/scanorama/nextflow.config | 108 - .../integrate/scanorama/nextflow_params.yaml | 17 - .../integrate/scanorama/nextflow_schema.json | 129 - .../integrate/scarches/.config.vsh.yaml | 331 - target/nextflow/integrate/scarches/main.nf | 2962 -------- .../integrate/scarches/nextflow.config | 108 - .../integrate/scarches/nextflow_params.yaml | 27 - .../integrate/scarches/nextflow_schema.json | 189 - .../integrate/scarches/setup_logger.py | 12 - .../nextflow/integrate/scvi/.config.vsh.yaml | 591 -- target/nextflow/integrate/scvi/main.nf | 3174 -------- .../nextflow/integrate/scvi/nextflow.config | 108 - .../integrate/scvi/nextflow_params.yaml | 51 - .../integrate/scvi/nextflow_schema.json | 351 - target/nextflow/integrate/scvi/subset_vars.py | 16 - .../integrate/totalvi/.config.vsh.yaml | 348 - target/nextflow/integrate/totalvi/main.nf | 2923 -------- .../integrate/totalvi/nextflow.config | 108 - .../integrate/totalvi/nextflow_params.yaml | 28 - .../integrate/totalvi/nextflow_schema.json | 195 - .../integrate/totalvi/setup_logger.py | 12 - .../interpret/lianapy/.config.vsh.yaml | 313 - target/nextflow/interpret/lianapy/main.nf | 2757 ------- .../interpret/lianapy/nextflow.config | 108 - .../interpret/lianapy/nextflow_params.yaml | 18 - .../interpret/lianapy/nextflow_schema.json | 140 - .../labels_transfer/knn/.config.vsh.yaml | 379 - target/nextflow/labels_transfer/knn/helper.py | 32 - target/nextflow/labels_transfer/knn/main.nf | 2921 -------- 
.../labels_transfer/knn/nextflow.config | 108 - .../labels_transfer/knn/nextflow_params.yaml | 6 - .../labels_transfer/knn/nextflow_schema.json | 51 - .../labels_transfer/knn/setup_logger.py | 12 - .../labels_transfer/xgboost/.config.vsh.yaml | 594 -- .../labels_transfer/xgboost/helper.py | 32 - .../nextflow/labels_transfer/xgboost/main.nf | 3422 --------- .../labels_transfer/xgboost/nextflow.config | 108 - .../xgboost/nextflow_params.yaml | 24 - .../xgboost/nextflow_schema.json | 177 - .../labels_transfer/xgboost/setup_logger.py | 12 - .../mapping/bd_rhapsody/.config.vsh.yaml | 417 -- target/nextflow/mapping/bd_rhapsody/main.nf | 3249 --------- .../mapping/bd_rhapsody/nextflow.config | 108 - .../mapping/bd_rhapsody/nextflow_params.yaml | 36 - .../mapping/bd_rhapsody/nextflow_schema.json | 239 - .../rhapsody_targeted_1.10.1_nodocker.cwl | 5159 ------------- .../rhapsody_wta_1.10.1_nodocker.cwl | 5204 ------------- .../mapping/bd_rhapsody/setup_logger.py | 12 - .../mapping/cellranger_count/.config.vsh.yaml | 266 - .../nextflow/mapping/cellranger_count/main.nf | 2745 ------- .../mapping/cellranger_count/nextflow.config | 108 - .../cellranger_count/nextflow_params.yaml | 17 - .../cellranger_count/nextflow_schema.json | 122 - .../cellranger_count_split/.config.vsh.yaml | 218 - .../mapping/cellranger_count_split/main.nf | 2633 ------- .../cellranger_count_split/nextflow.config | 108 - .../nextflow_params.yaml | 12 - .../nextflow_schema.json | 93 - .../mapping/cellranger_multi/.config.vsh.yaml | 423 -- .../nextflow/mapping/cellranger_multi/main.nf | 3132 -------- .../mapping/cellranger_multi/nextflow.config | 108 - .../cellranger_multi/nextflow_params.yaml | 34 - .../cellranger_multi/nextflow_schema.json | 222 - .../mapping/cellranger_multi/setup_logger.py | 12 - .../mapping/htseq_count/.config.vsh.yaml | 418 -- target/nextflow/mapping/htseq_count/main.nf | 2978 -------- .../mapping/htseq_count/nextflow.config | 108 - .../mapping/htseq_count/nextflow_params.yaml | 27 - 
.../mapping/htseq_count/nextflow_schema.json | 198 - .../htseq_count_to_h5mu/.config.vsh.yaml | 209 - .../mapping/htseq_count_to_h5mu/main.nf | 2710 ------- .../htseq_count_to_h5mu/nextflow.config | 108 - .../htseq_count_to_h5mu/nextflow_params.yaml | 12 - .../htseq_count_to_h5mu/nextflow_schema.json | 89 - .../mapping/multi_star/.config.vsh.yaml | 3080 -------- target/nextflow/mapping/multi_star/main.nf | 6497 ----------------- .../mapping/multi_star/nextflow.config | 108 - .../mapping/multi_star/nextflow_params.yaml | 16 - .../mapping/multi_star/nextflow_schema.json | 114 - .../multi_star_to_h5mu/.config.vsh.yaml | 179 - .../mapping/multi_star_to_h5mu/main.nf | 2625 ------- .../multi_star_to_h5mu/nextflow.config | 108 - .../multi_star_to_h5mu/nextflow_params.yaml | 8 - .../multi_star_to_h5mu/nextflow_schema.json | 66 - .../mapping/samtools_sort/.config.vsh.yaml | 270 - target/nextflow/mapping/samtools_sort/main.nf | 2740 ------- .../mapping/samtools_sort/nextflow.config | 108 - .../samtools_sort/nextflow_params.yaml | 19 - .../samtools_sort/nextflow_schema.json | 134 - .../mapping/star_align/.config.vsh.yaml | 2535 ------- target/nextflow/mapping/star_align/main.nf | 5287 -------------- .../mapping/star_align/nextflow.config | 108 - .../mapping/star_align/nextflow_params.yaml | 8 - .../mapping/star_align/nextflow_schema.json | 64 - .../mapping/star_align/setup_logger.py | 12 - .../mapping/star_align_v273a/.config.vsh.yaml | 2535 ------- .../nextflow/mapping/star_align_v273a/main.nf | 5287 -------------- .../mapping/star_align_v273a/nextflow.config | 108 - .../star_align_v273a/nextflow_params.yaml | 8 - .../star_align_v273a/nextflow_schema.json | 64 - .../mapping/star_align_v273a/setup_logger.py | 12 - .../star_build_reference/.config.vsh.yaml | 190 - .../mapping/star_build_reference/main.nf | 2686 ------- .../star_build_reference/nextflow.config | 108 - .../star_build_reference/nextflow_params.yaml | 11 - .../star_build_reference/nextflow_schema.json | 82 - 
.../nextflow/metadata/add_id/.config.vsh.yaml | 197 - target/nextflow/metadata/add_id/main.nf | 2631 ------- .../nextflow/metadata/add_id/nextflow.config | 108 - .../metadata/add_id/nextflow_params.yaml | 11 - .../metadata/add_id/nextflow_schema.json | 86 - .../nextflow/metadata/add_id/setup_logger.py | 12 - .../grep_annotation_column/.config.vsh.yaml | 244 - .../metadata/grep_annotation_column/main.nf | 2700 ------- .../grep_annotation_column/nextflow.config | 108 - .../nextflow_params.yaml | 18 - .../nextflow_schema.json | 126 - .../metadata/join_csv/.config.vsh.yaml | 229 - target/nextflow/metadata/join_csv/main.nf | 2670 ------- .../metadata/join_csv/nextflow.config | 108 - .../metadata/join_csv/nextflow_params.yaml | 17 - .../metadata/join_csv/nextflow_schema.json | 120 - .../metadata/join_csv/setup_logger.py | 12 - .../metadata/join_uns_to_obs/.config.vsh.yaml | 171 - .../nextflow/metadata/join_uns_to_obs/main.nf | 2577 ------- .../metadata/join_uns_to_obs/nextflow.config | 107 - .../join_uns_to_obs/nextflow_params.yaml | 10 - .../join_uns_to_obs/nextflow_schema.json | 79 - .../metadata/join_uns_to_obs/setup_logger.py | 12 - .../move_obsm_to_obs/.config.vsh.yaml | 192 - .../metadata/move_obsm_to_obs/main.nf | 2626 ------- .../metadata/move_obsm_to_obs/nextflow.config | 108 - .../move_obsm_to_obs/nextflow_params.yaml | 12 - .../move_obsm_to_obs/nextflow_schema.json | 90 - .../metadata/move_obsm_to_obs/setup_logger.py | 12 - .../nextflow/neighbors/bbknn/.config.vsh.yaml | 289 - target/nextflow/neighbors/bbknn/main.nf | 2706 ------- .../nextflow/neighbors/bbknn/nextflow.config | 108 - .../neighbors/bbknn/nextflow_params.yaml | 17 - .../neighbors/bbknn/nextflow_schema.json | 128 - .../neighbors/find_neighbors/.config.vsh.yaml | 309 - .../nextflow/neighbors/find_neighbors/main.nf | 2759 ------- .../neighbors/find_neighbors/nextflow.config | 108 - .../find_neighbors/nextflow_params.yaml | 16 - .../find_neighbors/nextflow_schema.json | 124 - 
.../neighbors/find_neighbors/setup_logger.py | 12 - .../filter_10xh5/.config.vsh.yaml | 195 - .../process_10xh5/filter_10xh5/main.nf | 2642 ------- .../filter_10xh5/nextflow.config | 108 - .../filter_10xh5/nextflow_params.yaml | 11 - .../filter_10xh5/nextflow_schema.json | 85 - .../qc/calculate_qc_metrics/.config.vsh.yaml | 235 - .../nextflow/qc/calculate_qc_metrics/main.nf | 2739 ------- .../qc/calculate_qc_metrics/nextflow.config | 108 - .../calculate_qc_metrics/nextflow_params.yaml | 15 - .../calculate_qc_metrics/nextflow_schema.json | 108 - .../qc/calculate_qc_metrics/setup_logger.py | 12 - target/nextflow/qc/fastqc/.config.vsh.yaml | 156 - target/nextflow/qc/fastqc/main.nf | 2512 ------- target/nextflow/qc/fastqc/nextflow.config | 107 - .../nextflow/qc/fastqc/nextflow_params.yaml | 9 - .../nextflow/qc/fastqc/nextflow_schema.json | 73 - target/nextflow/qc/multiqc/.config.vsh.yaml | 140 - target/nextflow/qc/multiqc/main.nf | 2493 ------- target/nextflow/qc/multiqc/nextflow.config | 107 - .../nextflow/qc/multiqc/nextflow_params.yaml | 7 - .../nextflow/qc/multiqc/nextflow_schema.json | 58 - .../query/cellxgene_census/.config.vsh.yaml | 260 - .../nextflow/query/cellxgene_census/main.nf | 2803 ------- .../query/cellxgene_census/nextflow.config | 108 - .../cellxgene_census/nextflow_params.yaml | 18 - .../cellxgene_census/nextflow_schema.json | 130 - .../query/cellxgene_census/setup_logger.py | 12 - .../build_bdrhap_reference/.config.vsh.yaml | 186 - .../reference/build_bdrhap_reference/main.nf | 2597 ------- .../build_bdrhap_reference/nextflow.config | 108 - .../nextflow_params.yaml | 8 - .../nextflow_schema.json | 64 - .../.config.vsh.yaml | 187 - .../build_cellranger_reference/main.nf | 2602 ------- .../nextflow.config | 108 - .../nextflow_params.yaml | 8 - .../nextflow_schema.json | 64 - .../reference/make_reference/.config.vsh.yaml | 212 - .../nextflow/reference/make_reference/main.nf | 2635 ------- .../reference/make_reference/nextflow.config | 108 - 
.../make_reference/nextflow_params.yaml | 11 - .../make_reference/nextflow_schema.json | 83 - .../nextflow/report/mermaid/.config.vsh.yaml | 185 - target/nextflow/report/mermaid/main.nf | 2554 ------- .../nextflow/report/mermaid/nextflow.config | 108 - .../report/mermaid/nextflow_params.yaml | 11 - .../report/mermaid/nextflow_schema.json | 87 - .../report/mermaid/puppeteer-config.json | 6 - .../transfer/publish/.config.vsh.yaml | 125 - target/nextflow/transfer/publish/main.nf | 2474 ------- .../nextflow/transfer/publish/nextflow.config | 108 - .../transfer/publish/nextflow_params.yaml | 7 - .../transfer/publish/nextflow_schema.json | 58 - .../nextflow/transform/clr/.config.vsh.yaml | 188 - target/nextflow/transform/clr/main.nf | 2577 ------- target/nextflow/transform/clr/nextflow.config | 108 - .../transform/clr/nextflow_params.yaml | 10 - .../transform/clr/nextflow_schema.json | 79 - .../transform/delete_layer/.config.vsh.yaml | 196 - .../transform/delete_layer/compress_h5mu.py | 49 - .../nextflow/transform/delete_layer/main.nf | 2681 ------- .../transform/delete_layer/nextflow.config | 108 - .../delete_layer/nextflow_params.yaml | 11 - .../delete_layer/nextflow_schema.json | 86 - .../transform/delete_layer/setup_logger.py | 12 - .../nextflow/transform/log1p/.config.vsh.yaml | 225 - target/nextflow/transform/log1p/main.nf | 2655 ------- .../nextflow/transform/log1p/nextflow.config | 108 - .../transform/log1p/nextflow_params.yaml | 12 - .../transform/log1p/nextflow_schema.json | 91 - .../nextflow/transform/log1p/setup_logger.py | 12 - .../normalize_total/.config.vsh.yaml | 242 - .../transform/normalize_total/main.nf | 2669 ------- .../transform/normalize_total/nextflow.config | 108 - .../normalize_total/nextflow_params.yaml | 13 - .../normalize_total/nextflow_schema.json | 99 - .../transform/normalize_total/setup_logger.py | 12 - .../transform/regress_out/.config.vsh.yaml | 195 - target/nextflow/transform/regress_out/main.nf | 2613 ------- 
.../transform/regress_out/nextflow.config | 108 - .../regress_out/nextflow_params.yaml | 10 - .../regress_out/nextflow_schema.json | 79 - .../transform/regress_out/setup_logger.py | 12 - .../nextflow/transform/scale/.config.vsh.yaml | 205 - target/nextflow/transform/scale/main.nf | 2625 ------- .../nextflow/transform/scale/nextflow.config | 108 - .../transform/scale/nextflow_params.yaml | 11 - .../transform/scale/nextflow_schema.json | 86 - .../nextflow/transform/scale/setup_logger.py | 12 - .../nextflow/velocity/scvelo/.config.vsh.yaml | 276 - target/nextflow/velocity/scvelo/main.nf | 2761 ------- .../nextflow/velocity/scvelo/nextflow.config | 107 - .../velocity/scvelo/nextflow_params.yaml | 24 - .../velocity/scvelo/nextflow_schema.json | 161 - .../nextflow/velocity/scvelo/setup_logger.py | 12 - .../velocity/velocyto/.config.vsh.yaml | 225 - target/nextflow/velocity/velocyto/main.nf | 2650 ------- .../velocity/velocyto/nextflow.config | 108 - .../velocity/velocyto/nextflow_params.yaml | 11 - .../velocity/velocyto/nextflow_schema.json | 86 - 683 files changed, 432338 deletions(-) delete mode 100644 target/docker/annotate/popv/.config.vsh.yaml delete mode 100755 target/docker/annotate/popv/popv delete mode 100644 target/docker/annotate/popv/setup_logger.py delete mode 100644 target/docker/cluster/leiden/.config.vsh.yaml delete mode 100755 target/docker/cluster/leiden/leiden delete mode 100644 target/docker/cluster/leiden/setup_logger.py delete mode 100644 target/docker/compression/compress_h5mu/.config.vsh.yaml delete mode 100755 target/docker/compression/compress_h5mu/compress_h5mu delete mode 100644 target/docker/compression/compress_h5mu/compress_h5mu.py delete mode 100644 target/docker/compression/tar_extract/.config.vsh.yaml delete mode 100755 target/docker/compression/tar_extract/tar_extract delete mode 100644 target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu delete mode 
100644 target/docker/convert/from_10xh5_to_h5mu/setup_logger.py delete mode 100644 target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu delete mode 100644 target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py delete mode 100644 target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml delete mode 100755 target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags delete mode 100644 target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu delete mode 100644 target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu delete mode 100644 target/docker/convert/from_cellranger_multi_to_h5mu/setup_logger.py delete mode 100644 target/docker/convert/from_h5ad_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu delete mode 100644 target/docker/convert/from_h5ad_to_h5mu/setup_logger.py delete mode 100644 target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml delete mode 100755 target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad delete mode 100644 target/docker/convert/from_h5mu_to_h5ad/setup_logger.py delete mode 100644 target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu delete mode 100644 target/docker/correction/cellbender_remove_background/.config.vsh.yaml delete mode 100755 target/docker/correction/cellbender_remove_background/cellbender_remove_background delete mode 100644 target/docker/correction/cellbender_remove_background/setup_logger.py delete mode 100644 target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml delete mode 100755 
target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 delete mode 100644 target/docker/correction/cellbender_remove_background_v0_2/helper.py delete mode 100644 target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py delete mode 100644 target/docker/dataflow/concat/.config.vsh.yaml delete mode 100755 target/docker/dataflow/concat/concat delete mode 100644 target/docker/dataflow/concat/setup_logger.py delete mode 100644 target/docker/dataflow/merge/.config.vsh.yaml delete mode 100755 target/docker/dataflow/merge/merge delete mode 100644 target/docker/dataflow/merge/setup_logger.py delete mode 100644 target/docker/dataflow/split_modalities/.config.vsh.yaml delete mode 100644 target/docker/dataflow/split_modalities/setup_logger.py delete mode 100755 target/docker/dataflow/split_modalities/split_modalities delete mode 100644 target/docker/demux/bcl2fastq/.config.vsh.yaml delete mode 100755 target/docker/demux/bcl2fastq/bcl2fastq delete mode 100644 target/docker/demux/bcl_convert/.config.vsh.yaml delete mode 100755 target/docker/demux/bcl_convert/bcl_convert delete mode 100644 target/docker/demux/cellranger_mkfastq/.config.vsh.yaml delete mode 100755 target/docker/demux/cellranger_mkfastq/cellranger_mkfastq delete mode 100644 target/docker/demux/cellranger_mkfastq/setup_logger.py delete mode 100644 target/docker/dimred/pca/.config.vsh.yaml delete mode 100755 target/docker/dimred/pca/pca delete mode 100644 target/docker/dimred/pca/setup_logger.py delete mode 100644 target/docker/dimred/umap/.config.vsh.yaml delete mode 100644 target/docker/dimred/umap/setup_logger.py delete mode 100755 target/docker/dimred/umap/umap delete mode 100644 target/docker/download/download_file/.config.vsh.yaml delete mode 100755 target/docker/download/download_file/download_file delete mode 100644 target/docker/download/sync_test_resources/.config.vsh.yaml delete mode 100755 target/docker/download/sync_test_resources/sync_test_resources 
delete mode 100644 target/docker/files/make_params/.config.vsh.yaml delete mode 100755 target/docker/files/make_params/make_params delete mode 100644 target/docker/filter/delimit_fraction/.config.vsh.yaml delete mode 100755 target/docker/filter/delimit_fraction/delimit_fraction delete mode 100644 target/docker/filter/delimit_fraction/setup_logger.py delete mode 100644 target/docker/filter/do_filter/.config.vsh.yaml delete mode 100755 target/docker/filter/do_filter/do_filter delete mode 100644 target/docker/filter/do_filter/setup_logger.py delete mode 100644 target/docker/filter/filter_with_counts/.config.vsh.yaml delete mode 100755 target/docker/filter/filter_with_counts/filter_with_counts delete mode 100644 target/docker/filter/filter_with_counts/setup_logger.py delete mode 100644 target/docker/filter/filter_with_hvg/.config.vsh.yaml delete mode 100755 target/docker/filter/filter_with_hvg/filter_with_hvg delete mode 100644 target/docker/filter/filter_with_hvg/setup_logger.py delete mode 100644 target/docker/filter/filter_with_scrublet/.config.vsh.yaml delete mode 100755 target/docker/filter/filter_with_scrublet/filter_with_scrublet delete mode 100644 target/docker/filter/filter_with_scrublet/setup_logger.py delete mode 100644 target/docker/filter/remove_modality/.config.vsh.yaml delete mode 100755 target/docker/filter/remove_modality/remove_modality delete mode 100644 target/docker/filter/subset_h5mu/.config.vsh.yaml delete mode 100644 target/docker/filter/subset_h5mu/setup_logger.py delete mode 100755 target/docker/filter/subset_h5mu/subset_h5mu delete mode 100644 target/docker/integrate/harmonypy/.config.vsh.yaml delete mode 100755 target/docker/integrate/harmonypy/harmonypy delete mode 100644 target/docker/integrate/scanorama/.config.vsh.yaml delete mode 100755 target/docker/integrate/scanorama/scanorama delete mode 100644 target/docker/integrate/scarches/.config.vsh.yaml delete mode 100755 target/docker/integrate/scarches/scarches delete mode 100644 
target/docker/integrate/scarches/setup_logger.py delete mode 100644 target/docker/integrate/scvi/.config.vsh.yaml delete mode 100755 target/docker/integrate/scvi/scvi delete mode 100644 target/docker/integrate/scvi/subset_vars.py delete mode 100644 target/docker/integrate/totalvi/.config.vsh.yaml delete mode 100644 target/docker/integrate/totalvi/setup_logger.py delete mode 100755 target/docker/integrate/totalvi/totalvi delete mode 100644 target/docker/interactive/run_cellxgene/.config.vsh.yaml delete mode 100755 target/docker/interactive/run_cellxgene/run_cellxgene delete mode 100644 target/docker/interactive/run_cirrocumulus/.config.vsh.yaml delete mode 100755 target/docker/interactive/run_cirrocumulus/run_cirrocumulus delete mode 100644 target/docker/interpret/lianapy/.config.vsh.yaml delete mode 100755 target/docker/interpret/lianapy/lianapy delete mode 100644 target/docker/labels_transfer/knn/.config.vsh.yaml delete mode 100644 target/docker/labels_transfer/knn/helper.py delete mode 100755 target/docker/labels_transfer/knn/knn delete mode 100644 target/docker/labels_transfer/knn/setup_logger.py delete mode 100644 target/docker/labels_transfer/xgboost/.config.vsh.yaml delete mode 100644 target/docker/labels_transfer/xgboost/helper.py delete mode 100644 target/docker/labels_transfer/xgboost/setup_logger.py delete mode 100755 target/docker/labels_transfer/xgboost/xgboost delete mode 100644 target/docker/mapping/bd_rhapsody/.config.vsh.yaml delete mode 100755 target/docker/mapping/bd_rhapsody/bd_rhapsody delete mode 100755 target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl delete mode 100755 target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl delete mode 100644 target/docker/mapping/bd_rhapsody/setup_logger.py delete mode 100644 target/docker/mapping/cellranger_count/.config.vsh.yaml delete mode 100755 target/docker/mapping/cellranger_count/cellranger_count delete mode 100644 
target/docker/mapping/cellranger_count_split/.config.vsh.yaml delete mode 100755 target/docker/mapping/cellranger_count_split/cellranger_count_split delete mode 100644 target/docker/mapping/cellranger_multi/.config.vsh.yaml delete mode 100755 target/docker/mapping/cellranger_multi/cellranger_multi delete mode 100644 target/docker/mapping/cellranger_multi/setup_logger.py delete mode 100644 target/docker/mapping/htseq_count/.config.vsh.yaml delete mode 100755 target/docker/mapping/htseq_count/htseq_count delete mode 100644 target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu delete mode 100644 target/docker/mapping/multi_star/.config.vsh.yaml delete mode 100755 target/docker/mapping/multi_star/multi_star delete mode 100644 target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml delete mode 100755 target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu delete mode 100644 target/docker/mapping/samtools_sort/.config.vsh.yaml delete mode 100755 target/docker/mapping/samtools_sort/samtools_sort delete mode 100644 target/docker/mapping/star_align/.config.vsh.yaml delete mode 100644 target/docker/mapping/star_align/setup_logger.py delete mode 100755 target/docker/mapping/star_align/star_align delete mode 100644 target/docker/mapping/star_align_v273a/.config.vsh.yaml delete mode 100644 target/docker/mapping/star_align_v273a/setup_logger.py delete mode 100755 target/docker/mapping/star_align_v273a/star_align_v273a delete mode 100644 target/docker/mapping/star_build_reference/.config.vsh.yaml delete mode 100755 target/docker/mapping/star_build_reference/star_build_reference delete mode 100644 target/docker/metadata/add_id/.config.vsh.yaml delete mode 100755 target/docker/metadata/add_id/add_id delete mode 100644 target/docker/metadata/add_id/setup_logger.py delete mode 100644 target/docker/metadata/grep_annotation_column/.config.vsh.yaml delete mode 100755 
target/docker/metadata/grep_annotation_column/grep_annotation_column delete mode 100644 target/docker/metadata/join_csv/.config.vsh.yaml delete mode 100755 target/docker/metadata/join_csv/join_csv delete mode 100644 target/docker/metadata/join_csv/setup_logger.py delete mode 100644 target/docker/metadata/join_uns_to_obs/.config.vsh.yaml delete mode 100755 target/docker/metadata/join_uns_to_obs/join_uns_to_obs delete mode 100644 target/docker/metadata/join_uns_to_obs/setup_logger.py delete mode 100644 target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml delete mode 100755 target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs delete mode 100644 target/docker/metadata/move_obsm_to_obs/setup_logger.py delete mode 100644 target/docker/neighbors/bbknn/.config.vsh.yaml delete mode 100755 target/docker/neighbors/bbknn/bbknn delete mode 100644 target/docker/neighbors/find_neighbors/.config.vsh.yaml delete mode 100755 target/docker/neighbors/find_neighbors/find_neighbors delete mode 100644 target/docker/neighbors/find_neighbors/setup_logger.py delete mode 100644 target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml delete mode 100755 target/docker/process_10xh5/filter_10xh5/filter_10xh5 delete mode 100644 target/docker/qc/calculate_qc_metrics/.config.vsh.yaml delete mode 100755 target/docker/qc/calculate_qc_metrics/calculate_qc_metrics delete mode 100644 target/docker/qc/calculate_qc_metrics/setup_logger.py delete mode 100644 target/docker/qc/fastqc/.config.vsh.yaml delete mode 100755 target/docker/qc/fastqc/fastqc delete mode 100644 target/docker/qc/multiqc/.config.vsh.yaml delete mode 100755 target/docker/qc/multiqc/multiqc delete mode 100644 target/docker/query/cellxgene_census/.config.vsh.yaml delete mode 100755 target/docker/query/cellxgene_census/cellxgene_census delete mode 100644 target/docker/query/cellxgene_census/setup_logger.py delete mode 100644 target/docker/reference/build_bdrhap_reference/.config.vsh.yaml delete mode 100755 
target/docker/reference/build_bdrhap_reference/build_bdrhap_reference delete mode 100644 target/docker/reference/build_cellranger_reference/.config.vsh.yaml delete mode 100755 target/docker/reference/build_cellranger_reference/build_cellranger_reference delete mode 100644 target/docker/reference/make_reference/.config.vsh.yaml delete mode 100755 target/docker/reference/make_reference/make_reference delete mode 100644 target/docker/report/mermaid/.config.vsh.yaml delete mode 100755 target/docker/report/mermaid/mermaid delete mode 100644 target/docker/report/mermaid/puppeteer-config.json delete mode 100644 target/docker/transfer/publish/.config.vsh.yaml delete mode 100755 target/docker/transfer/publish/publish delete mode 100644 target/docker/transform/clr/.config.vsh.yaml delete mode 100755 target/docker/transform/clr/clr delete mode 100644 target/docker/transform/delete_layer/.config.vsh.yaml delete mode 100644 target/docker/transform/delete_layer/compress_h5mu.py delete mode 100755 target/docker/transform/delete_layer/delete_layer delete mode 100644 target/docker/transform/delete_layer/setup_logger.py delete mode 100644 target/docker/transform/log1p/.config.vsh.yaml delete mode 100755 target/docker/transform/log1p/log1p delete mode 100644 target/docker/transform/log1p/setup_logger.py delete mode 100644 target/docker/transform/normalize_total/.config.vsh.yaml delete mode 100755 target/docker/transform/normalize_total/normalize_total delete mode 100644 target/docker/transform/normalize_total/setup_logger.py delete mode 100644 target/docker/transform/regress_out/.config.vsh.yaml delete mode 100755 target/docker/transform/regress_out/regress_out delete mode 100644 target/docker/transform/regress_out/setup_logger.py delete mode 100644 target/docker/transform/scale/.config.vsh.yaml delete mode 100755 target/docker/transform/scale/scale delete mode 100644 target/docker/transform/scale/setup_logger.py delete mode 100644 target/docker/velocity/scvelo/.config.vsh.yaml 
delete mode 100755 target/docker/velocity/scvelo/scvelo delete mode 100644 target/docker/velocity/scvelo/setup_logger.py delete mode 100644 target/docker/velocity/velocyto/.config.vsh.yaml delete mode 100755 target/docker/velocity/velocyto/velocyto delete mode 100644 target/native/compression/compress_h5mu/.config.vsh.yaml delete mode 100755 target/native/compression/compress_h5mu/compress_h5mu delete mode 100644 target/native/compression/compress_h5mu/compress_h5mu.py delete mode 100644 target/native/compression/tar_extract/.config.vsh.yaml delete mode 100755 target/native/compression/tar_extract/tar_extract delete mode 100644 target/native/dataflow/concat/.config.vsh.yaml delete mode 100755 target/native/dataflow/concat/concat delete mode 100644 target/native/dataflow/concat/setup_logger.py delete mode 100644 target/native/dataflow/merge/.config.vsh.yaml delete mode 100755 target/native/dataflow/merge/merge delete mode 100644 target/native/dataflow/merge/setup_logger.py delete mode 100644 target/native/dataflow/split_modalities/.config.vsh.yaml delete mode 100644 target/native/dataflow/split_modalities/setup_logger.py delete mode 100755 target/native/dataflow/split_modalities/split_modalities delete mode 100644 target/native/download/sync_test_resources/.config.vsh.yaml delete mode 100755 target/native/download/sync_test_resources/sync_test_resources delete mode 100644 target/native/integrate/scarches/.config.vsh.yaml delete mode 100755 target/native/integrate/scarches/scarches delete mode 100644 target/native/integrate/scarches/setup_logger.py delete mode 100644 target/native/integrate/totalvi/.config.vsh.yaml delete mode 100644 target/native/integrate/totalvi/setup_logger.py delete mode 100755 target/native/integrate/totalvi/totalvi delete mode 100644 target/native/labels_transfer/knn/.config.vsh.yaml delete mode 100644 target/native/labels_transfer/knn/helper.py delete mode 100755 target/native/labels_transfer/knn/knn delete mode 100644 
target/native/labels_transfer/knn/setup_logger.py delete mode 100644 target/native/labels_transfer/xgboost/.config.vsh.yaml delete mode 100644 target/native/labels_transfer/xgboost/helper.py delete mode 100644 target/native/labels_transfer/xgboost/setup_logger.py delete mode 100755 target/native/labels_transfer/xgboost/xgboost delete mode 100644 target/native/metadata/add_id/.config.vsh.yaml delete mode 100755 target/native/metadata/add_id/add_id delete mode 100644 target/native/metadata/add_id/setup_logger.py delete mode 100644 target/native/metadata/grep_annotation_column/.config.vsh.yaml delete mode 100755 target/native/metadata/grep_annotation_column/grep_annotation_column delete mode 100644 target/native/transform/scale/.config.vsh.yaml delete mode 100755 target/native/transform/scale/scale delete mode 100644 target/native/transform/scale/setup_logger.py delete mode 100644 target/native/velocity/scvelo/.config.vsh.yaml delete mode 100755 target/native/velocity/scvelo/scvelo delete mode 100644 target/native/velocity/scvelo/setup_logger.py delete mode 100644 target/native/velocity/velocyto/.config.vsh.yaml delete mode 100755 target/native/velocity/velocyto/velocyto delete mode 100644 target/nextflow/annotate/popv/.config.vsh.yaml delete mode 100644 target/nextflow/annotate/popv/main.nf delete mode 100644 target/nextflow/annotate/popv/nextflow.config delete mode 100644 target/nextflow/annotate/popv/nextflow_params.yaml delete mode 100644 target/nextflow/annotate/popv/nextflow_schema.json delete mode 100644 target/nextflow/annotate/popv/setup_logger.py delete mode 100644 target/nextflow/cluster/leiden/.config.vsh.yaml delete mode 100644 target/nextflow/cluster/leiden/main.nf delete mode 100644 target/nextflow/cluster/leiden/nextflow.config delete mode 100644 target/nextflow/cluster/leiden/nextflow_params.yaml delete mode 100644 target/nextflow/cluster/leiden/nextflow_schema.json delete mode 100644 target/nextflow/cluster/leiden/setup_logger.py delete mode 100644 
target/nextflow/compression/compress_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/compression/compress_h5mu/compress_h5mu.py delete mode 100644 target/nextflow/compression/compress_h5mu/main.nf delete mode 100644 target/nextflow/compression/compress_h5mu/nextflow.config delete mode 100644 target/nextflow/compression/compress_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/compression/compress_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/main.nf delete mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config delete mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py delete mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/main.nf delete mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config delete mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py delete mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml delete mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf delete mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config delete mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml delete mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml delete mode 100644 
target/nextflow/convert/from_bdrhap_to_h5mu/main.nf delete mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config delete mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf delete mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config delete mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py delete mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/main.nf delete mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config delete mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py delete mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml delete mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/main.nf delete mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config delete mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml delete mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json delete mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py delete mode 100644 target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/convert/velocyto_to_h5mu/main.nf delete mode 100644 target/nextflow/convert/velocyto_to_h5mu/nextflow.config delete mode 
100644 target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml delete mode 100644 target/nextflow/correction/cellbender_remove_background/main.nf delete mode 100644 target/nextflow/correction/cellbender_remove_background/nextflow.config delete mode 100644 target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml delete mode 100644 target/nextflow/correction/cellbender_remove_background/nextflow_schema.json delete mode 100644 target/nextflow/correction/cellbender_remove_background/setup_logger.py delete mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml delete mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/helper.py delete mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/main.nf delete mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config delete mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml delete mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json delete mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py delete mode 100644 target/nextflow/dataflow/concat/.config.vsh.yaml delete mode 100644 target/nextflow/dataflow/concat/main.nf delete mode 100644 target/nextflow/dataflow/concat/nextflow.config delete mode 100644 target/nextflow/dataflow/concat/nextflow_params.yaml delete mode 100644 target/nextflow/dataflow/concat/nextflow_schema.json delete mode 100644 target/nextflow/dataflow/concat/setup_logger.py delete mode 100644 target/nextflow/dataflow/merge/.config.vsh.yaml delete mode 100644 target/nextflow/dataflow/merge/main.nf delete mode 100644 target/nextflow/dataflow/merge/nextflow.config delete mode 100644 
target/nextflow/dataflow/merge/nextflow_params.yaml delete mode 100644 target/nextflow/dataflow/merge/nextflow_schema.json delete mode 100644 target/nextflow/dataflow/merge/setup_logger.py delete mode 100644 target/nextflow/dataflow/split_modalities/.config.vsh.yaml delete mode 100644 target/nextflow/dataflow/split_modalities/main.nf delete mode 100644 target/nextflow/dataflow/split_modalities/nextflow.config delete mode 100644 target/nextflow/dataflow/split_modalities/nextflow_params.yaml delete mode 100644 target/nextflow/dataflow/split_modalities/nextflow_schema.json delete mode 100644 target/nextflow/dataflow/split_modalities/setup_logger.py delete mode 100644 target/nextflow/demux/bcl2fastq/.config.vsh.yaml delete mode 100644 target/nextflow/demux/bcl2fastq/main.nf delete mode 100644 target/nextflow/demux/bcl2fastq/nextflow.config delete mode 100644 target/nextflow/demux/bcl2fastq/nextflow_params.yaml delete mode 100644 target/nextflow/demux/bcl2fastq/nextflow_schema.json delete mode 100644 target/nextflow/demux/bcl_convert/.config.vsh.yaml delete mode 100644 target/nextflow/demux/bcl_convert/main.nf delete mode 100644 target/nextflow/demux/bcl_convert/nextflow.config delete mode 100644 target/nextflow/demux/bcl_convert/nextflow_params.yaml delete mode 100644 target/nextflow/demux/bcl_convert/nextflow_schema.json delete mode 100644 target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml delete mode 100644 target/nextflow/demux/cellranger_mkfastq/main.nf delete mode 100644 target/nextflow/demux/cellranger_mkfastq/nextflow.config delete mode 100644 target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml delete mode 100644 target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json delete mode 100644 target/nextflow/demux/cellranger_mkfastq/setup_logger.py delete mode 100644 target/nextflow/dimred/pca/.config.vsh.yaml delete mode 100644 target/nextflow/dimred/pca/main.nf delete mode 100644 target/nextflow/dimred/pca/nextflow.config delete mode 100644 
target/nextflow/dimred/pca/nextflow_params.yaml delete mode 100644 target/nextflow/dimred/pca/nextflow_schema.json delete mode 100644 target/nextflow/dimred/pca/setup_logger.py delete mode 100644 target/nextflow/dimred/umap/.config.vsh.yaml delete mode 100644 target/nextflow/dimred/umap/main.nf delete mode 100644 target/nextflow/dimred/umap/nextflow.config delete mode 100644 target/nextflow/dimred/umap/nextflow_params.yaml delete mode 100644 target/nextflow/dimred/umap/nextflow_schema.json delete mode 100644 target/nextflow/dimred/umap/setup_logger.py delete mode 100644 target/nextflow/download/download_file/.config.vsh.yaml delete mode 100644 target/nextflow/download/download_file/main.nf delete mode 100644 target/nextflow/download/download_file/nextflow.config delete mode 100644 target/nextflow/download/download_file/nextflow_params.yaml delete mode 100644 target/nextflow/download/download_file/nextflow_schema.json delete mode 100644 target/nextflow/download/sync_test_resources/.config.vsh.yaml delete mode 100644 target/nextflow/download/sync_test_resources/main.nf delete mode 100644 target/nextflow/download/sync_test_resources/nextflow.config delete mode 100644 target/nextflow/download/sync_test_resources/nextflow_params.yaml delete mode 100644 target/nextflow/download/sync_test_resources/nextflow_schema.json delete mode 100644 target/nextflow/files/make_params/.config.vsh.yaml delete mode 100644 target/nextflow/files/make_params/main.nf delete mode 100644 target/nextflow/files/make_params/nextflow.config delete mode 100644 target/nextflow/files/make_params/nextflow_params.yaml delete mode 100644 target/nextflow/files/make_params/nextflow_schema.json delete mode 100644 target/nextflow/filter/delimit_fraction/.config.vsh.yaml delete mode 100644 target/nextflow/filter/delimit_fraction/main.nf delete mode 100644 target/nextflow/filter/delimit_fraction/nextflow.config delete mode 100644 target/nextflow/filter/delimit_fraction/nextflow_params.yaml delete mode 100644 
target/nextflow/filter/delimit_fraction/nextflow_schema.json delete mode 100644 target/nextflow/filter/delimit_fraction/setup_logger.py delete mode 100644 target/nextflow/filter/do_filter/.config.vsh.yaml delete mode 100644 target/nextflow/filter/do_filter/main.nf delete mode 100644 target/nextflow/filter/do_filter/nextflow.config delete mode 100644 target/nextflow/filter/do_filter/nextflow_params.yaml delete mode 100644 target/nextflow/filter/do_filter/nextflow_schema.json delete mode 100644 target/nextflow/filter/do_filter/setup_logger.py delete mode 100644 target/nextflow/filter/filter_with_counts/.config.vsh.yaml delete mode 100644 target/nextflow/filter/filter_with_counts/main.nf delete mode 100644 target/nextflow/filter/filter_with_counts/nextflow.config delete mode 100644 target/nextflow/filter/filter_with_counts/nextflow_params.yaml delete mode 100644 target/nextflow/filter/filter_with_counts/nextflow_schema.json delete mode 100644 target/nextflow/filter/filter_with_counts/setup_logger.py delete mode 100644 target/nextflow/filter/filter_with_hvg/.config.vsh.yaml delete mode 100644 target/nextflow/filter/filter_with_hvg/main.nf delete mode 100644 target/nextflow/filter/filter_with_hvg/nextflow.config delete mode 100644 target/nextflow/filter/filter_with_hvg/nextflow_params.yaml delete mode 100644 target/nextflow/filter/filter_with_hvg/nextflow_schema.json delete mode 100644 target/nextflow/filter/filter_with_hvg/setup_logger.py delete mode 100644 target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml delete mode 100644 target/nextflow/filter/filter_with_scrublet/main.nf delete mode 100644 target/nextflow/filter/filter_with_scrublet/nextflow.config delete mode 100644 target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml delete mode 100644 target/nextflow/filter/filter_with_scrublet/nextflow_schema.json delete mode 100644 target/nextflow/filter/filter_with_scrublet/setup_logger.py delete mode 100644 
target/nextflow/filter/remove_modality/.config.vsh.yaml delete mode 100644 target/nextflow/filter/remove_modality/main.nf delete mode 100644 target/nextflow/filter/remove_modality/nextflow.config delete mode 100644 target/nextflow/filter/remove_modality/nextflow_params.yaml delete mode 100644 target/nextflow/filter/remove_modality/nextflow_schema.json delete mode 100644 target/nextflow/filter/subset_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/filter/subset_h5mu/main.nf delete mode 100644 target/nextflow/filter/subset_h5mu/nextflow.config delete mode 100644 target/nextflow/filter/subset_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/filter/subset_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/filter/subset_h5mu/setup_logger.py delete mode 100644 target/nextflow/integrate/harmonypy/.config.vsh.yaml delete mode 100644 target/nextflow/integrate/harmonypy/main.nf delete mode 100644 target/nextflow/integrate/harmonypy/nextflow.config delete mode 100644 target/nextflow/integrate/harmonypy/nextflow_params.yaml delete mode 100644 target/nextflow/integrate/harmonypy/nextflow_schema.json delete mode 100644 target/nextflow/integrate/scanorama/.config.vsh.yaml delete mode 100644 target/nextflow/integrate/scanorama/main.nf delete mode 100644 target/nextflow/integrate/scanorama/nextflow.config delete mode 100644 target/nextflow/integrate/scanorama/nextflow_params.yaml delete mode 100644 target/nextflow/integrate/scanorama/nextflow_schema.json delete mode 100644 target/nextflow/integrate/scarches/.config.vsh.yaml delete mode 100644 target/nextflow/integrate/scarches/main.nf delete mode 100644 target/nextflow/integrate/scarches/nextflow.config delete mode 100644 target/nextflow/integrate/scarches/nextflow_params.yaml delete mode 100644 target/nextflow/integrate/scarches/nextflow_schema.json delete mode 100644 target/nextflow/integrate/scarches/setup_logger.py delete mode 100644 target/nextflow/integrate/scvi/.config.vsh.yaml delete mode 
100644 target/nextflow/integrate/scvi/main.nf delete mode 100644 target/nextflow/integrate/scvi/nextflow.config delete mode 100644 target/nextflow/integrate/scvi/nextflow_params.yaml delete mode 100644 target/nextflow/integrate/scvi/nextflow_schema.json delete mode 100644 target/nextflow/integrate/scvi/subset_vars.py delete mode 100644 target/nextflow/integrate/totalvi/.config.vsh.yaml delete mode 100644 target/nextflow/integrate/totalvi/main.nf delete mode 100644 target/nextflow/integrate/totalvi/nextflow.config delete mode 100644 target/nextflow/integrate/totalvi/nextflow_params.yaml delete mode 100644 target/nextflow/integrate/totalvi/nextflow_schema.json delete mode 100644 target/nextflow/integrate/totalvi/setup_logger.py delete mode 100644 target/nextflow/interpret/lianapy/.config.vsh.yaml delete mode 100644 target/nextflow/interpret/lianapy/main.nf delete mode 100644 target/nextflow/interpret/lianapy/nextflow.config delete mode 100644 target/nextflow/interpret/lianapy/nextflow_params.yaml delete mode 100644 target/nextflow/interpret/lianapy/nextflow_schema.json delete mode 100644 target/nextflow/labels_transfer/knn/.config.vsh.yaml delete mode 100644 target/nextflow/labels_transfer/knn/helper.py delete mode 100644 target/nextflow/labels_transfer/knn/main.nf delete mode 100644 target/nextflow/labels_transfer/knn/nextflow.config delete mode 100644 target/nextflow/labels_transfer/knn/nextflow_params.yaml delete mode 100644 target/nextflow/labels_transfer/knn/nextflow_schema.json delete mode 100644 target/nextflow/labels_transfer/knn/setup_logger.py delete mode 100644 target/nextflow/labels_transfer/xgboost/.config.vsh.yaml delete mode 100644 target/nextflow/labels_transfer/xgboost/helper.py delete mode 100644 target/nextflow/labels_transfer/xgboost/main.nf delete mode 100644 target/nextflow/labels_transfer/xgboost/nextflow.config delete mode 100644 target/nextflow/labels_transfer/xgboost/nextflow_params.yaml delete mode 100644 
target/nextflow/labels_transfer/xgboost/nextflow_schema.json delete mode 100644 target/nextflow/labels_transfer/xgboost/setup_logger.py delete mode 100644 target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/bd_rhapsody/main.nf delete mode 100644 target/nextflow/mapping/bd_rhapsody/nextflow.config delete mode 100644 target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/bd_rhapsody/nextflow_schema.json delete mode 100755 target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl delete mode 100755 target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl delete mode 100644 target/nextflow/mapping/bd_rhapsody/setup_logger.py delete mode 100644 target/nextflow/mapping/cellranger_count/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/cellranger_count/main.nf delete mode 100644 target/nextflow/mapping/cellranger_count/nextflow.config delete mode 100644 target/nextflow/mapping/cellranger_count/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/cellranger_count/nextflow_schema.json delete mode 100644 target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/cellranger_count_split/main.nf delete mode 100644 target/nextflow/mapping/cellranger_count_split/nextflow.config delete mode 100644 target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/cellranger_count_split/nextflow_schema.json delete mode 100644 target/nextflow/mapping/cellranger_multi/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/cellranger_multi/main.nf delete mode 100644 target/nextflow/mapping/cellranger_multi/nextflow.config delete mode 100644 target/nextflow/mapping/cellranger_multi/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/cellranger_multi/nextflow_schema.json delete mode 100644 
target/nextflow/mapping/cellranger_multi/setup_logger.py delete mode 100644 target/nextflow/mapping/htseq_count/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/htseq_count/main.nf delete mode 100644 target/nextflow/mapping/htseq_count/nextflow.config delete mode 100644 target/nextflow/mapping/htseq_count/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/htseq_count/nextflow_schema.json delete mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/main.nf delete mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/nextflow.config delete mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/mapping/multi_star/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/multi_star/main.nf delete mode 100644 target/nextflow/mapping/multi_star/nextflow.config delete mode 100644 target/nextflow/mapping/multi_star/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/multi_star/nextflow_schema.json delete mode 100644 target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/multi_star_to_h5mu/main.nf delete mode 100644 target/nextflow/mapping/multi_star_to_h5mu/nextflow.config delete mode 100644 target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json delete mode 100644 target/nextflow/mapping/samtools_sort/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/samtools_sort/main.nf delete mode 100644 target/nextflow/mapping/samtools_sort/nextflow.config delete mode 100644 target/nextflow/mapping/samtools_sort/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/samtools_sort/nextflow_schema.json delete mode 100644 target/nextflow/mapping/star_align/.config.vsh.yaml 
delete mode 100644 target/nextflow/mapping/star_align/main.nf delete mode 100644 target/nextflow/mapping/star_align/nextflow.config delete mode 100644 target/nextflow/mapping/star_align/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/star_align/nextflow_schema.json delete mode 100644 target/nextflow/mapping/star_align/setup_logger.py delete mode 100644 target/nextflow/mapping/star_align_v273a/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/star_align_v273a/main.nf delete mode 100644 target/nextflow/mapping/star_align_v273a/nextflow.config delete mode 100644 target/nextflow/mapping/star_align_v273a/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/star_align_v273a/nextflow_schema.json delete mode 100644 target/nextflow/mapping/star_align_v273a/setup_logger.py delete mode 100644 target/nextflow/mapping/star_build_reference/.config.vsh.yaml delete mode 100644 target/nextflow/mapping/star_build_reference/main.nf delete mode 100644 target/nextflow/mapping/star_build_reference/nextflow.config delete mode 100644 target/nextflow/mapping/star_build_reference/nextflow_params.yaml delete mode 100644 target/nextflow/mapping/star_build_reference/nextflow_schema.json delete mode 100644 target/nextflow/metadata/add_id/.config.vsh.yaml delete mode 100644 target/nextflow/metadata/add_id/main.nf delete mode 100644 target/nextflow/metadata/add_id/nextflow.config delete mode 100644 target/nextflow/metadata/add_id/nextflow_params.yaml delete mode 100644 target/nextflow/metadata/add_id/nextflow_schema.json delete mode 100644 target/nextflow/metadata/add_id/setup_logger.py delete mode 100644 target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml delete mode 100644 target/nextflow/metadata/grep_annotation_column/main.nf delete mode 100644 target/nextflow/metadata/grep_annotation_column/nextflow.config delete mode 100644 target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml delete mode 100644 
target/nextflow/metadata/grep_annotation_column/nextflow_schema.json delete mode 100644 target/nextflow/metadata/join_csv/.config.vsh.yaml delete mode 100644 target/nextflow/metadata/join_csv/main.nf delete mode 100644 target/nextflow/metadata/join_csv/nextflow.config delete mode 100644 target/nextflow/metadata/join_csv/nextflow_params.yaml delete mode 100644 target/nextflow/metadata/join_csv/nextflow_schema.json delete mode 100644 target/nextflow/metadata/join_csv/setup_logger.py delete mode 100644 target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml delete mode 100644 target/nextflow/metadata/join_uns_to_obs/main.nf delete mode 100644 target/nextflow/metadata/join_uns_to_obs/nextflow.config delete mode 100644 target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml delete mode 100644 target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json delete mode 100644 target/nextflow/metadata/join_uns_to_obs/setup_logger.py delete mode 100644 target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml delete mode 100644 target/nextflow/metadata/move_obsm_to_obs/main.nf delete mode 100644 target/nextflow/metadata/move_obsm_to_obs/nextflow.config delete mode 100644 target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml delete mode 100644 target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json delete mode 100644 target/nextflow/metadata/move_obsm_to_obs/setup_logger.py delete mode 100644 target/nextflow/neighbors/bbknn/.config.vsh.yaml delete mode 100644 target/nextflow/neighbors/bbknn/main.nf delete mode 100644 target/nextflow/neighbors/bbknn/nextflow.config delete mode 100644 target/nextflow/neighbors/bbknn/nextflow_params.yaml delete mode 100644 target/nextflow/neighbors/bbknn/nextflow_schema.json delete mode 100644 target/nextflow/neighbors/find_neighbors/.config.vsh.yaml delete mode 100644 target/nextflow/neighbors/find_neighbors/main.nf delete mode 100644 target/nextflow/neighbors/find_neighbors/nextflow.config delete mode 100644 
target/nextflow/neighbors/find_neighbors/nextflow_params.yaml delete mode 100644 target/nextflow/neighbors/find_neighbors/nextflow_schema.json delete mode 100644 target/nextflow/neighbors/find_neighbors/setup_logger.py delete mode 100644 target/nextflow/process_10xh5/filter_10xh5/.config.vsh.yaml delete mode 100644 target/nextflow/process_10xh5/filter_10xh5/main.nf delete mode 100644 target/nextflow/process_10xh5/filter_10xh5/nextflow.config delete mode 100644 target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml delete mode 100644 target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json delete mode 100644 target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml delete mode 100644 target/nextflow/qc/calculate_qc_metrics/main.nf delete mode 100644 target/nextflow/qc/calculate_qc_metrics/nextflow.config delete mode 100644 target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml delete mode 100644 target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json delete mode 100644 target/nextflow/qc/calculate_qc_metrics/setup_logger.py delete mode 100644 target/nextflow/qc/fastqc/.config.vsh.yaml delete mode 100644 target/nextflow/qc/fastqc/main.nf delete mode 100644 target/nextflow/qc/fastqc/nextflow.config delete mode 100644 target/nextflow/qc/fastqc/nextflow_params.yaml delete mode 100644 target/nextflow/qc/fastqc/nextflow_schema.json delete mode 100644 target/nextflow/qc/multiqc/.config.vsh.yaml delete mode 100644 target/nextflow/qc/multiqc/main.nf delete mode 100644 target/nextflow/qc/multiqc/nextflow.config delete mode 100644 target/nextflow/qc/multiqc/nextflow_params.yaml delete mode 100644 target/nextflow/qc/multiqc/nextflow_schema.json delete mode 100644 target/nextflow/query/cellxgene_census/.config.vsh.yaml delete mode 100644 target/nextflow/query/cellxgene_census/main.nf delete mode 100644 target/nextflow/query/cellxgene_census/nextflow.config delete mode 100644 target/nextflow/query/cellxgene_census/nextflow_params.yaml delete mode 100644 
target/nextflow/query/cellxgene_census/nextflow_schema.json delete mode 100644 target/nextflow/query/cellxgene_census/setup_logger.py delete mode 100644 target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml delete mode 100644 target/nextflow/reference/build_bdrhap_reference/main.nf delete mode 100644 target/nextflow/reference/build_bdrhap_reference/nextflow.config delete mode 100644 target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml delete mode 100644 target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json delete mode 100644 target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml delete mode 100644 target/nextflow/reference/build_cellranger_reference/main.nf delete mode 100644 target/nextflow/reference/build_cellranger_reference/nextflow.config delete mode 100644 target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml delete mode 100644 target/nextflow/reference/build_cellranger_reference/nextflow_schema.json delete mode 100644 target/nextflow/reference/make_reference/.config.vsh.yaml delete mode 100644 target/nextflow/reference/make_reference/main.nf delete mode 100644 target/nextflow/reference/make_reference/nextflow.config delete mode 100644 target/nextflow/reference/make_reference/nextflow_params.yaml delete mode 100644 target/nextflow/reference/make_reference/nextflow_schema.json delete mode 100644 target/nextflow/report/mermaid/.config.vsh.yaml delete mode 100644 target/nextflow/report/mermaid/main.nf delete mode 100644 target/nextflow/report/mermaid/nextflow.config delete mode 100644 target/nextflow/report/mermaid/nextflow_params.yaml delete mode 100644 target/nextflow/report/mermaid/nextflow_schema.json delete mode 100644 target/nextflow/report/mermaid/puppeteer-config.json delete mode 100644 target/nextflow/transfer/publish/.config.vsh.yaml delete mode 100644 target/nextflow/transfer/publish/main.nf delete mode 100644 target/nextflow/transfer/publish/nextflow.config delete mode 
100644 target/nextflow/transfer/publish/nextflow_params.yaml delete mode 100644 target/nextflow/transfer/publish/nextflow_schema.json delete mode 100644 target/nextflow/transform/clr/.config.vsh.yaml delete mode 100644 target/nextflow/transform/clr/main.nf delete mode 100644 target/nextflow/transform/clr/nextflow.config delete mode 100644 target/nextflow/transform/clr/nextflow_params.yaml delete mode 100644 target/nextflow/transform/clr/nextflow_schema.json delete mode 100644 target/nextflow/transform/delete_layer/.config.vsh.yaml delete mode 100644 target/nextflow/transform/delete_layer/compress_h5mu.py delete mode 100644 target/nextflow/transform/delete_layer/main.nf delete mode 100644 target/nextflow/transform/delete_layer/nextflow.config delete mode 100644 target/nextflow/transform/delete_layer/nextflow_params.yaml delete mode 100644 target/nextflow/transform/delete_layer/nextflow_schema.json delete mode 100644 target/nextflow/transform/delete_layer/setup_logger.py delete mode 100644 target/nextflow/transform/log1p/.config.vsh.yaml delete mode 100644 target/nextflow/transform/log1p/main.nf delete mode 100644 target/nextflow/transform/log1p/nextflow.config delete mode 100644 target/nextflow/transform/log1p/nextflow_params.yaml delete mode 100644 target/nextflow/transform/log1p/nextflow_schema.json delete mode 100644 target/nextflow/transform/log1p/setup_logger.py delete mode 100644 target/nextflow/transform/normalize_total/.config.vsh.yaml delete mode 100644 target/nextflow/transform/normalize_total/main.nf delete mode 100644 target/nextflow/transform/normalize_total/nextflow.config delete mode 100644 target/nextflow/transform/normalize_total/nextflow_params.yaml delete mode 100644 target/nextflow/transform/normalize_total/nextflow_schema.json delete mode 100644 target/nextflow/transform/normalize_total/setup_logger.py delete mode 100644 target/nextflow/transform/regress_out/.config.vsh.yaml delete mode 100644 target/nextflow/transform/regress_out/main.nf delete 
mode 100644 target/nextflow/transform/regress_out/nextflow.config delete mode 100644 target/nextflow/transform/regress_out/nextflow_params.yaml delete mode 100644 target/nextflow/transform/regress_out/nextflow_schema.json delete mode 100644 target/nextflow/transform/regress_out/setup_logger.py delete mode 100644 target/nextflow/transform/scale/.config.vsh.yaml delete mode 100644 target/nextflow/transform/scale/main.nf delete mode 100644 target/nextflow/transform/scale/nextflow.config delete mode 100644 target/nextflow/transform/scale/nextflow_params.yaml delete mode 100644 target/nextflow/transform/scale/nextflow_schema.json delete mode 100644 target/nextflow/transform/scale/setup_logger.py delete mode 100644 target/nextflow/velocity/scvelo/.config.vsh.yaml delete mode 100644 target/nextflow/velocity/scvelo/main.nf delete mode 100644 target/nextflow/velocity/scvelo/nextflow.config delete mode 100644 target/nextflow/velocity/scvelo/nextflow_params.yaml delete mode 100644 target/nextflow/velocity/scvelo/nextflow_schema.json delete mode 100644 target/nextflow/velocity/scvelo/setup_logger.py delete mode 100644 target/nextflow/velocity/velocyto/.config.vsh.yaml delete mode 100644 target/nextflow/velocity/velocyto/main.nf delete mode 100644 target/nextflow/velocity/velocyto/nextflow.config delete mode 100644 target/nextflow/velocity/velocyto/nextflow_params.yaml delete mode 100644 target/nextflow/velocity/velocyto/nextflow_schema.json diff --git a/target/docker/annotate/popv/.config.vsh.yaml b/target/docker/annotate/popv/.config.vsh.yaml deleted file mode 100644 index d595e6e7d5c..00000000000 --- a/target/docker/annotate/popv/.config.vsh.yaml +++ /dev/null @@ -1,346 +0,0 @@ -functionality: - name: "popv" - namespace: "annotate" - version: "0.12.3" - authors: - - name: "Matthias Beyens" - roles: - - "author" - info: - role: "Contributor" - links: - github: "MatthiasBeyens" - orcid: "0000-0003-3304-0706" - email: "matthias.beyens@gmail.com" - linkedin: "mbeyens" - 
organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - description: "Arguments related to the input (aka query) dataset." - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file." - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to process." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Which layer to use. If no value is provided, the counts are assumed\ - \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obs_batch" - description: "Key in obs field of input adata for batch information. If no value\ - \ is provided, batch label is assumed to be unknown." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_var_subset" - description: "Subset the input object with this column." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obs_label" - description: "Key in obs field of input adata for label information. This is\ - \ only used for training scANVI. Unlabelled cells should be set to `\"unknown_celltype_label\"\ - `." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--unknown_celltype_label" - description: "If `input_obs_label` is specified, cells with this value will\ - \ be treated as unknown and will be predicted by the model." - info: null - default: - - "unknown" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference" - description: "Arguments related to the reference dataset." - arguments: - - type: "file" - name: "--reference" - description: "User-provided reference tissue. The data that will be used as\ - \ reference to call cell types." - info: null - example: - - "TS_Bladder_filtered.h5ad" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_layer" - description: "Which layer to use. If no value is provided, the counts are assumed\ - \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_label" - description: "Key in obs field of reference AnnData with cell-type information." - info: null - default: - - "cell_ontology_class" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_batch" - description: "Key in obs field of input adata for batch information." 
- info: null - default: - - "donor_assay" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - description: "Output arguments." - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - description: "Other arguments." - arguments: - - type: "string" - name: "--methods" - description: "Methods to call cell types. By default, runs to knn_on_scvi and\ - \ scanvi." - info: null - example: - - "knn_on_scvi" - - "scanvi" - required: true - choices: - - "celltypist" - - "knn_on_bbknn" - - "knn_on_scanorama" - - "knn_on_scvi" - - "onclass" - - "rf" - - "scanvi" - - "svm" - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs popular major vote cell typing on single cell sequence data\ - \ using multiple algorithms. Note that this is a one-shot version of PopV." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - - "build-essential" - - "wget" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "scvi-tools~=1.0.3" - - "popv~=0.3.2" - - "jax==0.4.10" - - "jaxlib==0.4.10" - - "ml-dtypes<0.3.0" - upgrade: true - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "docker" - run: - - "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \\\n\ - \ cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2\n" - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 
16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/annotate/popv/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv/popv" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/annotate/popv/popv b/target/docker/annotate/popv/popv deleted file mode 100755 index 5d1d6c02249..00000000000 --- a/target/docker/annotate/popv/popv +++ /dev/null @@ -1,1401 +0,0 @@ -#!/usr/bin/env bash - -# popv 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Matthias Beyens (author) -# * Robrecht Cannoodt (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity 
level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="popv" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "popv 0.12.3" - echo "" - echo "Performs popular major vote cell typing on single cell sequence data using" - echo "multiple algorithms. Note that this is a one-shot version of PopV." - echo "" - echo "Inputs:" - echo " Arguments related to the input (aka query) dataset." - echo "" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " Which modality to process." - echo "" - echo " --input_layer" - echo " type: string" - echo " Which layer to use. If no value is provided, the counts are assumed to" - echo " be in the \`.X\` slot. Otherwise, count data is expected to be in" - echo " \`.layers[input_layer]\`." - echo "" - echo " --input_obs_batch" - echo " type: string" - echo " Key in obs field of input adata for batch information. If no value is" - echo " provided, batch label is assumed to be unknown." - echo "" - echo " --input_var_subset" - echo " type: string" - echo " Subset the input object with this column." - echo "" - echo " --input_obs_label" - echo " type: string" - echo " Key in obs field of input adata for label information. This is only used" - echo " for training scANVI. 
Unlabelled cells should be set to" - echo " \`\"unknown_celltype_label\"\`." - echo "" - echo " --unknown_celltype_label" - echo " type: string" - echo " default: unknown" - echo " If \`input_obs_label\` is specified, cells with this value will be treated" - echo " as unknown and will be predicted by the model." - echo "" - echo "Reference:" - echo " Arguments related to the reference dataset." - echo "" - echo " --reference" - echo " type: file, required parameter, file must exist" - echo " example: TS_Bladder_filtered.h5ad" - echo " User-provided reference tissue. The data that will be used as reference" - echo " to call cell types." - echo "" - echo " --reference_layer" - echo " type: string" - echo " Which layer to use. If no value is provided, the counts are assumed to" - echo " be in the \`.X\` slot. Otherwise, count data is expected to be in" - echo " \`.layers[reference_layer]\`." - echo "" - echo " --reference_obs_label" - echo " type: string" - echo " default: cell_ontology_class" - echo " Key in obs field of reference AnnData with cell-type information." - echo "" - echo " --reference_obs_batch" - echo " type: string" - echo " default: donor_assay" - echo " Key in obs field of input adata for batch information." - echo "" - echo "Outputs:" - echo " Output arguments." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo "" - echo "Arguments:" - echo " Other arguments." - echo "" - echo " --methods" - echo " type: string, required parameter, multiple values allowed" - echo " example: knn_on_scvi:scanvi" - echo " choices: [ celltypist, knn_on_bbknn, knn_on_scanorama, knn_on_scvi," - echo "onclass, rf, scanvi, svm ]" - echo " Methods to call cell types. By default, runs to knn_on_scvi and scanvi." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps git build-essential wget && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "scanpy~=1.9.5" "scvi-tools~=1.0.3" "popv~=0.3.2" "jax==0.4.10" "jaxlib==0.4.10" "ml-dtypes<0.3.0" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -RUN cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \ - cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2 - -LABEL org.opencontainers.image.authors="Matthias Beyens, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component annotate popv" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-popv-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "popv 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_layer) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_layer=*) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_obs_batch) - [ -n "$VIASH_PAR_INPUT_OBS_BATCH" ] && ViashError Bad arguments for option \'--input_obs_batch\': \'$VIASH_PAR_INPUT_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBS_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obs_batch. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_obs_batch=*) - [ -n "$VIASH_PAR_INPUT_OBS_BATCH" ] && ViashError Bad arguments for option \'--input_obs_batch=*\': \'$VIASH_PAR_INPUT_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBS_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_var_subset) - [ -n "$VIASH_PAR_INPUT_VAR_SUBSET" ] && ViashError Bad arguments for option \'--input_var_subset\': \'$VIASH_PAR_INPUT_VAR_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_VAR_SUBSET="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_var_subset. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_var_subset=*) - [ -n "$VIASH_PAR_INPUT_VAR_SUBSET" ] && ViashError Bad arguments for option \'--input_var_subset=*\': \'$VIASH_PAR_INPUT_VAR_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INPUT_VAR_SUBSET=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_obs_label) - [ -n "$VIASH_PAR_INPUT_OBS_LABEL" ] && ViashError Bad arguments for option \'--input_obs_label\': \'$VIASH_PAR_INPUT_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBS_LABEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obs_label. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_obs_label=*) - [ -n "$VIASH_PAR_INPUT_OBS_LABEL" ] && ViashError Bad arguments for option \'--input_obs_label=*\': \'$VIASH_PAR_INPUT_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBS_LABEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --unknown_celltype_label) - [ -n "$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL" ] && ViashError Bad arguments for option \'--unknown_celltype_label\': \'$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNKNOWN_CELLTYPE_LABEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --unknown_celltype_label. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --unknown_celltype_label=*) - [ -n "$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL" ] && ViashError Bad arguments for option \'--unknown_celltype_label=*\': \'$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNKNOWN_CELLTYPE_LABEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_layer) - [ -n "$VIASH_PAR_REFERENCE_LAYER" ] && ViashError Bad arguments for option \'--reference_layer\': \'$VIASH_PAR_REFERENCE_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_layer=*) - [ -n "$VIASH_PAR_REFERENCE_LAYER" ] && ViashError Bad arguments for option \'--reference_layer=*\': \'$VIASH_PAR_REFERENCE_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obs_label) - [ -n "$VIASH_PAR_REFERENCE_OBS_LABEL" ] && ViashError Bad arguments for option \'--reference_obs_label\': \'$VIASH_PAR_REFERENCE_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBS_LABEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_label. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obs_label=*) - [ -n "$VIASH_PAR_REFERENCE_OBS_LABEL" ] && ViashError Bad arguments for option \'--reference_obs_label=*\': \'$VIASH_PAR_REFERENCE_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE_OBS_LABEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obs_batch) - [ -n "$VIASH_PAR_REFERENCE_OBS_BATCH" ] && ViashError Bad arguments for option \'--reference_obs_batch\': \'$VIASH_PAR_REFERENCE_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBS_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_batch. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obs_batch=*) - [ -n "$VIASH_PAR_REFERENCE_OBS_BATCH" ] && ViashError Bad arguments for option \'--reference_obs_batch=*\': \'$VIASH_PAR_REFERENCE_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBS_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --methods) - if [ -z "$VIASH_PAR_METHODS" ]; then - VIASH_PAR_METHODS="$2" - else - VIASH_PAR_METHODS="$VIASH_PAR_METHODS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --methods. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --methods=*) - if [ -z "$VIASH_PAR_METHODS" ]; then - VIASH_PAR_METHODS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_METHODS="$VIASH_PAR_METHODS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_METHODS+x} ]; then - ViashError '--methods' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL+x} ]; then - VIASH_PAR_UNKNOWN_CELLTYPE_LABEL="unknown" -fi -if [ -z ${VIASH_PAR_REFERENCE_OBS_LABEL+x} ]; then - VIASH_PAR_REFERENCE_OBS_LABEL="cell_ontology_class" -fi -if [ -z ${VIASH_PAR_REFERENCE_OBS_BATCH+x} ]; then - VIASH_PAR_REFERENCE_OBS_BATCH="donor_assay" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_METHODS" ]; then - VIASH_PAR_METHODS_CHOICES=("celltypist:knn_on_bbknn:knn_on_scanorama:knn_on_scvi:onclass:rf:scanvi:svm") - IFS=':' - set -f - for val in $VIASH_PAR_METHODS; do - if ! [[ ":${VIASH_PAR_METHODS_CHOICES[*]}:" =~ ":${val}:" ]]; then - ViashError '--methods' specified value of \'${val}\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/annotate_popv:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/annotate_popv:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/annotate_popv:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-popv-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import re -import tempfile -import typing -import numpy as np -import mudata as mu -import anndata as ad -import popv - -# todo: is this still needed? -from torch.cuda import is_available as cuda_is_available -try: - from torch.backends.mps import is_available as mps_is_available -except ModuleNotFoundError: - # Older pytorch versions - # MacOS GPUs - def mps_is_available(): - return False - -# where to find the obo files -cl_obo_folder = "/opt/PopV/ontology/" - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_obs_batch': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_var_subset': $( if [ ! 
-z ${VIASH_PAR_INPUT_VAR_SUBSET+x} ]; then echo "r'${VIASH_PAR_INPUT_VAR_SUBSET//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_obs_label': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_LABEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'unknown_celltype_label': $( if [ ! -z ${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL+x} ]; then echo "r'${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_layer': $( if [ ! -z ${VIASH_PAR_REFERENCE_LAYER+x} ]; then echo "r'${VIASH_PAR_REFERENCE_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obs_label': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_LABEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obs_batch': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'methods': $( if [ ! -z ${VIASH_PAR_METHODS+x} ]; then echo "r'${VIASH_PAR_METHODS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -use_gpu = cuda_is_available() or mps_is_available() -logger.info("GPU enabled? 
%s", use_gpu) - -# Helper functions -def get_X(adata: ad.AnnData, layer: typing.Optional[str], var_index: typing.Optional[str]): - """Fetch the counts data from X or a layer. Subset columns by var_index if so desired.""" - if var_index: - adata = adata[:, var_index] - if layer: - return adata.layers[layer] - else: - return adata.X -def get_obs(adata: ad.AnnData, obs_par_names): - """Subset the obs dataframe to just the columns defined by the obs_label and obs_batch.""" - obs_columns = [par[x] for x in obs_par_names if par[x]] - return adata.obs[obs_columns] -def get_var(adata: ad.AnnData, var_index: list[str]): - """Fetch the var dataframe. Subset rows by var_index if so desired.""" - return adata.var.loc[var_index] - -def main(par, meta): - assert len(par["methods"]) >= 1, "Please, specify at least one method for cell typing." - logger.info("Cell typing methods: {}".format(par["methods"])) - - ### PREPROCESSING REFERENCE ### - logger.info("### PREPROCESSING REFERENCE ###") - - # take a look at reference data - logger.info("Reading reference data '%s'", par["reference"]) - reference = ad.read_h5ad(par["reference"]) - - logger.info("Setting reference var index to Ensembl IDs") - reference.var["gene_symbol"] = list(reference.var.index) - reference.var.index = [re.sub("\\\\.[0-9]+\$", "", s) for s in reference.var["ensemblid"]] - - logger.info("Detect number of samples per label") - min_celltype_size = np.min(reference.obs.groupby(par["reference_obs_batch"]).size()) - n_samples_per_label = np.max((min_celltype_size, 100)) - - ### PREPROCESSING INPUT ### - logger.info("### PREPROCESSING INPUT ###") - logger.info("Reading '%s'", par["input"]) - input = mu.read_h5mu(par["input"]) - input_modality = input.mod[par["modality"]] - - # subset with var column - if par["input_var_subset"]: - logger.info("Subset input with .var['%s']", par["input_var_subset"]) - assert par["input_var_subset"] in input_modality.var, f"--input_var_subset='{par['input_var_subset']}' needs to be a 
column in .var" - input_modality = input_modality[:,input_modality.var[par["input_var_subset"]]] - - ### ALIGN REFERENCE AND INPUT ### - logger.info("### ALIGN REFERENCE AND INPUT ###") - - logger.info("Detecting common vars based on ensembl ids") - common_ens_ids = list(set(reference.var.index).intersection(set(input_modality.var.index))) - - logger.info(" reference n_vars: %i", reference.n_vars) - logger.info(" input n_vars: %i", input_modality.n_vars) - logger.info(" intersect n_vars: %i", len(common_ens_ids)) - assert len(common_ens_ids) >= 100, "The intersection of genes is too small." - - # subset input objects to make sure popv is using the data we expect - input_modality = ad.AnnData( - X = get_X(input_modality, par["input_layer"], common_ens_ids), - obs = get_obs(input_modality, ["input_obs_label", "input_obs_batch"]), - var = get_var(input_modality, common_ens_ids) - ) - reference = ad.AnnData( - X = get_X(reference, par["reference_layer"], common_ens_ids), - obs = get_obs(reference, ["reference_obs_label", "reference_obs_batch"]), - var = get_var(reference, common_ens_ids) - ) - - # remove layers that - - ### ALIGN REFERENCE AND INPUT ### - logger.info("### ALIGN REFERENCE AND INPUT ###") - - with tempfile.TemporaryDirectory(prefix="popv-", dir=meta["temp_dir"]) as temp_dir: - logger.info("Run PopV processing") - pq = popv.preprocessing.Process_Query( - # input - query_adata=input_modality, - query_labels_key=par["input_obs_label"], - query_batch_key=par["input_obs_batch"], - query_layers_key=None, # this is taken care of by subset - # reference - ref_adata=reference, - ref_labels_key=par["reference_obs_label"], - ref_batch_key=par["reference_obs_batch"], - # options - unknown_celltype_label=par["unknown_celltype_label"], - n_samples_per_label=n_samples_per_label, - # pretrained model - # Might need to be parameterized at some point - prediction_mode="retrain", - pretrained_scvi_path=None, - # outputs - # Might need to be parameterized at some point - 
save_path_trained_models=temp_dir, - # hardcoded values - cl_obo_folder=cl_obo_folder, - use_gpu=use_gpu - ) - method_kwargs = {} - if 'scanorama' in par['methods']: - method_kwargs['scanorama'] = {'approx': False} - logger.info("Annotate data") - popv.annotation.annotate_data( - adata=pq.adata, - methods=par["methods"], - methods_kwargs=method_kwargs - ) - - popv_input = pq.adata[input_modality.obs_names] - - # select columns starting with "popv_" - popv_obs_cols = popv_input.obs.columns[popv_input.obs.columns.str.startswith("popv_")] - - # create new data frame with selected columns - df_popv = popv_input.obs[popv_obs_cols] - - # remove prefix from column names - df_popv.columns = df_popv.columns.str.replace("popv_", "") - - # store output in mudata .obsm - input.mod[par["modality"]].obsm["popv_output"] = df_popv - - # copy important output in mudata .obs - for col in ["popv_prediction"]: - if col in popv_input.obs.columns: - input.mod[par["modality"]].obs[col] = popv_input.obs[col] - - # code to explore how the output differs from the original - # for attr in ["obs", "var", "uns", "obsm", "layers", "obsp"]: - # old_keys = set(getattr(pq_adata_orig, attr).keys()) - # new_keys = set(getattr(pq.adata, attr).keys()) - # diff_keys = list(new_keys.difference(old_keys)) - # diff_keys.sort() - # print(f"{attr}:", flush=True) - # for key in diff_keys: - # print(f" {key}", flush=True) - - # write output - logger.info("Writing %s", par["output"]) - input.write_h5mu(par["output"], compression=par["output_compression"]) - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/annotate/popv/setup_logger.py b/target/docker/annotate/popv/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/annotate/popv/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/cluster/leiden/.config.vsh.yaml b/target/docker/cluster/leiden/.config.vsh.yaml deleted file mode 100644 index b108de8f22e..00000000000 --- a/target/docker/cluster/leiden/.config.vsh.yaml +++ /dev/null @@ -1,219 +0,0 @@ -functionality: - name: "leiden" - namespace: "cluster" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: 
"https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input file." - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_connectivities" - description: "In which .obsp slot the neighbor connectivities can be found." - info: null - default: - - "connectivities" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_name" - description: "Name of the .obsm key under which to add the cluster labels.\nThe\ - \ name of the columns in the matrix will correspond to the resolutions.\n" - info: null - default: - - "leiden" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--resolution" - description: "A parameter value controlling the coarseness of the clustering.\ - \ Higher values lead to more clusters.\nMultiple values will result in clustering\ - \ being performed multiple times.\n" - info: null - default: - - 1.0 - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - 
is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Cluster cells using the Leiden algorithm [Traag18] implemented in\ - \ the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain\ - \ algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15].\ - \ \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\ - \nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks,\ - \ J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection\ - \ of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n\ - Traag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected\ - \ communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell\ - \ gene expression data analysis, Genome Biology. \n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.8-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "cmake" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "leidenalg~=0.8.9" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: 
"memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden/leiden" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/cluster/leiden/leiden b/target/docker/cluster/leiden/leiden deleted file mode 100755 index d8b5d51ec91..00000000000 --- a/target/docker/cluster/leiden/leiden +++ /dev/null @@ -1,1115 +0,0 @@ -#!/usr/bin/env bash - -# leiden 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="leiden" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "leiden 0.12.3" - echo "" - echo "Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy" - echo "framework [Wolf18]." - echo "Leiden is an improved version of the Louvain algorithm [Blondel08]." - echo "It has been proposed for single-cell analysis by [Levine15]." - echo "This requires having ran \`neighbors/find_neighbors\` or \`neighbors/bbknn\` first." - echo "" - echo "Blondel08: Blondel et al. (2008), Fast unfolding of communities in large" - echo "networks, J. Stat. Mech." - echo "Levine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals" - echo "Progenitor-like Cells that Correlate with Prognosis, Cell." - echo "Traag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing" - echo "well-connected communities arXiv." - echo "Wolf18: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data" - echo "analysis, Genome Biology." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input file." 
- echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --obsp_connectivities" - echo " type: string" - echo " default: connectivities" - echo " In which .obsp slot the neighbor connectivities can be found." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo "" - echo " --obsm_name" - echo " type: string" - echo " default: leiden" - echo " Name of the .obsm key under which to add the cluster labels." - echo " The name of the columns in the matrix will correspond to the" - echo " resolutions." - echo "" - echo " --resolution" - echo " type: double, required parameter, multiple values allowed" - echo " default: 1.0" - echo " A parameter value controlling the coarseness of the clustering. Higher" - echo " values lead to more clusters." - echo " Multiple values will result in clustering being performed multiple" - echo " times." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.8-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y cmake procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "leidenalg~=0.8.9" - -LABEL org.opencontainers.image.authors="Dries De Maeyer" -LABEL org.opencontainers.image.description="Companion container for running component cluster leiden" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-leiden-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "leiden 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsp_connectivities) - [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSP_CONNECTIVITIES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_connectivities. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsp_connectivities=*) - [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities=*\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_CONNECTIVITIES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_name) - [ -n "$VIASH_PAR_OBSM_NAME" ] && ViashError Bad arguments for option \'--obsm_name\': \'$VIASH_PAR_OBSM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_name=*) - [ -n "$VIASH_PAR_OBSM_NAME" ] && ViashError Bad arguments for option \'--obsm_name=*\': \'$VIASH_PAR_OBSM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --resolution) - if [ -z "$VIASH_PAR_RESOLUTION" ]; then - VIASH_PAR_RESOLUTION="$2" - else - VIASH_PAR_RESOLUTION="$VIASH_PAR_RESOLUTION:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --resolution. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --resolution=*) - if [ -z "$VIASH_PAR_RESOLUTION" ]; then - VIASH_PAR_RESOLUTION=$(ViashRemoveFlags "$1") - else - VIASH_PAR_RESOLUTION="$VIASH_PAR_RESOLUTION:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_RESOLUTION+x} ]; then - ViashError '--resolution' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then - VIASH_PAR_OBSP_CONNECTIVITIES="connectivities" -fi -if [ -z ${VIASH_PAR_OBSM_NAME+x} ]; then - VIASH_PAR_OBSM_NAME="leiden" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [ -n "$VIASH_PAR_RESOLUTION" ]; then - IFS=':' - set -f - for val in $VIASH_PAR_RESOLUTION; do - if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--resolution' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/cluster_leiden:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/cluster_leiden:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/cluster_leiden:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-leiden-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import mudata as mu -import pandas as pd -import scanpy as sc - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_name': $( if [ ! -z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -mdata = mu.read_h5mu(par["input"]) - - -def run_single_resolution(adata, resolution): - adata_out = sc.tl.leiden( - adata, - resolution=resolution, - key_added=str(resolution), - obsp=par['obsp_connectivities'], - copy=True - ) - return adata_out.obs[str(resolution)] - -logger.info("Processing modality '%s'.", par['modality']) -data = mdata.mod[par['modality']] -results = 
{str(resolution): run_single_resolution(data, resolution) for resolution in par["resolution"]} -data.obsm[par["obsm_name"]] = pd.DataFrame(results) -logger.info("Writing to %s.", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -logger.info("Finished.") -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/cluster/leiden/setup_logger.py b/target/docker/cluster/leiden/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/cluster/leiden/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/compression/compress_h5mu/.config.vsh.yaml b/target/docker/compression/compress_h5mu/.config.vsh.yaml deleted file mode 100644 index 82c3f368ea4..00000000000 --- a/target/docker/compression/compress_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,167 +0,0 @@ -functionality: - name: "compress_h5mu" - namespace: "compression" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "location of output file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--compression" - description: "Compression type." 
- info: null - default: - - "gzip" - required: false - choices: - - "lzf" - - "gzip" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../../utils/compress_h5mu.py" - description: "Compress a MuData file. \n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory 
= 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu/compress_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/compression/compress_h5mu/compress_h5mu b/target/docker/compression/compress_h5mu/compress_h5mu deleted file mode 100755 index e4f20ce9654..00000000000 --- a/target/docker/compression/compress_h5mu/compress_h5mu +++ /dev/null @@ -1,1008 +0,0 @@ -#!/usr/bin/env bash - -# compress_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="compress_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "compress_h5mu 0.12.3" - echo "" - echo "Compress a MuData file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: sample_path" - echo " Path to the input .h5mu." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " location of output file." - echo "" - echo " --compression" - echo " type: string" - echo " default: gzip" - echo " choices: [ lzf, gzip ]" - echo " Compression type." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component compression compress_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - 
-VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-compress_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "compress_h5mu 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --compression) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --compression=*) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then - VIASH_PAR_COMPRESSION="gzip" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_COMPRESSION" ]; then - VIASH_PAR_COMPRESSION_CHOICES=("lzf:gzip") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_COMPRESSION:" ]]; then - ViashError '--compression' specified value of \'$VIASH_PAR_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-compress_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - 
starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -if __name__ == "__main__": - compress_h5mu(par["input"], par["output"], compression=par["compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/compression/compress_h5mu/compress_h5mu.py b/target/docker/compression/compress_h5mu/compress_h5mu.py deleted file mode 100644 index 9d92395a573..00000000000 --- a/target/docker/compression/compress_h5mu/compress_h5mu.py +++ /dev/null @@ -1,49 +0,0 @@ -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = 
input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) diff --git a/target/docker/compression/tar_extract/.config.vsh.yaml b/target/docker/compression/tar_extract/.config.vsh.yaml deleted file mode 100644 index e6ea892a4ca..00000000000 --- a/target/docker/compression/tar_extract/.config.vsh.yaml +++ /dev/null @@ -1,106 +0,0 @@ -functionality: - name: "tar_extract" - namespace: "compression" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input file" - info: null - example: - - "input.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Folder to restore file(s) to." - info: null - example: - - "output_folder" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--strip_components" - alternatives: - - "-s" - description: "Strip this amount of leading components from file names on extraction.\ - \ For example, to extract only 'myfile.txt' from an archive containing the structure\ - \ `this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--exclude" - alternatives: - - "-e" - description: "Prevents any file or member whose name matches the shell wildcard\ - \ (pattern) from being extracted." 
- info: null - example: - - "docs/figures" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Extract files from a tar archive" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "../../../LICENSE" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "native" - id: "native" -- type: "docker" - id: "docker" - image: "ubuntu:latest" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -info: - config: "/home/runner/work/openpipeline/openpipeline/src/compression/tar_extract/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract/tar_extract" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/compression/tar_extract/tar_extract b/target/docker/compression/tar_extract/tar_extract deleted file mode 100755 index 6ae8af87aee..00000000000 --- a/target/docker/compression/tar_extract/tar_extract +++ /dev/null @@ -1,978 +0,0 @@ -#!/usr/bin/env bash - -# tar_extract 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. 
The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 
-VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. 
-# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="tar_extract" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "tar_extract 0.12.3" - echo "" - echo "Extract files from a tar archive" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.tar.gz" - echo " Input file" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output_folder" - echo " Folder to restore file(s) to." - echo "" - echo " -s, --strip_components" - echo " type: integer" - echo " example: 1" - echo " Strip this amount of leading components from file names on extraction." - echo " For example, to extract only 'myfile.txt' from an archive containing the" - echo " structure \`this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'." - echo "" - echo " -e, --exclude" - echo " type: string" - echo " example: docs/figures" - echo " Prevents any file or member whose name matches the shell wildcard" - echo " (pattern) from being extracted." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:latest - -ENTRYPOINT [] - - -RUN : -LABEL org.opencontainers.image.description="Companion container for running component compression tar_extract" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-tar_extract-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "tar_extract 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --strip_components) - [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRIP_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --strip_components. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --strip_components=*) - [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components=*\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRIP_COMPONENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - -s) - [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_STRIP_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --exclude) - [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXCLUDE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --exclude=*) - [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude=*\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -e) - [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'-e\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXCLUDE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -e. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_STRIP_COMPONENTS" ]]; then - if ! [[ "$VIASH_PAR_STRIP_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--strip_components' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! 
-z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-tar_extract-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/usr/bin/env bash - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_STRIP_COMPONENTS+x} ]; then echo "${VIASH_PAR_STRIP_COMPONENTS}" | sed "s#'#'\"'\"'#g;s#.*#par_strip_components='&'#" ; else echo "# par_strip_components="; fi ) -$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=() -mkdir -p \$par_output # Create output directory if it doesn't exist already - -if [ "\$par_strip_components" != "" ]; then - extra_params+=("--strip-components=\$par_strip_components") -fi - -if [ "\$par_exclude" != "" ]; then - extra_params+=("--exclude=\$par_exclude") -fi - -echo "Extracting \$par_input to \$par_output..." 
-echo "" -tar "\${extra_params[@]}" -xvf "\$par_input" -C "\$par_output" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 253211db1ac..00000000000 --- a/target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,272 +0,0 @@ -functionality: - name: "from_10xh5_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "A 10x h5 file as generated by Cell Ranger." - info: null - example: - - "raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input_metrics_summary" - description: "A metrics summary csv file as generated by Cell Ranger." - info: null - example: - - "metrics_cellranger.h5" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: - slots: - mod: - - name: "rna" - required: true - description: "Gene expression counts." - slots: - var: - - name: "gene_symbol" - type: "string" - description: "Identification of the gene." - required: true - - name: "feature_types" - type: "string" - description: "The full name of the modality." 
- required: true - - name: "genome" - type: "string" - description: "Reference that was used to generate the data." - required: true - - name: "prot" - required: false - description: "Protein abundancy" - slots: - var: - - name: "gene_symbol" - type: "string" - description: "Identification of the gene." - required: true - - name: "feature_types" - type: "string" - description: "The full name of the modality." - required: true - - name: "genome" - type: "string" - description: "Reference that was used to generate the data." - required: true - - name: "vdj" - required: false - description: "VDJ transcript counts" - slots: - var: - - name: "gene_symbol" - type: "string" - description: "Identification of the gene." - required: true - - name: "feature_types" - type: "string" - description: "The full name of the modality." - required: true - - name: "genome" - type: "string" - description: "Reference that was used to generate the data." - required: true - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_metrics" - description: "Name of the .uns slot under which to QC metrics (if any)." - info: null - default: - - "metrics_cellranger" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: "--min_genes" - description: "Minimum number of counts required for a cell to pass filtering." 
- info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_counts" - description: "Minimum number of genes expressed required for a cell to pass\ - \ filtering." - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a 10x h5 into an h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 
8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xh5_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu b/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu deleted file mode 100755 index bd646a2b95b..00000000000 --- a/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu +++ /dev/null @@ -1,1117 +0,0 @@ -#!/usr/bin/env bash - -# from_10xh5_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 
1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="from_10xh5_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "from_10xh5_to_h5mu 0.12.3" - echo "" - echo "Converts a 10x h5 into an h5mu file." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: raw_feature_bc_matrix.h5" - echo " A 10x h5 file as generated by Cell Ranger." - echo "" - echo " --input_metrics_summary" - echo " type: file, file must exist" - echo " example: metrics_cellranger.h5" - echo " A metrics summary csv file as generated by Cell Ranger." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo "" - echo " --uns_metrics" - echo " type: string" - echo " default: metrics_cellranger" - echo " Name of the .uns slot under which to QC metrics (if any)." - echo "" - echo "Arguments:" - echo " --min_genes" - echo " type: integer" - echo " example: 100" - echo " Minimum number of counts required for a cell to pass filtering." - echo "" - echo " --min_counts" - echo " type: integer" - echo " example: 1000" - echo " Minimum number of genes expressed required for a cell to pass filtering." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component convert from_10xh5_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_10xh5_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "from_10xh5_to_h5mu 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_metrics_summary) - [ -n "$VIASH_PAR_INPUT_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--input_metrics_summary\': \'$VIASH_PAR_INPUT_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_METRICS_SUMMARY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_metrics_summary. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_metrics_summary=*) - [ -n "$VIASH_PAR_INPUT_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--input_metrics_summary=*\': \'$VIASH_PAR_INPUT_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INPUT_METRICS_SUMMARY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --uns_metrics) - [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_METRICS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_metrics. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --uns_metrics=*) - [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics=*\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_METRICS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_genes) - [ -n "$VIASH_PAR_MIN_GENES" ] && ViashError Bad arguments for option \'--min_genes\': \'$VIASH_PAR_MIN_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_GENES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_genes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_genes=*) - [ -n "$VIASH_PAR_MIN_GENES" ] && ViashError Bad arguments for option \'--min_genes=*\': \'$VIASH_PAR_MIN_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_GENES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_counts) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --min_counts=*) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_UNS_METRICS+x} ]; then - VIASH_PAR_UNS_METRICS="metrics_cellranger" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_INPUT_METRICS_SUMMARY" ] && [ ! -e "$VIASH_PAR_INPUT_METRICS_SUMMARY" ]; then - ViashError "Input file '$VIASH_PAR_INPUT_METRICS_SUMMARY' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MIN_GENES" ]]; then - if ! [[ "$VIASH_PAR_MIN_GENES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_genes' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_INPUT_METRICS_SUMMARY" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_METRICS_SUMMARY")" ) - VIASH_PAR_INPUT_METRICS_SUMMARY=$(ViashAutodetectMount "$VIASH_PAR_INPUT_METRICS_SUMMARY") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_10xh5_to_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata -import scanpy as sc -import sys -import pandas as pd - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_metrics_summary': $( if [ ! -z ${VIASH_PAR_INPUT_METRICS_SUMMARY+x} ]; then echo "r'${VIASH_PAR_INPUT_METRICS_SUMMARY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'min_genes': $( if [ ! -z ${VIASH_PAR_MIN_GENES+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -adata = sc.read_10x_h5(par["input"], gex_only=False) - -# set the gene ids as var_names -logger.info("Renaming var columns") -adata.var = adata.var\\ - .rename_axis("gene_symbol")\\ - .reset_index()\\ - .set_index("gene_ids") - -# parse metrics summary file and store in .obsm or .obs -if par["input_metrics_summary"] and par["uns_metrics"]: - logger.info("Reading metrics summary file '%s'", par['input_metrics_summary']) - - def read_percentage(val): - try: - return float(val.strip('%')) / 100 - except AttributeError: - return val - - metrics_summary = pd.read_csv(par["input_metrics_summary"], decimal=".", quotechar='"', thousands=",").applymap(read_percentage) - - logger.info("Storing metrics summary in .uns['%s']", par['uns_metrics']) - adata.uns[par["uns_metrics"]] = metrics_summary -else: - is_none = "input_metrics_summary" if not par["input_metrics_summary"] else "uns_metrics" - logger.info("Not storing metrics summary because par['%s'] is None", is_none) - -# might perform basic filtering to get rid of some data -# applicable when starting from the raw counts -if par["min_genes"]: - logger.info("Filtering with min_genes=%d", par['min_genes']) - sc.pp.filter_cells(adata, 
min_genes=par["min_genes"]) - -if par["min_counts"]: - logger.info("Filtering with min_counts=%d", par['min_counts']) - sc.pp.filter_cells(adata, min_counts=par["min_counts"]) - -# generate output -logger.info("Convert to mudata") -mdata = mudata.MuData(adata) - -# override root .obs -mdata.obs = adata.obs - -# write output -logger.info("Writing %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_INPUT_METRICS_SUMMARY" ]; then - VIASH_PAR_INPUT_METRICS_SUMMARY=$(ViashStripAutomount "$VIASH_PAR_INPUT_METRICS_SUMMARY") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_10xh5_to_h5mu/setup_logger.py b/target/docker/convert/from_10xh5_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/convert/from_10xh5_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 9828a537c5f..00000000000 --- a/target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,166 +0,0 @@ -functionality: - name: "from_10xmtx_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input mtx folder" - info: null - example: - - "input_dir_containing_gz_files" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a 10x mtx into an h5mu file.\n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.8-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 
8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xmtx_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu b/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu deleted file mode 100755 index 12d25de3753..00000000000 --- a/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu +++ /dev/null @@ -1,989 +0,0 @@ -#!/usr/bin/env bash - -# from_10xmtx_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 
1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="from_10xmtx_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "from_10xmtx_to_h5mu 0.12.3" - echo "" - echo "Converts a 10x mtx into an h5mu file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input_dir_containing_gz_files" - echo " Input mtx folder" - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.8-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component convert from_10xmtx_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_10xmtx_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "from_10xmtx_to_h5mu 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! 
-z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_10xmtx_to_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -import scanpy as sc -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -adata = sc.read_10x_mtx(par["input"], gex_only=False) - -logger.info("Renaming keys.") -adata.var = adata.var\\ - .rename_axis("gene_symbol")\\ - .reset_index()\\ - .set_index("gene_ids") - -# generate output -logger.info("Convert to mudata") -mdata = mu.MuData(adata) - -# override root .obs -mdata.obs = adata.obs - -# write output -logger.info("Writing %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py b/target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml b/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml deleted file mode 100644 index 7e86c69dedb..00000000000 --- a/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml +++ /dev/null @@ -1,159 +0,0 @@ -functionality: - name: "from_bd_to_10x_molecular_barcode_tags" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input SAM or BAM file." 
- info: null - example: - - "input.bam" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output alignment file." - info: null - example: - - "output.sam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--bam" - description: "Output a BAM file." - info: null - direction: "input" - dest: "par" - - type: "integer" - name: "--threads" - alternatives: - - "-t" - description: "Number of threads" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Convert the molecular barcode sequence SAM tag from BD format (MA)\ - \ to 10X format (UB).\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - - type: "file" - path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:latest" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "samtools" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - 
mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bd_to_10x_molecular_barcode_tags" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags b/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags deleted file mode 100755 index 00300515cc3..00000000000 --- a/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags +++ /dev/null @@ -1,1030 +0,0 @@ -#!/usr/bin/env bash - -# from_bd_to_10x_molecular_barcode_tags 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. 
-# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 
-VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="from_bd_to_10x_molecular_barcode_tags" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "from_bd_to_10x_molecular_barcode_tags 0.12.3" - echo "" - echo "Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format" - echo "(UB)." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.bam" - echo " Input SAM or BAM file." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.sam" - echo " Output alignment file." - echo "" - echo " --bam" - echo " type: boolean_true" - echo " Output a BAM file." - echo "" - echo " -t, --threads" - echo " type: integer" - echo " Number of threads" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! 
command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:latest - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y samtools && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component convert from_bd_to_10x_molecular_barcode_tags" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format 
`[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_bd_to_10x_molecular_barcode_tags-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "from_bd_to_10x_molecular_barcode_tags 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bam) - [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM=true - shift 1 - ;; - --threads) - [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREADS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --threads. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --threads=*) - [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads=*\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREADS=$(ViashRemoveFlags "$1") - shift 1 - ;; - -t) - [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREADS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo 
"$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_BAM+x} ]; then - VIASH_PAR_BAM="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_BAM" ]]; then - if ! [[ "$VIASH_PAR_BAM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--bam' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_THREADS" ]]; then - if ! [[ "$VIASH_PAR_THREADS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--threads' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_bd_to_10x_molecular_barcode_tags-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\"'\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -# Sam tags added by BD Rhapsody Pipeline -# From: https://www.bd.com/documents/guides/user-guides/GMX_BD-Rhapsody-genomics-informatics_UG_EN.pdf -# -# ========================================================================================= -# | | Definition | -# ========================================================================================= -# | CB | A number between 1 and 96 3 (884,736) representing a unique cell label sequence | -# | | (CB = 0 when no cell label sequence is detected) | -# ----------------------------------------------------------------------------------------- -# | MR | Raw molecular identifier sequence | -# ----------------------------------------------------------------------------------------- -# | MA | RSEC-adjusted molecular identifier sequence. If not a true cell, the raw UMI is | -# | | repeated in this tag. 
| -# ----------------------------------------------------------------------------------------- -# | PT | T if a poly(T) tail was found in the expected position on R1, or F if poly(T) | -# | | was not found | -# ----------------------------------------------------------------------------------------- -# | CN | Indicates if a sequence is derived from a putative cell, as determined by the | -# | | cell label filtering algorithm (T: putative cell; x: invalid cell label or noise | -# | | cell) Note: You can distinguish between an invalid cell label and a noise cell | -# | | with the CB tag (invalid cell labels are 0). | -# ----------------------------------------------------------------------------------------- -# | ST | The value is 1-12, indicating the Sample Tag of the called putative cell, or M | -# | | for multiplet, or x for undetermined. | -# ========================================================================================= - - -# SAM tags added by 10X -# https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/bam -# ========================================================================================= -# | | Definition | -# ========================================================================================= -# | CB | Chromium cellular barcode sequence that is error-corrected and confirmed against | -# | | a list of known-good barcode sequences. For multiplex Fixed RNA Profiling, the | -# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | -# | | sequences. | -# ----------------------------------------------------------------------------------------- -# | CR | Chromium cellular barcode sequence as reported by the sequencer. For multiplex | -# | | Fixed RNA Profiling, the cellular barcode is a combination of the 10x GEM | -# | | Barcode and Probe Barcode sequences. 
| -# ----------------------------------------------------------------------------------------- -# | CY | Chromium cellular barcode read quality. For multiplex Fixed RNA Profiling, the | -# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | -# | | sequences. Phred scores as reported by sequencer. | -# ----------------------------------------------------------------------------------------- -# | UB | Chromium molecular barcode sequence that is error-corrected among other | -# | | molecular barcodes with the same cellular barcode and gene alignment. | -# ----------------------------------------------------------------------------------------- -# | UR | Chromium molecular barcode sequence as reported by the sequencer. | -# ----------------------------------------------------------------------------------------- -# | UY | Chromium molecular barcode read quality. Phred scores as reported by sequencer. | -# ----------------------------------------------------------------------------------------- -# | TR | Trimmed sequence. For the Single Cell 3' v1 chemistry, this is trailing sequence | -# | | following the UMI on Read 2. For the Single Cell 3' v2 chemistry, this is | -# | | trailing sequence following the cell and molecular barcodes on Read 1. | -# ========================================================================================= - -extra_params=() - -if [ "\$par_bam" == "true" ]; then - extra_params+=("--bam") -fi - -cat \\ - <(samtools view -SH "\$par_input") \\ - <(samtools view "\$par_input" | grep "MA:Z:*" | sed "s/MA:Z:/UB:Z:/" ) | \\ -samtools view -Sh "\${extra_params[@]}" -@"\$par_threads" - > "\$par_output" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 7ecfaba2de3..00000000000 --- a/target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,181 +0,0 @@ -functionality: - name: "from_bdrhap_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "string" - name: "--id" - description: "A sample ID." - info: null - example: - - "my_id" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "The output of a BD Rhapsody workflow." 
- info: null - example: - - "input_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "r_script" - path: "script.R" - is_executable: true - description: "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/randpy:r4.2_py3.9" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "r" - cran: - - "anndata" - bioc_force_install: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - 
mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bdrhap_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu b/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu deleted file mode 100755 index 5914e1e8f9a..00000000000 --- a/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu +++ /dev/null @@ -1,1215 +0,0 @@ -#!/usr/bin/env bash - -# from_bdrhap_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. 
The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 
-VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="from_bdrhap_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "from_bdrhap_to_h5mu 0.12.3" - echo "" - echo "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file." - echo "" - echo "Inputs:" - echo " --id" - echo " type: string, required parameter" - echo " example: my_id" - echo " A sample ID." - echo "" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input_dir" - echo " The output of a BD Rhapsody workflow." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! 
command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/randpy:r4.2_py3.9 - -ENTRYPOINT [] - - -RUN apt update && apt upgrade -y -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ - Rscript -e 'remotes::install_cran(c("anndata"), repos = "https://cran.rstudio.com")' - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component convert from_bdrhap_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL 
org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_bdrhap_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! 
"$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - 
ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "from_bdrhap_to_h5mu 0.12.3" - exit - ;; - --id) - [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --id=*) - [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_ID+x} ]; then - ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_bdrhap_to_h5mu-XXXXXX").R -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -cat("Loading libraries\\n") -options(tidyverse.quiet = TRUE) -library(tidyverse) -requireNamespace("anndata", quietly = TRUE) -requireNamespace("reticulate", quietly = TRUE) -library(assertthat) -mudata <- reticulate::import("mudata") - -## VIASH START -# The following code has been auto-generated by Viash. 
-# treat warnings as errors -.viash_orig_warn <- options(warn = 2) - -par <- list( - "id" = $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "input" = $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "output_compression" = $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT_COMPRESSION" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ) -) -meta <- list( - "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_kb" = $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) -) - - -# restore original warn setting -options(.viash_orig_warn) -rm(.viash_orig_warn) - -## VIASH END - -read_metrics <- function(file) { - metric_lines <- readr::read_lines(file) - metric_lines_no_header <- metric_lines[!grepl("^##", metric_lines)] - - # parse sub data frames - group_title_regex <- "^#([^#]*)#" - group_title_ix <- grep(group_title_regex, metric_lines_no_header) - group_titles <- gsub(group_title_regex, "\\\\1", metric_lines_no_header[group_title_ix]) - group_ix_from <- group_title_ix+1 - group_ix_to <- c(group_title_ix[-1]-1, length(metric_lines_no_header)) - metric_dfs <- pmap( - list( - from = group_ix_from, - to = group_ix_to - ), - function(from, to) { - lines <- metric_lines_no_header[from:to] - lines <- lines[lines != ""] - readr::read_csv(paste0(lines, collapse = "\\n")) %>% - mutate(run_id = par\$id) %>% - select(run_id, everything()) - } - ) - names(metric_dfs) <- gsub(" ", "_", tolower(group_titles)) - metric_dfs -} -cat("Reading in metric summaries\\n") -metrics_file <- list.files(par\$input, pattern = "_Metrics_Summary.csv\$", full.names = TRUE) 
-assert_that( - length(metrics_file) == 1, - msg = paste0("Exactly one *_Metrics_Summary.csv should be found, found ", length(metrics_file), " files instead.") -) -metric_dfs <- read_metrics(metrics_file) - -cat("Reading in count data\\n") -counts_file <- list.files(par\$input, pattern = "_DBEC_MolsPerCell.csv\$", full.names = TRUE) -if (length(counts_file) == 0) { - cat("Warning: could not find DBEC file, looking for RSEC file instead.\\n") - counts_file <- list.files(par\$input, pattern = "_RSEC_MolsPerCell.csv\$", full.names = TRUE) -} -assert_that( - length(counts_file) == 1, - msg = paste0("Exactly one *_(RSEC|DBEC)_MolsPerCell.csv should be found, found ", length(counts_file), " files instead.") -) -counts <- - readr::read_csv( - counts_file, - col_types = cols(.default = col_integer()), - comment = "#" - ) %>% - tibble::column_to_rownames("Cell_Index") %>% - as.matrix %>% - Matrix::Matrix(sparse = TRUE) - -# processing VDJ data -vdj_file <- list.files(par\$input, pattern = "_VDJ_perCell.csv\$", full.names = TRUE) -vdj_data <- - if (length(vdj_file) == 1) { - cat("Reading in VDJ data\\n") - readr::read_csv( - vdj_file, - comment = "#" - ) - } else { - NULL - } - -cat("Reading in VDJ metric summaries\\n") -vdj_metrics_file <- list.files(par\$input, pattern = "_VDJ_metrics.csv\$", full.names = TRUE) -vdj_metric_dfs <- - if (length(vdj_metrics_file) == 1) { - read_metrics(vdj_metrics_file) - } else { - list() - } - -# processing SMK data -smk_file <- list.files(par\$input, pattern = "_Sample_Tag_Calls.csv\$", full.names = TRUE) -smk_calls <- - if (length(smk_file) == 1) { - cat("Processing sample tags\\n") - readr::read_csv( - smk_file, - comment = "#" - ) - } else { - NULL - } -smk_metrics_file <- list.files(par\$input, pattern = "_Sample_Tag_Metrics.csv\$", full.names = TRUE) -smk_metrics <- - if (length(smk_metrics_file) == 1) { - readr::read_csv( - smk_metrics_file, - comment = "#" - ) - } else { - NULL - } - -cat("Constructing obs\\n") -library_id <- 
metric_dfs[["sequencing_quality"]]\$Library -if (length(library_id) > 1) { - library_id <- paste(library_id[library_id != "Combined_stats"], collapse = " & ") -} - -obs <- tibble( - cell_id = rownames(counts), - run_id = par\$id, - library_id = library_id -) - -if (!is.null(smk_calls)) { - obs <- left_join( - obs, - smk_calls %>% transmute( - cell_id = as.character(Cell_Index), - sample_tag = Sample_Tag, - sample_id = Sample_Name - ), - by = "cell_id" - ) -} else { - obs <- obs %>% mutate(sample_id = library_id) -} - -obs <- obs %>% - mutate(sample_id = ifelse(!is.na(sample_id), sample_id, run_id)) %>% - as.data.frame() %>% - column_to_rownames("cell_id") - -cat("Constructing var\\n") -# determine feature types of genes -var0 <- tryCatch({ - feature_types_file <- list.files(par\$input, pattern = "feature_types.tsv\$", full.names = TRUE) - - # abseq fasta reference has trailing info which apparently gets stripped off by the bd rhapsody pipeline - readr::read_tsv(feature_types_file) %>% - mutate( - trimmed_feature_id = gsub(" .*", "", feature_id), - i = match(feature_id, colnames(counts)), - j = match(trimmed_feature_id, colnames(counts)), - ij = ifelse(is.na(i), j, i), - final_feature_id = ifelse(!is.na(i), feature_id, trimmed_feature_id) - ) %>% - filter(!is.na(ij)) %>% - select(feature_id = final_feature_id, feature_type, reference_file) -}, error = function(e) { - cat("Feature matching error: ", e\$message, "\\n", sep = "") - tibble( - feature_id = character() - ) -}) - -# in case the feature types are missing -missing_features <- tibble( - feature_id = setdiff(colnames(counts), var0\$feature_id), - feature_type = "Gene Expression", - reference_file = NA_character_, - note = "Feature annotation file missing, assuming type is Gene Expression" -) - -var1 <- - if (nrow(missing_features) > 0) { - cat("Feature annotation file missing, assuming type is Gene Expression\\n") - bind_rows(var0, missing_features) %>% - slice(match(colnames(counts), feature_id)) - # Avoid 
nullable string columnns https://github.com/scverse/anndata/issues/679 - missing_features %>% mutate(across(reference_file, as.factor)) - } else { - var0 - } - -# create var -var <- var1 %>% - transmute(gene_ids = feature_id, gene_name = feature_id, feature_types = feature_type, reference_file) %>% - as.data.frame() %>% - column_to_rownames("gene_ids") - -cat("Constructing uns\\n") -names(metric_dfs) <- paste0("mapping_qc_", names(metric_dfs)) -smk_metric_dfs <- - if (!is.null(smk_metrics)) { - list(mapping_qc_smk_metrics = smk_metrics) - } else { - NULL - } -uns <- c(metric_dfs, smk_metric_dfs) - -cat("Constructing RNA (&ABC?) AnnData") -adata <- anndata::AnnData( - X = counts, - obs = obs, - var = var, - uns = uns -) - -adata_prot <- adata[, adata\$var\$feature_types == "Antibody Capture"] -if (ncol(adata_prot) == 0) { - adata_prot <- NULL -} -adata_rna <- adata[, adata\$var\$feature_types != "Antibody Capture"] - -adata_vdj <- - if (!is.null(vdj_data)) { - cat("Constructing VDJ AnnData\\n") - names(vdj_metric_dfs) <- paste0("mapping_qc_", names(vdj_metric_dfs)) - anndata::AnnData( - obs = vdj_data, - uns = vdj_metric_dfs, - shape = c(nrow(vdj_data), 0L) - ) - } else { - NULL - } - -cat("Constructing MuData object\\n") -modalities <- - list( - rna = adata_rna, - prot = adata_prot, - vdj = adata_vdj - ) -mdata <- mudata\$MuData(modalities[!sapply(modalities, is.null)]) - -cat("Writing to h5mu file\\n") -mdata\$write(par\$output, compression=par\$output_compression) -VIASHMAIN -Rscript "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 2c96fe3a4b8..00000000000 --- a/target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,190 +0,0 @@ -functionality: - name: "from_cellranger_multi_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input folder. Must contain the output from a cellranger multi run." - info: null - example: - - "input_dir_containing_modalities" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_metrics" - description: "Name of the .uns slot under which to QC metrics (if any)." - info: null - default: - - "metrics_cellranger" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts the output from cellranger multi to a single .h5mu file.\n\ - By default, will map the following library type names to modality names:\n -\ - \ Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ:\ - \ vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n\ - \ - Multiplexing Capture: hashing\n\nOther library types have their whitepace\ - \ removed and dashes replaced by\nunderscores to generate the modality name.\n\ - \nCurrently does not allow parsing the output from cell barcode demultiplexing.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/10x_5k_anticmv" - - type: "file" - path: "resources_test/10x_5k_lung_crispr" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scirpy~=0.11.1" - - "pandas~=2.0.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_cellranger_multi_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: 
"0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu b/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu deleted file mode 100755 index 4e46b6c0232..00000000000 --- a/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu +++ /dev/null @@ -1,1166 +0,0 @@ -#!/usr/bin/env bash - -# from_cellranger_multi_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} 
-# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="from_cellranger_multi_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "from_cellranger_multi_to_h5mu 0.12.3" - echo "" - echo "Converts the output from cellranger multi to a single .h5mu file." 
- echo "By default, will map the following library type names to modality names:" - echo " - Gene Expression: rna" - echo " - Peaks: atac" - echo " - Antibody Capture: prot" - echo " - VDJ: vdj" - echo " - VDJ-T: vdj_t" - echo " - VDJ-B: vdj_b" - echo " - CRISPR Guide Capture: crispr" - echo " - Multiplexing Capture: hashing" - echo "" - echo "Other library types have their whitepace removed and dashes replaced by" - echo "underscores to generate the modality name." - echo "" - echo "Currently does not allow parsing the output from cell barcode demultiplexing." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input_dir_containing_modalities" - echo " Input folder. Must contain the output from a cellranger multi run." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo "" - echo " --uns_metrics" - echo " type: string" - echo " default: metrics_cellranger" - echo " Name of the .uns slot under which to QC metrics (if any)." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scirpy~=0.11.1" "pandas~=2.0.0" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component convert from_cellranger_multi_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" 
-LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_cellranger_multi_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "from_cellranger_multi_to_h5mu 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --uns_metrics) - [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_METRICS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_metrics. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --uns_metrics=*) - [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics=*\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_UNS_METRICS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory 
in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_UNS_METRICS+x} ]; then - VIASH_PAR_UNS_METRICS="metrics_cellranger" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_cellranger_multi_to_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from pathlib import Path -import sys -import scanpy -import pandas as pd -import mudata -from scirpy.io import read_10x_vdj -from collections import defaultdict -from functools import partial - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -POSSIBLE_LIBRARY_TYPES = ('vdj_t', 'vdj_b', 'vdj_t_gd', 'count') - -FEATURE_TYPES_NAMES = { - "Gene Expression": "rna", - "Peaks": "atac", - "Antibody Capture": "prot", - "VDJ": "vdj", - "VDJ-T": "vdj_t", - "VDJ-B": "vdj_b", - "CRISPR Guide Capture": "gdo", - "Multiplexing Capture": "hto" - } - -def gather_input_data(dir: Path): - if not dir.is_dir(): - raise ValueError("Specified input is not a directory.") - folder_contents = list(dir.iterdir()) - config = dir / 'config.csv' - if config not in folder_contents: - logger.warning('Config.csv not found in input directory, this folder might not be a valid cellranger multi output.') - - required_subfolders = [dir / subfolder_name for subfolder_name in ('multi', 'per_sample_outs')] - found_input = {key_: None for key_ in POSSIBLE_LIBRARY_TYPES + ('metrics_summary',)} - for required_subfolder in required_subfolders: - if not required_subfolder in folder_contents: - raise ValueError(f"Input folder must contain the subfolder {required_subfolder} please make " - "sure that the specified input folder is a valid cellranger multi output.") - - multi_dir = 
dir / 'multi' - for library_type in multi_dir.iterdir(): - if not library_type.is_dir(): - logger.warning("%s is not a directory. Contents of the multi folder " - "must be directories to be recognized as valid input data", - library_type) - continue - if library_type.name not in POSSIBLE_LIBRARY_TYPES: - raise ValueError(f"Contents of the 'multi' folder must be found one of the following: {','.join(POSSIBLE_LIBRARY_TYPES)}.") - - found_input[library_type.name] = library_type - - per_sample_outs_dir = dir / 'per_sample_outs' - for file_glob in ('*/metrics_summary.csv', '*/count/feature_reference.csv', - '*/count/crispr_analysis/perturbation_efficiencies_by_feature.csv', - '*/count/crispr_analysis/perturbation_efficiencies_by_target.csv'): - found_files = list(per_sample_outs_dir.glob(file_glob)) - if len(found_files) > 1: - raise ValueError(f"Found more than one file for glob '{file_glob}' file. " - "This component currently only supports parsing cellranger multi output for one sample.") - file_name = Path(file_glob).name.removesuffix('.csv') - found_input[file_name] = found_files[0] if found_files else None - - return found_input - - -def proces_perturbation(key_name: str, mudata: mudata.MuData, efficiency_file: Path): - assert 'gdo' in mudata.mod - eff_df = pd.read_csv(efficiency_file, index_col="Perturbation", sep=",", decimal=".", quotechar='"') - mudata.mod['gdo'].uns[key_name] = eff_df - return mudata - -def process_feature_reference(mudata: mudata.MuData, efficiency_file: Path): - df = pd.read_csv(efficiency_file, index_col="id", sep=",", decimal=".", quotechar='"') - assert 'feature_type' in df.columns, "Columns 'feature_type' should be present in features_reference file." 
- feature_types = df['feature_type'] - if set(feature_types) - set(FEATURE_TYPES_NAMES): - raise ValueError("Not all feature types present in the features_reference file are supported by this component.") - for feature_type in feature_types: - modality = FEATURE_TYPES_NAMES[feature_type] - subset_df = df.loc[df['feature_type'] == feature_type] - mudata.mod[modality].uns['feature_reference'] = subset_df - return mudata - -def process_counts(counts_folder: Path): - counts_matrix_file = counts_folder / "raw_feature_bc_matrix.h5" - logger.info("Reading %s.", counts_matrix_file) - adata = scanpy.read_10x_h5(counts_matrix_file, gex_only=False) - - # set the gene ids as var_names - logger.info("Renaming var columns") - adata.var = adata.var\\ - .rename_axis("gene_symbol")\\ - .reset_index()\\ - .set_index("gene_ids") - - # generate output - logger.info("Convert to mudata") - - def modality_name_factory(library_type): - return ("".join(library_type.replace("-", "_").split())).lower() - - feature_types = defaultdict(modality_name_factory, FEATURE_TYPES_NAMES) - return mudata.MuData(adata, feature_types_names=feature_types) - -def process_metrics_summary(mudata: mudata.MuData, metrics_file: Path): - def read_percentage(val): - try: - return float(val.strip('%')) / 100 - except (AttributeError, ValueError): - return val - - metrics_summary = pd.read_csv(metrics_file, - decimal=".", - quotechar='"', - thousands=",").applymap(read_percentage) - - mudata.uns[par["uns_metrics"]] = metrics_summary - for colname, coldata in metrics_summary.items(): - try: - new_column = coldata.astype(str, copy=True).astype({colname: "category"}) - metrics_summary[colname] = new_column - except (ValueError, TypeError): - logger.warning(f"Could not store column {colname} from metrics.") - pass - return mudata - -def process_vdj(mudata: mudata.MuData, vdj_folder_path: Path): - # https://scverse.org/scirpy/latest/generated/scirpy.io.read_10x_vdj.html#scirpy-io-read-10x-vdj - # According to docs, using 
the json is preferred as this file includes intron info. - all_config_json_file = vdj_folder_path / "all_contig_annotations.json" - vdj_anndata = read_10x_vdj(all_config_json_file) - vdj_type = vdj_folder_path.name - mudata.mod[vdj_type] = vdj_anndata - return mudata - -def get_modalities(input_data): - dispatcher = { - 'vdj_t': process_vdj, - 'vdj_b': process_vdj, - 'vdj_t_gd': process_vdj, - 'metrics_summary': process_metrics_summary, - 'feature_reference': process_feature_reference, - 'perturbation_efficiencies_by_feature': partial(proces_perturbation, 'perturbation_efficiencies_by_feature'), - 'perturbation_efficiencies_by_target': partial(proces_perturbation, 'perturbation_efficiencies_by_target'), - } - mudata_file = process_counts(input_data['count']) - for modality_name, modality_data_path in input_data.items(): - if modality_name == "count" or not modality_data_path: - continue - try: - parser_function = dispatcher[modality_name] - except KeyError as e: - raise ValueError("This component does not support the " - f"parsing of the '{modality_name}' yet.") from e - mudata_file = parser_function(mudata_file, modality_data_path) - return mudata_file - -def main(): - cellranger_multi_dir = Path(par["input"]) - input_data = gather_input_data(cellranger_multi_dir) - result = get_modalities(input_data) - logger.info("Writing %s", par["output"]) - result.write_h5mu(par["output"], compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_cellranger_multi_to_h5mu/setup_logger.py b/target/docker/convert/from_cellranger_multi_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/convert/from_cellranger_multi_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/convert/from_h5ad_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_h5ad_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 1572fd33043..00000000000 --- a/target/docker/convert/from_h5ad_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,177 +0,0 @@ -functionality: - name: "from_h5ad_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - 
description: "Input h5ad files" - info: null - default: - - "input.h5ad" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output MuData file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a single layer h5ad file into a single MuData object\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: 
"nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5ad_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu b/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu deleted file mode 100755 index cd92d16c44c..00000000000 --- a/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu +++ /dev/null @@ -1,1056 +0,0 @@ -#!/usr/bin/env bash - -# from_h5ad_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries De Maeyer (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see 
https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="from_h5ad_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "from_h5ad_to_h5mu 0.12.3" - echo "" - echo "Converts a single layer h5ad file into a single MuData object" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " default: input.h5ad" - echo " Input h5ad files" - echo "" - echo " --modality" - echo " type: string, multiple values allowed" - echo " default: rna" - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " default: output.h5mu" - echo " Output MuData file." 
- echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries De Maeyer" -LABEL org.opencontainers.image.description="Companion container for running component convert from_h5ad_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_h5ad_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "from_h5ad_to_h5mu 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - if [ -z "$VIASH_PAR_MODALITY" ]; then - VIASH_PAR_MODALITY="$2" - else - VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - if [ -z "$VIASH_PAR_MODALITY" ]; then - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - else - VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="output.h5mu" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=':' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." 
- exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=':' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=':' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_h5ad_to_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -import anndata -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -assert len(par["input"]) == len(par["modality"]), "Number of input files should be the same length as the number of modalities" - -logger.info("Reading input files") -data = { key: anndata.read_h5ad(path) for key, path in zip(par["modality"], par["input"]) } - -try: - data.var_names_make_unique() -except: - pass - -logger.info("Converting to mudata") -mudata = mu.MuData(data) - -try: - mudata.var_names_make_unique() -except: - pass - -logger.info("Writing to %s.", par['output']) -mudata.write_h5mu(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=':' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT:""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_h5ad_to_h5mu/setup_logger.py b/target/docker/convert/from_h5ad_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/convert/from_h5ad_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml b/target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml deleted file mode 100644 index e754fa369be..00000000000 --- a/target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml +++ /dev/null @@ -1,182 +0,0 @@ -functionality: - name: "from_h5mu_to_h5ad" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: 
"https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input MuData file" - info: null - default: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output AnnData file." - info: null - default: - - "output.h5ad" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the final h5ad object." - info: null - default: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a h5mu file into a h5ad file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - 
"procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5mu_to_h5ad" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad b/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad deleted file mode 100755 index 2ac562b3688..00000000000 --- 
a/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad +++ /dev/null @@ -1,1009 +0,0 @@ -#!/usr/bin/env bash - -# from_h5mu_to_h5ad 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd 
-P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. 
-# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="from_h5mu_to_h5ad" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "from_h5mu_to_h5ad 0.12.3" - echo "" - echo "Converts a h5mu file into a h5ad file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " default: input.h5mu" - echo " Input MuData file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " default: output.h5ad" - echo " Output AnnData file." - echo "" - echo " --output_compression" - echo " type: string" - echo " default: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the final h5ad object." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component convert from_h5mu_to_h5ad" -LABEL org.opencontainers.image.created="2024-01-25T10:13:54Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_h5mu_to_h5ad-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "from_h5mu_to_h5ad 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="output.h5ad" -fi -if [ -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then - VIASH_PAR_OUTPUT_COMPRESSION="gzip" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_h5mu_to_h5ad-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -# TODO: Merge modalities into one layer - -logger.info("Reading input h5mu file") -dat = mu.read_h5mu(par["input"]) - -logger.info("Converting to h5ad") -adat = dat.mod[par["modality"]] - -logger.info("Writing to %s.", par['output']) -adat.write_h5ad(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/convert/from_h5mu_to_h5ad/setup_logger.py b/target/docker/convert/from_h5mu_to_h5ad/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/convert/from_h5mu_to_h5ad/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml b/target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml deleted file mode 100644 index b84e91db02f..00000000000 --- a/target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,255 +0,0 @@ -functionality: - name: "velocyto_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Robrecht Cannoodt" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "contributor" - info: - role: 
"Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input_loom" - description: "Path to the input loom file." - info: null - example: - - "input.loom" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input_h5mu" - description: "If a MuData file is provided," - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "The name of the modality to operate on." - info: null - default: - - "rna_velocity" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Path to the output MuData file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_spliced" - description: "Output layer for the spliced reads." 
- info: null - default: - - "velo_spliced" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_unspliced" - description: "Output layer for the unspliced reads." - info: null - default: - - "velo_unspliced" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_ambiguous" - description: "Output layer for the ambiguous reads." - info: null - default: - - "velo_ambiguous" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file\ - \ is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "loompy" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 
4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/velocyto_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu b/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu deleted file mode 100755 index f379266f1ae..00000000000 --- a/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu +++ /dev/null @@ -1,1086 +0,0 @@ -#!/usr/bin/env bash - -# velocyto_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. 
The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer, author) -# * Robrecht Cannoodt (author) -# * Angela Oliveira Pisco (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 
-VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="velocyto_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "velocyto_to_h5mu 0.12.3" - echo "" - echo "Convert a velocyto loom file to a h5mu file." - echo "" - echo "If an input h5mu file is also provided, the velocity" - echo "h5ad object will get added to that h5mu instead." - echo "" - echo "Inputs:" - echo " --input_loom" - echo " type: file, required parameter, file must exist" - echo " example: input.loom" - echo " Path to the input loom file." - echo "" - echo " --input_h5mu" - echo " type: file, file must exist" - echo " example: input.h5mu" - echo " If a MuData file is provided," - echo "" - echo " --modality" - echo " type: string" - echo " default: rna_velocity" - echo " The name of the modality to operate on." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Path to the output MuData file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo " --layer_spliced" - echo " type: string" - echo " default: velo_spliced" - echo " Output layer for the spliced reads." - echo "" - echo " --layer_unspliced" - echo " type: string" - echo " default: velo_unspliced" - echo " Output layer for the unspliced reads." - echo "" - echo " --layer_ambiguous" - echo " type: string" - echo " default: velo_ambiguous" - echo " Output layer for the ambiguous reads." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "loompy" - -LABEL org.opencontainers.image.authors="Dries Schaumont, Robrecht Cannoodt, Angela Oliveira Pisco" -LABEL org.opencontainers.image.description="Companion container for running component convert velocyto_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-velocyto_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "velocyto_to_h5mu 0.12.3" - exit - ;; - --input_loom) - [ -n "$VIASH_PAR_INPUT_LOOM" ] && ViashError Bad arguments for option \'--input_loom\': \'$VIASH_PAR_INPUT_LOOM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LOOM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_loom. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_loom=*) - [ -n "$VIASH_PAR_INPUT_LOOM" ] && ViashError Bad arguments for option \'--input_loom=*\': \'$VIASH_PAR_INPUT_LOOM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LOOM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_h5mu) - [ -n "$VIASH_PAR_INPUT_H5MU" ] && ViashError Bad arguments for option \'--input_h5mu\': \'$VIASH_PAR_INPUT_H5MU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_H5MU="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_h5mu. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_h5mu=*) - [ -n "$VIASH_PAR_INPUT_H5MU" ] && ViashError Bad arguments for option \'--input_h5mu=*\': \'$VIASH_PAR_INPUT_H5MU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_H5MU=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer_spliced) - [ -n "$VIASH_PAR_LAYER_SPLICED" ] && ViashError Bad arguments for option \'--layer_spliced\': \'$VIASH_PAR_LAYER_SPLICED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LAYER_SPLICED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_spliced. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer_spliced=*) - [ -n "$VIASH_PAR_LAYER_SPLICED" ] && ViashError Bad arguments for option \'--layer_spliced=*\': \'$VIASH_PAR_LAYER_SPLICED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_SPLICED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer_unspliced) - [ -n "$VIASH_PAR_LAYER_UNSPLICED" ] && ViashError Bad arguments for option \'--layer_unspliced\': \'$VIASH_PAR_LAYER_UNSPLICED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_UNSPLICED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_unspliced. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer_unspliced=*) - [ -n "$VIASH_PAR_LAYER_UNSPLICED" ] && ViashError Bad arguments for option \'--layer_unspliced=*\': \'$VIASH_PAR_LAYER_UNSPLICED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_UNSPLICED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer_ambiguous) - [ -n "$VIASH_PAR_LAYER_AMBIGUOUS" ] && ViashError Bad arguments for option \'--layer_ambiguous\': \'$VIASH_PAR_LAYER_AMBIGUOUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_AMBIGUOUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_ambiguous. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer_ambiguous=*) - [ -n "$VIASH_PAR_LAYER_AMBIGUOUS" ] && ViashError Bad arguments for option \'--layer_ambiguous=*\': \'$VIASH_PAR_LAYER_AMBIGUOUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LAYER_AMBIGUOUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT_LOOM+x} ]; then - ViashError '--input_loom' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna_velocity" -fi -if [ -z ${VIASH_PAR_LAYER_SPLICED+x} ]; then - VIASH_PAR_LAYER_SPLICED="velo_spliced" -fi -if [ -z ${VIASH_PAR_LAYER_UNSPLICED+x} ]; then - VIASH_PAR_LAYER_UNSPLICED="velo_unspliced" -fi -if [ -z ${VIASH_PAR_LAYER_AMBIGUOUS+x} ]; then - VIASH_PAR_LAYER_AMBIGUOUS="velo_ambiguous" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT_LOOM" ] && [ ! -e "$VIASH_PAR_INPUT_LOOM" ]; then - ViashError "Input file '$VIASH_PAR_INPUT_LOOM' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_INPUT_H5MU" ] && [ ! -e "$VIASH_PAR_INPUT_H5MU" ]; then - ViashError "Input file '$VIASH_PAR_INPUT_H5MU' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT_LOOM" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_LOOM")" ) - VIASH_PAR_INPUT_LOOM=$(ViashAutodetectMount "$VIASH_PAR_INPUT_LOOM") -fi -if [ ! -z "$VIASH_PAR_INPUT_H5MU" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_H5MU")" ) - VIASH_PAR_INPUT_H5MU=$(ViashAutodetectMount "$VIASH_PAR_INPUT_H5MU") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-velocyto_to_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import anndata as ad -import mudata as mu - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_loom': $( if [ ! -z ${VIASH_PAR_INPUT_LOOM+x} ]; then echo "r'${VIASH_PAR_INPUT_LOOM//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_h5mu': $( if [ ! -z ${VIASH_PAR_INPUT_H5MU+x} ]; then echo "r'${VIASH_PAR_INPUT_H5MU//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer_spliced': $( if [ ! -z ${VIASH_PAR_LAYER_SPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_SPLICED//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer_unspliced': $( if [ ! -z ${VIASH_PAR_LAYER_UNSPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_UNSPLICED//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer_ambiguous': $( if [ ! -z ${VIASH_PAR_LAYER_AMBIGUOUS+x} ]; then echo "r'${VIASH_PAR_LAYER_AMBIGUOUS//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -print("Parameters:", par, flush=True) - -print("Reading AnnData from loom", flush=True) -adata_in = ad.read_loom(par["input_loom"]) -adata_in.var_names = adata_in.var["Accession"] - -print("Creating clean AnnData", flush=True) -adata = ad.AnnData( - obs=adata_in.obs[[]], - var=adata_in.var[[]], - layers={ - par["layer_spliced"]: adata_in.layers["spliced"], - par["layer_unspliced"]: adata_in.layers["unspliced"], - par["layer_ambiguous"]: adata_in.layers["ambiguous"] - } -) - -if par["input_h5mu"]: - print("Received input h5mu to read", flush=True) - mdata = mu.read_h5mu(par["input_h5mu"]) - - print(f"Storing AnnData in modality {par['modality']}", flush=True) - mdata.mod[par["modality"]] = adata -else: - print("Creating h5mu from scratch", flush=True) - mdata = mu.MuData({par["modality"]: adata}) - -print("Resulting mudata:", mdata, flush=True) - -print("Writing h5mu to file", flush=True) 
-mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT_LOOM" ]; then - VIASH_PAR_INPUT_LOOM=$(ViashStripAutomount "$VIASH_PAR_INPUT_LOOM") -fi -if [ ! -z "$VIASH_PAR_INPUT_H5MU" ]; then - VIASH_PAR_INPUT_H5MU=$(ViashStripAutomount "$VIASH_PAR_INPUT_H5MU") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/correction/cellbender_remove_background/.config.vsh.yaml b/target/docker/correction/cellbender_remove_background/.config.vsh.yaml deleted file mode 100644 index 27e08715776..00000000000 --- a/target/docker/correction/cellbender_remove_background/.config.vsh.yaml +++ /dev/null @@ -1,637 +0,0 @@ -functionality: - name: "cellbender_remove_background" - namespace: "correction" - version: "0.12.3" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file. Data file on which to run tool. Data must be\ - \ un-filtered: it should include empty droplets." 
- info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "List of modalities to process." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Full count matrix as an h5mu file, with background RNA removed.\ - \ This file contains all the original droplet barcodes." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_output" - description: "Output layer" - info: null - default: - - "cellbender_corrected" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_background_fraction" - info: null - default: - - "cellbender_background_fraction" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_cell_probability" - info: null - default: - - "cellbender_cell_probability" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_cell_size" - info: null - default: - - "cellbender_cell_size" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_droplet_efficiency" - info: null - default: - - "cellbender_droplet_efficiency" - required: false - direction: "input" - multiple: false - multiple_sep: 
":" - dest: "par" - - type: "string" - name: "--obs_latent_scale" - info: null - default: - - "cellbender_latent_scale" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_ambient_expression" - info: null - default: - - "cellbender_ambient_expression" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_gene_expression_encoding" - info: null - default: - - "cellbender_gene_expression_encoding" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "boolean" - name: "--expected_cells_from_qc" - description: "Will use the Cell Ranger QC to determine the estimated number\ - \ of cells" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--expected_cells" - description: "Number of cells expected in the dataset (a rough estimate within\ - \ a factor of 2 is sufficient)." - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--total_droplets_included" - description: "The number of droplets from the rank-ordered UMI plot\nthat will\ - \ have their cell probabilities inferred as an\noutput. Include the droplets\ - \ which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should\ - \ be\n'surely empty' droplets.\n" - info: null - example: - - 25000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--force_cell_umi_prior" - description: "Ignore CellBender's heuristic prior estimation, and use this prior\ - \ for UMI counts in cells." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--force_empty_umi_prior" - description: "Ignore CellBender's heuristic prior estimation, and use this prior\ - \ for UMI counts in empty droplets." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--model" - description: "Which model is being used for count data.\n\n* 'naive' subtracts\ - \ the estimated ambient profile.\n* 'simple' does not model either ambient\ - \ RNA or random barcode swapping (for debugging purposes -- not recommended).\n\ - * 'ambient' assumes background RNA is incorporated into droplets.\n* 'swapping'\ - \ assumes background RNA comes from random barcode swapping (via PCR chimeras).\n\ - * 'full' uses a combined ambient and swapping model.\n" - info: null - default: - - "full" - required: false - choices: - - "naive" - - "simple" - - "ambient" - - "swapping" - - "full" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--epochs" - description: "Number of epochs to train." - info: null - default: - - 150 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--low_count_threshold" - description: "Droplets with UMI counts below this number are completely \nexcluded\ - \ from the analysis. 
This can help identify the correct \nprior for empty\ - \ droplet counts in the rare case where empty \ncounts are extremely high\ - \ (over 200).\n" - info: null - default: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_dim" - description: "Dimension of latent variable z.\n" - info: null - default: - - 64 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_layers" - description: "Dimension of hidden layers in the encoder for z.\n" - info: null - default: - - 512 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--training_fraction" - description: "Training detail: the fraction of the data used for training.\n\ - The rest is never seen by the inference algorithm. Speeds up learning.\n" - info: null - default: - - 0.9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--empty_drop_training_fraction" - description: "Training detail: the fraction of the training data each epoch\ - \ that \nis drawn (randomly sampled) from surely empty droplets.\n" - info: null - default: - - 0.2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--ignore_features" - description: "Integer indices of features to ignore entirely. In the output\n\ - count matrix, the counts for these features will be unchanged.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--fpr" - description: "Target 'delta' false positive rate in [0, 1). 
Use 0 for a cohort\n\ - of samples which will be jointly analyzed for differential expression.\nA\ - \ false positive is a true signal count that is erroneously removed.\nMore\ - \ background removal is accompanied by more signal removal at\nhigh values\ - \ of FPR. You can specify multiple values, which will\ncreate multiple output\ - \ files.\n" - info: null - default: - - 0.01 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--exclude_feature_types" - description: "Feature types to ignore during the analysis. These features will\n\ - be left unchanged in the output file.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--projected_ambient_count_threshold" - description: "Controls how many features are included in the analysis, which\n\ - can lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD\ - \ counts total in all cells\n(summed), then that gene is excluded, and it\ - \ will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD\ - \ = 0 will include all features\nwhich have even a single count in any empty\ - \ droplet.\n" - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--learning_rate" - description: "Training detail: lower learning rate for inference.\nA OneCycle\ - \ learning rate schedule is used, where the\nupper learning rate is ten times\ - \ this value. 
(For this\nvalue, probably do not exceed 1e-3).\n" - info: null - default: - - 1.0E-4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--final_elbo_fail_fraction" - description: "Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO\ - \ - initial_test_ELBO) > FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically\ - \ re-run if --num-training-tries > 1.\nBy default, will not fail training\ - \ based on final_training_ELBO.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--epoch_elbo_fail_fraction" - description: "Training is considered to have failed if \n(previous_epoch_test_ELBO\ - \ - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO)\ - \ > EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries\ - \ > 1.\nBy default, will not fail training based on epoch_training_ELBO.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_training_tries" - description: "Number of times to attempt to train the model. At each subsequent\ - \ attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--learning_rate_retry_mult" - description: "Learning rate is multiplied by this amount each time a new training\n\ - attempt is made. (This parameter is only used if training fails based\non\ - \ EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES\ - \ is > 1.) 
\n" - info: null - default: - - 0.2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--posterior_batch_size" - description: "Training detail: size of batches when creating the posterior.\n\ - Reduce this to avoid running out of GPU memory creating the posterior\n(will\ - \ be slower).\n" - info: null - default: - - 128 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--posterior_regulation" - description: "Posterior regularization method. (For experts: not required for\ - \ normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n\ - * PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n\ - * PRmu_gene is approximate mean-targeting per gene.\n" - info: null - required: false - choices: - - "PRq" - - "PRmu" - - "PRmu_gene" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alpha" - description: "Tunable parameter alpha for the PRq posterior regularization method\n\ - (not normally used: see documentation).\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--q" - description: "Tunable parameter q for the CDF threshold estimation method (not\n\ - normally used: see documentation).\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--estimator" - description: "Output denoised count estimation method. 
(For experts: not required\n\ - for normal usage, see documentation).\n" - info: null - default: - - "mckp" - required: false - choices: - - "map" - - "mean" - - "cdf" - - "sample" - - "mckp" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--estimator_multiple_cpu" - description: "Including the flag --estimator-multiple-cpu will use more than\ - \ one\nCPU to compute the MCKP output count estimator in parallel (does nothing\n\ - for other estimators).\n" - info: null - direction: "input" - dest: "par" - - type: "boolean" - name: "--constant_learning_rate" - description: "Including the flag --constant-learning-rate will use the ClippedAdam\n\ - optimizer instead of the OneCycleLR learning rate schedule, which is\nthe\ - \ default. Learning is faster with the OneCycleLR schedule.\nHowever, training\ - \ can easily be continued from a checkpoint for more\nepochs than the initial\ - \ command specified when using ClippedAdam. On\nthe other hand, if using the\ - \ OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick\ - \ up from that final checkpoint\nand continue training until 250 epochs.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--debug" - description: "Including the flag --debug will log extra messages useful for\ - \ debugging.\n" - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--cuda" - description: "Including the flag --cuda will run the inference on a\nGPU.\n" - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Eliminating technical artifacts from high-throughput single-cell RNA\ - \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ - \ and random barcode swapping from (raw) 
UMI-based scRNA-seq count matrices. \n\ - At the moment, only the count matrices produced by the CellRanger count pipeline\ - \ is supported. Support for additional tools and protocols \nwill be added in\ - \ the future. A quick start tutorial can be found here.\n\nFleming et al. 2022,\ - \ bioRxiv.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential\ - \ libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates\ - \ curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev\ - \ liblzma-dev mecab-ipadic-utf8 git \\\n&& curl https://pyenv.run | bash \\\n\ - && pyenv update \\\n&& pyenv install $PYTHON_VERSION \\\n&& pyenv global $PYTHON_VERSION\ - \ \\\n&& apt-get clean\n" - env: - - "PYENV_ROOT=\"/root/.pyenv\"" - - "PATH=\"$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH\"" - - "PYTHON_VERSION=3.7.16" - - type: "python" - user: false - packages: - - "mudata~=0.2.1" - - "cellbender~=0.3.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "muon" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - 
transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background/cellbender_remove_background" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/correction/cellbender_remove_background/cellbender_remove_background b/target/docker/correction/cellbender_remove_background/cellbender_remove_background deleted file mode 100755 index 28bee30b0b8..00000000000 --- a/target/docker/correction/cellbender_remove_background/cellbender_remove_background +++ /dev/null @@ -1,2153 +0,0 @@ -#!/usr/bin/env bash - -# cellbender_remove_background 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 
-VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="cellbender_remove_background" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellbender_remove_background 0.12.3" - echo "" - echo "Eliminating technical artifacts from high-throughput single-cell RNA sequencing" - echo "data." - echo "" - echo "This module removes counts due to ambient RNA molecules and random barcode" - echo "swapping from (raw) UMI-based scRNA-seq count matrices." - echo "At the moment, only the count matrices produced by the CellRanger count pipeline" - echo "is supported. Support for additional tools and protocols" - echo "will be added in the future. A quick start tutorial can be found here." - echo "" - echo "Fleming et al. 2022, bioRxiv." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file. Data file on which to run tool. Data must be" - echo " un-filtered: it should include empty droplets." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " List of modalities to process." 
- echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Full count matrix as an h5mu file, with background RNA removed. This" - echo " file contains all the original droplet barcodes." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo "" - echo " --layer_output" - echo " type: string" - echo " default: cellbender_corrected" - echo " Output layer" - echo "" - echo " --obs_background_fraction" - echo " type: string" - echo " default: cellbender_background_fraction" - echo "" - echo " --obs_cell_probability" - echo " type: string" - echo " default: cellbender_cell_probability" - echo "" - echo " --obs_cell_size" - echo " type: string" - echo " default: cellbender_cell_size" - echo "" - echo " --obs_droplet_efficiency" - echo " type: string" - echo " default: cellbender_droplet_efficiency" - echo "" - echo " --obs_latent_scale" - echo " type: string" - echo " default: cellbender_latent_scale" - echo "" - echo " --var_ambient_expression" - echo " type: string" - echo " default: cellbender_ambient_expression" - echo "" - echo " --obsm_gene_expression_encoding" - echo " type: string" - echo " default: cellbender_gene_expression_encoding" - echo "" - echo "Arguments:" - echo " --expected_cells_from_qc" - echo " type: boolean" - echo " default: false" - echo " Will use the Cell Ranger QC to determine the estimated number of cells" - echo "" - echo " --expected_cells" - echo " type: integer" - echo " example: 1000" - echo " Number of cells expected in the dataset (a rough estimate within a" - echo " factor of 2 is sufficient)." - echo "" - echo " --total_droplets_included" - echo " type: integer" - echo " example: 25000" - echo " The number of droplets from the rank-ordered UMI plot" - echo " that will have their cell probabilities inferred as an" - echo " output. 
Include the droplets which might contain cells." - echo " Droplets beyond TOTAL_DROPLETS_INCLUDED should be" - echo " 'surely empty' droplets." - echo "" - echo " --force_cell_umi_prior" - echo " type: integer" - echo " Ignore CellBender's heuristic prior estimation, and use this prior for" - echo " UMI counts in cells." - echo "" - echo " --force_empty_umi_prior" - echo " type: integer" - echo " Ignore CellBender's heuristic prior estimation, and use this prior for" - echo " UMI counts in empty droplets." - echo "" - echo " --model" - echo " type: string" - echo " default: full" - echo " choices: [ naive, simple, ambient, swapping, full ]" - echo " Which model is being used for count data." - echo " * 'naive' subtracts the estimated ambient profile." - echo " * 'simple' does not model either ambient RNA or random barcode swapping" - echo " (for debugging purposes -- not recommended)." - echo " * 'ambient' assumes background RNA is incorporated into droplets." - echo " * 'swapping' assumes background RNA comes from random barcode swapping" - echo " (via PCR chimeras)." - echo " * 'full' uses a combined ambient and swapping model." - echo "" - echo " --epochs" - echo " type: integer" - echo " default: 150" - echo " Number of epochs to train." - echo "" - echo " --low_count_threshold" - echo " type: integer" - echo " default: 5" - echo " Droplets with UMI counts below this number are completely" - echo " excluded from the analysis. This can help identify the correct" - echo " prior for empty droplet counts in the rare case where empty" - echo " counts are extremely high (over 200)." - echo "" - echo " --z_dim" - echo " type: integer" - echo " default: 64" - echo " Dimension of latent variable z." - echo "" - echo " --z_layers" - echo " type: integer, multiple values allowed" - echo " default: 512" - echo " Dimension of hidden layers in the encoder for z." 
- echo "" - echo " --training_fraction" - echo " type: double" - echo " default: 0.9" - echo " Training detail: the fraction of the data used for training." - echo " The rest is never seen by the inference algorithm. Speeds up learning." - echo "" - echo " --empty_drop_training_fraction" - echo " type: double" - echo " default: 0.2" - echo " Training detail: the fraction of the training data each epoch that" - echo " is drawn (randomly sampled) from surely empty droplets." - echo "" - echo " --ignore_features" - echo " type: integer, multiple values allowed" - echo " Integer indices of features to ignore entirely. In the output" - echo " count matrix, the counts for these features will be unchanged." - echo "" - echo " --fpr" - echo " type: double, multiple values allowed" - echo " default: 0.01" - echo " Target 'delta' false positive rate in [0, 1). Use 0 for a cohort" - echo " of samples which will be jointly analyzed for differential expression." - echo " A false positive is a true signal count that is erroneously removed." - echo " More background removal is accompanied by more signal removal at" - echo " high values of FPR. You can specify multiple values, which will" - echo " create multiple output files." - echo "" - echo " --exclude_feature_types" - echo " type: string, multiple values allowed" - echo " Feature types to ignore during the analysis. These features will" - echo " be left unchanged in the output file." - echo "" - echo " --projected_ambient_count_threshold" - echo " type: double" - echo " default: 0.1" - echo " Controls how many features are included in the analysis, which" - echo " can lead to a large speedup. If a feature is expected to have less" - echo " than PROJECTED_AMBIENT_COUNT_THRESHOLD counts total in all cells" - echo " (summed), then that gene is excluded, and it will be unchanged" - echo " in the output count matrix. 
For example," - echo " PROJECTED_AMBIENT_COUNT_THRESHOLD = 0 will include all features" - echo " which have even a single count in any empty droplet." - echo "" - echo " --learning_rate" - echo " type: double" - echo " default: 1.0E-4" - echo " Training detail: lower learning rate for inference." - echo " A OneCycle learning rate schedule is used, where the" - echo " upper learning rate is ten times this value. (For this" - echo " value, probably do not exceed 1e-3)." - echo "" - echo " --final_elbo_fail_fraction" - echo " type: double" - echo " Training is considered to have failed if" - echo " (best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO)" - echo " > FINAL_ELBO_FAIL_FRACTION." - echo " Training will automatically re-run if --num-training-tries > 1." - echo " By default, will not fail training based on final_training_ELBO." - echo "" - echo " --epoch_elbo_fail_fraction" - echo " type: double" - echo " Training is considered to have failed if" - echo " (previous_epoch_test_ELBO -" - echo " current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO)" - echo " > EPOCH_ELBO_FAIL_FRACTION." - echo " Training will automatically re-run if --num-training-tries > 1." - echo " By default, will not fail training based on epoch_training_ELBO." - echo "" - echo " --num_training_tries" - echo " type: integer" - echo " default: 1" - echo " Number of times to attempt to train the model. At each subsequent" - echo " attempt," - echo " the learning rate is multiplied by LEARNING_RATE_RETRY_MULT." - echo "" - echo " --learning_rate_retry_mult" - echo " type: double" - echo " default: 0.2" - echo " Learning rate is multiplied by this amount each time a new training" - echo " attempt is made. 
(This parameter is only used if training fails based" - echo " on EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and" - echo " NUM_TRAINING_TRIES is > 1.)" - echo "" - echo " --posterior_batch_size" - echo " type: integer" - echo " default: 128" - echo " Training detail: size of batches when creating the posterior." - echo " Reduce this to avoid running out of GPU memory creating the posterior" - echo " (will be slower)." - echo "" - echo " --posterior_regulation" - echo " type: string" - echo " choices: [ PRq, PRmu, PRmu_gene ]" - echo " Posterior regularization method. (For experts: not required for normal" - echo " usage," - echo " see documentation)." - echo " * PRq is approximate quantile-targeting." - echo " * PRmu is approximate mean-targeting aggregated over genes (behavior of" - echo " v0.2.0)." - echo " * PRmu_gene is approximate mean-targeting per gene." - echo "" - echo " --alpha" - echo " type: double" - echo " Tunable parameter alpha for the PRq posterior regularization method" - echo " (not normally used: see documentation)." - echo "" - echo " --q" - echo " type: double" - echo " Tunable parameter q for the CDF threshold estimation method (not" - echo " normally used: see documentation)." - echo "" - echo " --estimator" - echo " type: string" - echo " default: mckp" - echo " choices: [ map, mean, cdf, sample, mckp ]" - echo " Output denoised count estimation method. (For experts: not required" - echo " for normal usage, see documentation)." - echo "" - echo " --estimator_multiple_cpu" - echo " type: boolean_true" - echo " Including the flag --estimator-multiple-cpu will use more than one" - echo " CPU to compute the MCKP output count estimator in parallel (does nothing" - echo " for other estimators)." 
- echo "" - echo " --constant_learning_rate" - echo " type: boolean" - echo " Including the flag --constant-learning-rate will use the ClippedAdam" - echo " optimizer instead of the OneCycleLR learning rate schedule, which is" - echo " the default. Learning is faster with the OneCycleLR schedule." - echo " However, training can easily be continued from a checkpoint for more" - echo " epochs than the initial command specified when using ClippedAdam. On" - echo " the other hand, if using the OneCycleLR schedule with 150 epochs" - echo " specified, it is not possible to pick up from that final checkpoint" - echo " and continue training until 250 epochs." - echo "" - echo " --debug" - echo " type: boolean_true" - echo " Including the flag --debug will log extra messages useful for debugging." - echo "" - echo " --cuda" - echo " type: boolean_true" - echo " Including the flag --cuda will run the inference on a" - echo " GPU." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. 
Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 - -ENTRYPOINT [] - - -ENV PYENV_ROOT="/root/.pyenv" -ENV PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" -ENV PYTHON_VERSION=3.7.16 -RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev mecab-ipadic-utf8 git \ -&& curl https://pyenv.run | bash \ -&& pyenv update \ -&& pyenv install $PYTHON_VERSION \ -&& pyenv global $PYTHON_VERSION \ -&& apt-get clean - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.1" "cellbender~=0.3.0" - -LABEL 
org.opencontainers.image.description="Companion container for running component correction cellbender_remove_background" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellbender_remove_background-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "cellbender_remove_background 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer_output) - [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer_output=*) - [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output=*\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_background_fraction) - [ -n "$VIASH_PAR_OBS_BACKGROUND_FRACTION" ] && ViashError Bad arguments for option \'--obs_background_fraction\': \'$VIASH_PAR_OBS_BACKGROUND_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BACKGROUND_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_background_fraction. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_background_fraction=*) - [ -n "$VIASH_PAR_OBS_BACKGROUND_FRACTION" ] && ViashError Bad arguments for option \'--obs_background_fraction=*\': \'$VIASH_PAR_OBS_BACKGROUND_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_BACKGROUND_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_cell_probability) - [ -n "$VIASH_PAR_OBS_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_cell_probability\': \'$VIASH_PAR_OBS_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_CELL_PROBABILITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_cell_probability. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_cell_probability=*) - [ -n "$VIASH_PAR_OBS_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_cell_probability=*\': \'$VIASH_PAR_OBS_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_CELL_PROBABILITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_cell_size) - [ -n "$VIASH_PAR_OBS_CELL_SIZE" ] && ViashError Bad arguments for option \'--obs_cell_size\': \'$VIASH_PAR_OBS_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_CELL_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_cell_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_cell_size=*) - [ -n "$VIASH_PAR_OBS_CELL_SIZE" ] && ViashError Bad arguments for option \'--obs_cell_size=*\': \'$VIASH_PAR_OBS_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_CELL_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_droplet_efficiency) - [ -n "$VIASH_PAR_OBS_DROPLET_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_droplet_efficiency\': \'$VIASH_PAR_OBS_DROPLET_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_DROPLET_EFFICIENCY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_droplet_efficiency. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_droplet_efficiency=*) - [ -n "$VIASH_PAR_OBS_DROPLET_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_droplet_efficiency=*\': \'$VIASH_PAR_OBS_DROPLET_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_DROPLET_EFFICIENCY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_latent_scale) - [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LATENT_SCALE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_scale. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_latent_scale=*) - [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale=*\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LATENT_SCALE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_ambient_expression) - [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_AMBIENT_EXPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_ambient_expression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_ambient_expression=*) - [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression=*\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_VAR_AMBIENT_EXPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_gene_expression_encoding) - [ -n "$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING" ] && ViashError Bad arguments for option \'--obsm_gene_expression_encoding\': \'$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_gene_expression_encoding. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_gene_expression_encoding=*) - [ -n "$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING" ] && ViashError Bad arguments for option \'--obsm_gene_expression_encoding=*\': \'$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING=$(ViashRemoveFlags "$1") - shift 1 - ;; - --expected_cells_from_qc) - [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECTED_CELLS_FROM_QC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells_from_qc. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --expected_cells_from_qc=*) - [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc=*\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECTED_CELLS_FROM_QC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --expected_cells) - [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EXPECTED_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --expected_cells=*) - [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells=*\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECTED_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --total_droplets_included) - [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TOTAL_DROPLETS_INCLUDED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --total_droplets_included. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --total_droplets_included=*) - [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included=*\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TOTAL_DROPLETS_INCLUDED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --force_cell_umi_prior) - [ -n "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_cell_umi_prior\': \'$VIASH_PAR_FORCE_CELL_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_CELL_UMI_PRIOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --force_cell_umi_prior. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --force_cell_umi_prior=*) - [ -n "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_cell_umi_prior=*\': \'$VIASH_PAR_FORCE_CELL_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_CELL_UMI_PRIOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --force_empty_umi_prior) - [ -n "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_empty_umi_prior\': \'$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_EMPTY_UMI_PRIOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --force_empty_umi_prior. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --force_empty_umi_prior=*) - [ -n "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_empty_umi_prior=*\': \'$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_EMPTY_UMI_PRIOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --model) - [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --model. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --model=*) - [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model=*\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --epochs) - [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --epochs=*) - [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs=*\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --low_count_threshold) - [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOW_COUNT_THRESHOLD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --low_count_threshold. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --low_count_threshold=*) - [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold=*\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOW_COUNT_THRESHOLD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --z_dim) - [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_Z_DIM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_dim. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --z_dim=*) - [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim=*\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_Z_DIM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --z_layers) - if [ -z "$VIASH_PAR_Z_LAYERS" ]; then - VIASH_PAR_Z_LAYERS="$2" - else - VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_layers. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --z_layers=*) - if [ -z "$VIASH_PAR_Z_LAYERS" ]; then - VIASH_PAR_Z_LAYERS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --training_fraction) - [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRAINING_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --training_fraction. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --training_fraction=*) - [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction=*\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRAINING_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --empty_drop_training_fraction) - [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --empty_drop_training_fraction. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --empty_drop_training_fraction=*) - [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction=*\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --ignore_features) - if [ -z "$VIASH_PAR_IGNORE_FEATURES" ]; then - VIASH_PAR_IGNORE_FEATURES="$2" - else - VIASH_PAR_IGNORE_FEATURES="$VIASH_PAR_IGNORE_FEATURES:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --ignore_features. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --ignore_features=*) - if [ -z "$VIASH_PAR_IGNORE_FEATURES" ]; then - VIASH_PAR_IGNORE_FEATURES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_IGNORE_FEATURES="$VIASH_PAR_IGNORE_FEATURES:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --fpr) - if [ -z "$VIASH_PAR_FPR" ]; then - VIASH_PAR_FPR="$2" - else - VIASH_PAR_FPR="$VIASH_PAR_FPR:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fpr. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fpr=*) - if [ -z "$VIASH_PAR_FPR" ]; then - VIASH_PAR_FPR=$(ViashRemoveFlags "$1") - else - VIASH_PAR_FPR="$VIASH_PAR_FPR:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --exclude_feature_types) - if [ -z "$VIASH_PAR_EXCLUDE_FEATURE_TYPES" ]; then - VIASH_PAR_EXCLUDE_FEATURE_TYPES="$2" - else - VIASH_PAR_EXCLUDE_FEATURE_TYPES="$VIASH_PAR_EXCLUDE_FEATURE_TYPES:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude_feature_types. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --exclude_feature_types=*) - if [ -z "$VIASH_PAR_EXCLUDE_FEATURE_TYPES" ]; then - VIASH_PAR_EXCLUDE_FEATURE_TYPES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_EXCLUDE_FEATURE_TYPES="$VIASH_PAR_EXCLUDE_FEATURE_TYPES:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --projected_ambient_count_threshold) - [ -n "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--projected_ambient_count_threshold\': \'$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --projected_ambient_count_threshold. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --projected_ambient_count_threshold=*) - [ -n "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--projected_ambient_count_threshold=*\': \'$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --learning_rate) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --learning_rate=*) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --final_elbo_fail_fraction) - [ -n "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--final_elbo_fail_fraction\': \'$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FINAL_ELBO_FAIL_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --final_elbo_fail_fraction. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --final_elbo_fail_fraction=*) - [ -n "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--final_elbo_fail_fraction=*\': \'$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FINAL_ELBO_FAIL_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --epoch_elbo_fail_fraction) - [ -n "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--epoch_elbo_fail_fraction\': \'$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --epoch_elbo_fail_fraction. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --epoch_elbo_fail_fraction=*) - [ -n "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--epoch_elbo_fail_fraction=*\': \'$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --num_training_tries) - [ -n "$VIASH_PAR_NUM_TRAINING_TRIES" ] && ViashError Bad arguments for option \'--num_training_tries\': \'$VIASH_PAR_NUM_TRAINING_TRIES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_NUM_TRAINING_TRIES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_training_tries. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --num_training_tries=*) - [ -n "$VIASH_PAR_NUM_TRAINING_TRIES" ] && ViashError Bad arguments for option \'--num_training_tries=*\': \'$VIASH_PAR_NUM_TRAINING_TRIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUM_TRAINING_TRIES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --learning_rate_retry_mult) - [ -n "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" ] && ViashError Bad arguments for option \'--learning_rate_retry_mult\': \'$VIASH_PAR_LEARNING_RATE_RETRY_MULT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE_RETRY_MULT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate_retry_mult. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --learning_rate_retry_mult=*) - [ -n "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" ] && ViashError Bad arguments for option \'--learning_rate_retry_mult=*\': \'$VIASH_PAR_LEARNING_RATE_RETRY_MULT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE_RETRY_MULT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --posterior_batch_size) - [ -n "$VIASH_PAR_POSTERIOR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--posterior_batch_size\': \'$VIASH_PAR_POSTERIOR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_POSTERIOR_BATCH_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --posterior_batch_size. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --posterior_batch_size=*) - [ -n "$VIASH_PAR_POSTERIOR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--posterior_batch_size=*\': \'$VIASH_PAR_POSTERIOR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_POSTERIOR_BATCH_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --posterior_regulation) - [ -n "$VIASH_PAR_POSTERIOR_REGULATION" ] && ViashError Bad arguments for option \'--posterior_regulation\': \'$VIASH_PAR_POSTERIOR_REGULATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_POSTERIOR_REGULATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --posterior_regulation. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --posterior_regulation=*) - [ -n "$VIASH_PAR_POSTERIOR_REGULATION" ] && ViashError Bad arguments for option \'--posterior_regulation=*\': \'$VIASH_PAR_POSTERIOR_REGULATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_POSTERIOR_REGULATION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alpha) - [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALPHA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alpha=*) - [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha=*\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALPHA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --q) - [ -n "$VIASH_PAR_Q" ] && ViashError Bad arguments for option \'--q\': \'$VIASH_PAR_Q\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_Q="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --q. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --q=*) - [ -n "$VIASH_PAR_Q" ] && ViashError Bad arguments for option \'--q=*\': \'$VIASH_PAR_Q\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_Q=$(ViashRemoveFlags "$1") - shift 1 - ;; - --estimator) - [ -n "$VIASH_PAR_ESTIMATOR" ] && ViashError Bad arguments for option \'--estimator\': \'$VIASH_PAR_ESTIMATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ESTIMATOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --estimator. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --estimator=*) - [ -n "$VIASH_PAR_ESTIMATOR" ] && ViashError Bad arguments for option \'--estimator=*\': \'$VIASH_PAR_ESTIMATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ESTIMATOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --estimator_multiple_cpu) - [ -n "$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU" ] && ViashError Bad arguments for option \'--estimator_multiple_cpu\': \'$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ESTIMATOR_MULTIPLE_CPU=true - shift 1 - ;; - --constant_learning_rate) - [ -n "$VIASH_PAR_CONSTANT_LEARNING_RATE" ] && ViashError Bad arguments for option \'--constant_learning_rate\': \'$VIASH_PAR_CONSTANT_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CONSTANT_LEARNING_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --constant_learning_rate. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --constant_learning_rate=*) - [ -n "$VIASH_PAR_CONSTANT_LEARNING_RATE" ] && ViashError Bad arguments for option \'--constant_learning_rate=*\': \'$VIASH_PAR_CONSTANT_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CONSTANT_LEARNING_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --debug) - [ -n "$VIASH_PAR_DEBUG" ] && ViashError Bad arguments for option \'--debug\': \'$VIASH_PAR_DEBUG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DEBUG=true - shift 1 - ;; - --cuda) - [ -n "$VIASH_PAR_CUDA" ] && ViashError Bad arguments for option \'--cuda\': \'$VIASH_PAR_CUDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CUDA=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then - VIASH_PAR_LAYER_OUTPUT="cellbender_corrected" -fi -if [ -z ${VIASH_PAR_OBS_BACKGROUND_FRACTION+x} ]; then - VIASH_PAR_OBS_BACKGROUND_FRACTION="cellbender_background_fraction" -fi -if [ -z ${VIASH_PAR_OBS_CELL_PROBABILITY+x} ]; then - VIASH_PAR_OBS_CELL_PROBABILITY="cellbender_cell_probability" -fi -if [ -z ${VIASH_PAR_OBS_CELL_SIZE+x} ]; then - VIASH_PAR_OBS_CELL_SIZE="cellbender_cell_size" -fi -if [ -z ${VIASH_PAR_OBS_DROPLET_EFFICIENCY+x} ]; then - VIASH_PAR_OBS_DROPLET_EFFICIENCY="cellbender_droplet_efficiency" -fi -if [ -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then - VIASH_PAR_OBS_LATENT_SCALE="cellbender_latent_scale" -fi -if [ -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then - VIASH_PAR_VAR_AMBIENT_EXPRESSION="cellbender_ambient_expression" -fi -if [ -z ${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING+x} ]; then - VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING="cellbender_gene_expression_encoding" -fi -if [ -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then - VIASH_PAR_EXPECTED_CELLS_FROM_QC="false" -fi -if [ -z ${VIASH_PAR_MODEL+x} ]; then - VIASH_PAR_MODEL="full" -fi -if [ -z ${VIASH_PAR_EPOCHS+x} ]; then - VIASH_PAR_EPOCHS="150" -fi -if [ -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then - VIASH_PAR_LOW_COUNT_THRESHOLD="5" -fi -if [ -z ${VIASH_PAR_Z_DIM+x} ]; then - VIASH_PAR_Z_DIM="64" -fi -if [ -z ${VIASH_PAR_Z_LAYERS+x} ]; then - VIASH_PAR_Z_LAYERS="512" -fi -if [ -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then - VIASH_PAR_TRAINING_FRACTION="0.9" -fi -if [ -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then - VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="0.2" -fi -if [ -z ${VIASH_PAR_FPR+x} ]; then - VIASH_PAR_FPR="0.01" -fi -if [ -z ${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD+x} ]; then - VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD="0.1" -fi -if [ -z ${VIASH_PAR_LEARNING_RATE+x} ]; then - VIASH_PAR_LEARNING_RATE="1.0E-4" -fi 
-if [ -z ${VIASH_PAR_NUM_TRAINING_TRIES+x} ]; then - VIASH_PAR_NUM_TRAINING_TRIES="1" -fi -if [ -z ${VIASH_PAR_LEARNING_RATE_RETRY_MULT+x} ]; then - VIASH_PAR_LEARNING_RATE_RETRY_MULT="0.2" -fi -if [ -z ${VIASH_PAR_POSTERIOR_BATCH_SIZE+x} ]; then - VIASH_PAR_POSTERIOR_BATCH_SIZE="128" -fi -if [ -z ${VIASH_PAR_ESTIMATOR+x} ]; then - VIASH_PAR_ESTIMATOR="mckp" -fi -if [ -z ${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU+x} ]; then - VIASH_PAR_ESTIMATOR_MULTIPLE_CPU="false" -fi -if [ -z ${VIASH_PAR_DEBUG+x} ]; then - VIASH_PAR_DEBUG="false" -fi -if [ -z ${VIASH_PAR_CUDA+x} ]; then - VIASH_PAR_CUDA="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ]]; then - if ! [[ "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--expected_cells_from_qc' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EXPECTED_CELLS" ]]; then - if ! [[ "$VIASH_PAR_EXPECTED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--expected_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ]]; then - if ! [[ "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--total_droplets_included' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" ]]; then - if ! [[ "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--force_cell_umi_prior' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" ]]; then - if ! 
[[ "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--force_empty_umi_prior' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ]]; then - if ! [[ "$VIASH_PAR_LOW_COUNT_THRESHOLD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--low_count_threshold' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_Z_DIM" ]]; then - if ! [[ "$VIASH_PAR_Z_DIM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--z_dim' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_Z_LAYERS" ]; then - IFS=':' - set -f - for val in $VIASH_PAR_Z_LAYERS; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--z_layers' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_TRAINING_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--training_fraction' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--empty_drop_training_fraction' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_IGNORE_FEATURES" ]; then - IFS=':' - set -f - for val in $VIASH_PAR_IGNORE_FEATURES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--ignore_features' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_FPR" ]; then - IFS=':' - set -f - for val in $VIASH_PAR_FPR; do - if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--fpr' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" ]]; then - if ! [[ "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--projected_ambient_count_threshold' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then - if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--final_elbo_fail_fraction' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--epoch_elbo_fail_fraction' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NUM_TRAINING_TRIES" ]]; then - if ! [[ "$VIASH_PAR_NUM_TRAINING_TRIES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--num_training_tries' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" ]]; then - if ! 
[[ "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--learning_rate_retry_mult' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_POSTERIOR_BATCH_SIZE" ]]; then - if ! [[ "$VIASH_PAR_POSTERIOR_BATCH_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--posterior_batch_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALPHA" ]]; then - if ! [[ "$VIASH_PAR_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--alpha' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_Q" ]]; then - if ! [[ "$VIASH_PAR_Q" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--q' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU" ]]; then - if ! [[ "$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--estimator_multiple_cpu' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CONSTANT_LEARNING_RATE" ]]; then - if ! [[ "$VIASH_PAR_CONSTANT_LEARNING_RATE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--constant_learning_rate' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DEBUG" ]]; then - if ! [[ "$VIASH_PAR_DEBUG" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--debug' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CUDA" ]]; then - if ! 
[[ "$VIASH_PAR_CUDA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--cuda' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_MODEL" ]; then - VIASH_PAR_MODEL_CHOICES=("naive:simple:ambient:swapping:full") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_MODEL_CHOICES[*]}:" =~ ":$VIASH_PAR_MODEL:" ]]; then - ViashError '--model' specified value of \'$VIASH_PAR_MODEL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_POSTERIOR_REGULATION" ]; then - VIASH_PAR_POSTERIOR_REGULATION_CHOICES=("PRq:PRmu:PRmu_gene") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_POSTERIOR_REGULATION_CHOICES[*]}:" =~ ":$VIASH_PAR_POSTERIOR_REGULATION:" ]]; then - ViashError '--posterior_regulation' specified value of \'$VIASH_PAR_POSTERIOR_REGULATION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_ESTIMATOR" ]; then - VIASH_PAR_ESTIMATOR_CHOICES=("map:mean:cdf:sample:mckp") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_ESTIMATOR_CHOICES[*]}:" =~ ":$VIASH_PAR_ESTIMATOR:" ]]; then - ViashError '--estimator' specified value of \'$VIASH_PAR_ESTIMATOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellbender_remove_background-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -import tempfile -import subprocess -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix -from cellbender.remove_background.downstream import anndata_from_h5 -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_background_fraction': $( if [ ! -z ${VIASH_PAR_OBS_BACKGROUND_FRACTION+x} ]; then echo "r'${VIASH_PAR_OBS_BACKGROUND_FRACTION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_cell_probability': $( if [ ! 
-z ${VIASH_PAR_OBS_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_PROBABILITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_cell_size': $( if [ ! -z ${VIASH_PAR_OBS_CELL_SIZE+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_SIZE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_droplet_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_DROPLET_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_DROPLET_EFFICIENCY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_gene_expression_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'expected_cells_from_qc': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'force_cell_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_CELL_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_CELL_UMI_PRIOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'force_empty_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'epochs': $( if [ ! 
-z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), - 'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'ignore_features': $( if [ ! -z ${VIASH_PAR_IGNORE_FEATURES+x} ]; then echo "list(map(int, r'${VIASH_PAR_IGNORE_FEATURES//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), - 'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), - 'exclude_feature_types': $( if [ ! -z ${VIASH_PAR_EXCLUDE_FEATURE_TYPES+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_FEATURE_TYPES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'projected_ambient_count_threshold': $( if [ ! -z ${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD+x} ]; then echo "float(r'${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'final_elbo_fail_fraction': $( if [ ! 
-z ${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'epoch_elbo_fail_fraction': $( if [ ! -z ${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'num_training_tries': $( if [ ! -z ${VIASH_PAR_NUM_TRAINING_TRIES+x} ]; then echo "int(r'${VIASH_PAR_NUM_TRAINING_TRIES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'learning_rate_retry_mult': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE_RETRY_MULT+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE_RETRY_MULT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'posterior_batch_size': $( if [ ! -z ${VIASH_PAR_POSTERIOR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_POSTERIOR_BATCH_SIZE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'posterior_regulation': $( if [ ! -z ${VIASH_PAR_POSTERIOR_REGULATION+x} ]; then echo "r'${VIASH_PAR_POSTERIOR_REGULATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'q': $( if [ ! -z ${VIASH_PAR_Q+x} ]; then echo "float(r'${VIASH_PAR_Q//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'estimator': $( if [ ! -z ${VIASH_PAR_ESTIMATOR+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'estimator_multiple_cpu': $( if [ ! -z ${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'constant_learning_rate': $( if [ ! -z ${VIASH_PAR_CONSTANT_LEARNING_RATE+x} ]; then echo "r'${VIASH_PAR_CONSTANT_LEARNING_RATE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'debug': $( if [ ! -z ${VIASH_PAR_DEBUG+x} ]; then echo "r'${VIASH_PAR_DEBUG//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'cuda': $( if [ ! 
-z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Performing log transformation on modality %s", mod) -data = mdata.mod[mod] - -# import pathlib -# with pathlib.Path(os.path.dirname(par["output"])) / "cellbender" as temp_dir: -# os.mkdir(temp_dir) -with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir: - # construct paths within tempdir - input_file = os.path.join(temp_dir, "input.h5ad") - output_file = os.path.join(temp_dir, "output.h5") - - logger.info("Creating AnnData input file for CellBender: '%s'", input_file) - data.write_h5ad(input_file) - - logger.info("Constructing CellBender command") - cmd_pars = [ - "cellbender", "remove-background", - "--input", input_file, - "--output", output_file, - # don't create checkpoints because they're not used / returned anyways - "--checkpoint-mins", "99999999" - ] - - if meta.get("cpus") is not None: - cmd_pars += ["--cpu-threads", str(meta["cpus"])] - - extra_args = [ - ("--expected-cells", "expected_cells", True), - ("--total-droplets-included", "total_droplets_included", True), - ("--force-cell-umi-prior", "force_cell_umi_prior", True), - ("--force-empty-umi-prior", 
"force_empty_umi_prior", True), - ("--model", "model", True), - ("--epochs", "epochs", True), - ("--low-count-threshold", "low_count_threshold", True), - ("--z-dim", "z_dim", True), - ("--z-layers", "z_layers", True), - ("--training-fraction", "training_fraction", True), - ("--empty-drop-training-fraction", "empty_drop_training_fraction", True), - ("--ignore-features", "ignore_features", True), - ("--fpr", "fpr", True), - ("--exclude-feature-types", "exclude_feature_types", True), - ("--projected-ambient-count-threshold", "projected_ambient_count_threshold", True), - ("--learning-rate", "learning_rate", True), - ("--final-elbo-fail-fraction", "final_elbo_fail_fraction", True), - ("--epoch-elbo-fail-fraction", "epoch_elbo_fail_fraction", True), - ("--num-training-tries", "num_training_tries", True), - ("--learning-rate-retry-mult", "learning_rate_retry_mult", True), - ("--posterior-batch-size", "posterior_batch_size", True), - ("--posterior-regulation", "posterior_regulation", True), - ("--alpha", "alpha", True), - ("--q", "q", True), - ("--estimator", "estimator", True), - ("--estimator-multiple-cpu", "estimator_multiple_cpu", False), - ("--constant-learning-rate", "constant_learning_rate", False), - ("--debug", "debug", False), - ("--cuda", "cuda", False), - ] - for (flag, name, is_kwarg) in extra_args: - if par[name]: - values = par[name] if isinstance(par[name], list) else [par[name]] - cmd_pars += [flag] + [str(val) for val in values] if is_kwarg else [flag] - - if par["expected_cells_from_qc"] and "metrics_cellranger" in data.uns: - assert par["expected_cells"] is None, "If min_counts is defined, expected_cells should be undefined" - assert par["total_droplets_included"] is None, "If min_counts is defined, expected_cells should be undefined" - met = data.uns["metrics_cellranger"] - col_name = "Estimated Number of Cells" - assert col_name in met.columns, "%s should be a column in .obs[metrics_cellranger]" - est_cells = met[col_name].values[0] - 
logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5) - cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5*est_cells)] - - logger.info("Running CellBender: '%s'", ' '.join(cmd_pars)) - out = subprocess.check_output(cmd_pars).decode("utf-8") - - logger.info("Reading CellBender 10xh5 output file: '%s'", output_file) - adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False) - - logger.info("CellBender output format:", adata_out) - - # AnnData object with n_obs x n_vars = 6794880 x 33538 - # obs: 'cellbender_analyzed' - # var: 'ambient_expression', 'feature_type', 'genome', 'gene_id', 'cellbender_analyzed' - # uns: 'background_fraction', 'barcode_indices_for_latents', 'cell_probability', 'cell_size', 'droplet_efficiency', 'gene_expression_encoding', - # 'cell_size_lognormal_std', 'empty_droplet_size_lognormal_loc', 'empty_droplet_size_lognormal_scale', 'swapping_fraction_dist_params', - # 'barcodes_analyzed', 'barcodes_analyzed_inds', 'estimator', 'features_analyzed_inds', 'fraction_data_used_for_testing', 'learning_curve_learning_rate_epoch', - # 'learning_curve_learning_rate_value', 'learning_curve_test_elbo', 'learning_curve_test_epoch', 'learning_curve_train_elbo', 'learning_curve_train_epoch', - # 'target_false_positive_rate' - - logger.info("Copying X output to MuData") - data.layers[par["layer_output"]] = adata_out.X - - logger.info("Copying .obs output to MuData") - obs_store = { - "obs_background_fraction": "background_fraction", - "obs_cell_probability": "cell_probability", - "obs_cell_size": "cell_size", - "obs_droplet_efficiency": "droplet_efficiency", - "obs_latent_scale": "latent_scale" - } - for to_name, from_name in obs_store.items(): - if par[to_name]: - if from_name in adata_out.obs: - data.obs[par[to_name]] = adata_out.obs[from_name] - # when using unfiltered data, the values will be in uns instead of obs - elif from_name in adata_out.uns and 
"barcode_indices_for_latents" in adata_out.uns: - vec = np.zeros(data.n_obs) - vec[adata_out.uns["barcode_indices_for_latents"]] = adata_out.uns[from_name] - data.obs[par[to_name]] = vec - - logger.info("Copying .var output to MuData") - var_store = { "var_ambient_expression": "ambient_expression" } - for to_name, from_name in var_store.items(): - if par[to_name]: - data.var[par[to_name]] = adata_out.var[from_name] - - logger.info("Copying obsm_gene_expression_encoding output to MuData") - obsm_store = { "obsm_gene_expression_encoding": "gene_expression_encoding" } - for to_name, from_name in obsm_store.items(): - if par[to_name]: - if from_name in adata_out.obsm: - data.obsm[par[to_name]] = adata_out.obsm[from_name] - elif from_name in adata_out.uns and "barcode_indices_for_latents" in adata_out.uns: - matrix_to_store = adata_out.uns[from_name] - number_of_obs = data.X.shape[0] - latent_space_sparse = csr_matrix((number_of_obs, par["z_dim"]), - dtype=adata_out.uns[from_name].dtype) - obs_rows_in_space_representation = adata_out.uns["barcode_indices_for_latents"] - latent_space_sparse[obs_rows_in_space_representation] = adata_out.uns[from_name] - data.obsm[par[to_name]] = latent_space_sparse - else: - raise RuntimeError("Requested to save latent gene encoding, but the data is either missing " - "from cellbender output or in an incorrect format.") - - -logger.info("Writing to file %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/correction/cellbender_remove_background/setup_logger.py b/target/docker/correction/cellbender_remove_background/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/correction/cellbender_remove_background/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml b/target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml deleted file mode 100644 index f797ad4313d..00000000000 --- a/target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml +++ /dev/null @@ -1,406 +0,0 @@ -functionality: - name: "cellbender_remove_background_v0_2" - namespace: "correction" - version: "0.12.3" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file." 
- info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "List of modalities to process." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Full count matrix as an h5mu file, with background RNA removed.\ - \ This file contains all the original droplet barcodes." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_output" - description: "Output layer" - info: null - default: - - "corrected" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_latent_rt_efficiency" - info: null - default: - - "latent_rt_efficiency" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_latent_cell_probability" - info: null - default: - - "latent_cell_probability" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_latent_scale" - info: null - default: - - "latent_scale" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_ambient_expression" - info: null - default: - - "ambient_expression" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: 
"string" - name: "--obsm_latent_gene_encoding" - info: null - default: - - "cellbender_latent_gene_encoding" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: "--expected_cells" - description: "Number of cells expected in the dataset (a rough estimate within\ - \ a factor of 2 is sufficient)." - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--total_droplets_included" - description: "The number of droplets from the rank-ordered UMI plot\nthat will\ - \ be analyzed. The largest 'total_droplets'\ndroplets will have their cell\ - \ probabilities inferred\nas an output.\n" - info: null - example: - - 25000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--expected_cells_from_qc" - description: "Will use the Cell Ranger QC to determine the estimated number\ - \ of cells" - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--model" - description: "Which model is being used for count data. 'simple'\ndoes not model\ - \ either ambient RNA or random barcode\nswapping (for debugging purposes --\ - \ not recommended).\n'ambient' assumes background RNA is incorporated into\n\ - droplets. 'swapping' assumes background RNA comes from\nrandom barcode swapping.\ - \ 'full' uses a combined\nambient and swapping model.\n" - info: null - default: - - "full" - required: false - choices: - - "simple" - - "ambient" - - "swapping" - - "full" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--epochs" - description: "Number of epochs to train." 
- info: null - default: - - 150 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--low_count_threshold" - description: "Droplets with UMI counts below this number are completely \nexcluded\ - \ from the analysis. This can help identify the correct \nprior for empty\ - \ droplet counts in the rare case where empty \ncounts are extremely high\ - \ (over 200).\n" - info: null - default: - - 15 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_dim" - description: "Dimension of latent variable z.\n" - info: null - default: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_layers" - description: "Dimension of hidden layers in the encoder for z.\n" - info: null - default: - - 500 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--training_fraction" - description: "Training detail: the fraction of the data used for training.\n\ - The rest is never seen by the inference algorithm. Speeds up learning.\n" - info: null - default: - - 0.9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--empty_drop_training_fraction" - description: "Training detail: the fraction of the training data each epoch\ - \ that \nis drawn (randomly sampled) from surely empty droplets.\n" - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--fpr" - description: "Target false positive rate in (0, 1). A false positive\nis a true\ - \ signal count that is erroneously removed.\nMore background removal is accompanied\ - \ by more signal\nremoval at high values of FPR. 
You can specify\nmultiple\ - \ values, which will create multiple output\nfiles.\n" - info: null - default: - - 0.01 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--exclude_antibody_capture" - description: "Including the flag --exclude-antibody-capture will\ncause remove-background\ - \ to operate on gene counts\nonly, ignoring other features.\n" - info: null - direction: "input" - dest: "par" - - type: "double" - name: "--learning_rate" - description: "Training detail: lower learning rate for inference. A\nOneCycle\ - \ learning rate schedule is used, where the\nupper learning rate is ten times\ - \ this value. (For this\nvalue, probably do not exceed 1e-3).\n" - info: null - example: - - 1.0E-4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--cuda" - description: "Including the flag --cuda will run the inference on a\nGPU.\n" - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "helper.py" - - type: "file" - path: "src/utils/setup_logger.py" - description: "Eliminating technical artifacts from high-throughput single-cell RNA\ - \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ - \ and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \n\ - At the moment, only the count matrices produced by the CellRanger count pipeline\ - \ is supported. Support for additional tools and protocols \nwill be added in\ - \ the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022,\ - \ bioRxiv.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/pytorch:22.12-py3" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "muon==0.1.5" - - "tables==3.8.0" - - "cellbender==0.2.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "muon~=0.1.4" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - 
config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background_v0_2" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 b/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 deleted file mode 100755 index 3d153df830e..00000000000 --- a/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 +++ /dev/null @@ -1,1629 +0,0 @@ -#!/usr/bin/env bash - -# cellbender_remove_background_v0_2 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! 
-# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="cellbender_remove_background_v0_2" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellbender_remove_background_v0_2 0.12.3" - echo "" - echo "Eliminating technical artifacts from high-throughput single-cell RNA sequencing" - echo "data." - echo "" - echo "This module removes counts due to ambient RNA molecules and random barcode" - echo "swapping from (raw) UMI-based scRNA-seq count matrices." - echo "At the moment, only the count matrices produced by the CellRanger count pipeline" - echo "is supported. Support for additional tools and protocols" - echo "will be added in the future. A quick start tutorial can be found here." - echo "" - echo "Fleming et al. 2022, bioRxiv." - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " List of modalities to process." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Full count matrix as an h5mu file, with background RNA removed. This" - echo " file contains all the original droplet barcodes." 
- echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo "" - echo " --layer_output" - echo " type: string" - echo " default: corrected" - echo " Output layer" - echo "" - echo " --obs_latent_rt_efficiency" - echo " type: string" - echo " default: latent_rt_efficiency" - echo "" - echo " --obs_latent_cell_probability" - echo " type: string" - echo " default: latent_cell_probability" - echo "" - echo " --obs_latent_scale" - echo " type: string" - echo " default: latent_scale" - echo "" - echo " --var_ambient_expression" - echo " type: string" - echo " default: ambient_expression" - echo "" - echo " --obsm_latent_gene_encoding" - echo " type: string" - echo " default: cellbender_latent_gene_encoding" - echo "" - echo "Arguments:" - echo " --expected_cells" - echo " type: integer" - echo " example: 1000" - echo " Number of cells expected in the dataset (a rough estimate within a" - echo " factor of 2 is sufficient)." - echo "" - echo " --total_droplets_included" - echo " type: integer" - echo " example: 25000" - echo " The number of droplets from the rank-ordered UMI plot" - echo " that will be analyzed. The largest 'total_droplets'" - echo " droplets will have their cell probabilities inferred" - echo " as an output." - echo "" - echo " --expected_cells_from_qc" - echo " type: boolean" - echo " default: true" - echo " Will use the Cell Ranger QC to determine the estimated number of cells" - echo "" - echo " --model" - echo " type: string" - echo " default: full" - echo " choices: [ simple, ambient, swapping, full ]" - echo " Which model is being used for count data. 'simple'" - echo " does not model either ambient RNA or random barcode" - echo " swapping (for debugging purposes -- not recommended)." - echo " 'ambient' assumes background RNA is incorporated into" - echo " droplets. 'swapping' assumes background RNA comes from" - echo " random barcode swapping. 
'full' uses a combined" - echo " ambient and swapping model." - echo "" - echo " --epochs" - echo " type: integer" - echo " default: 150" - echo " Number of epochs to train." - echo "" - echo " --low_count_threshold" - echo " type: integer" - echo " default: 15" - echo " Droplets with UMI counts below this number are completely" - echo " excluded from the analysis. This can help identify the correct" - echo " prior for empty droplet counts in the rare case where empty" - echo " counts are extremely high (over 200)." - echo "" - echo " --z_dim" - echo " type: integer" - echo " default: 100" - echo " Dimension of latent variable z." - echo "" - echo " --z_layers" - echo " type: integer, multiple values allowed" - echo " default: 500" - echo " Dimension of hidden layers in the encoder for z." - echo "" - echo " --training_fraction" - echo " type: double" - echo " default: 0.9" - echo " Training detail: the fraction of the data used for training." - echo " The rest is never seen by the inference algorithm. Speeds up learning." - echo "" - echo " --empty_drop_training_fraction" - echo " type: double" - echo " default: 0.5" - echo " Training detail: the fraction of the training data each epoch that" - echo " is drawn (randomly sampled) from surely empty droplets." - echo "" - echo " --fpr" - echo " type: double, multiple values allowed" - echo " default: 0.01" - echo " Target false positive rate in (0, 1). A false positive" - echo " is a true signal count that is erroneously removed." - echo " More background removal is accompanied by more signal" - echo " removal at high values of FPR. You can specify" - echo " multiple values, which will create multiple output" - echo " files." - echo "" - echo " --exclude_antibody_capture" - echo " type: boolean_true" - echo " Including the flag --exclude-antibody-capture will" - echo " cause remove-background to operate on gene counts" - echo " only, ignoring other features." 
- echo "" - echo " --learning_rate" - echo " type: double" - echo " example: 1.0E-4" - echo " Training detail: lower learning rate for inference. A" - echo " OneCycle learning rate schedule is used, where the" - echo " upper learning rate is ten times this value. (For this" - echo " value, probably do not exceed 1e-3)." - echo "" - echo " --cuda" - echo " type: boolean_true" - echo " Including the flag --cuda will run the inference on a" - echo " GPU." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM nvcr.io/nvidia/pytorch:22.12-py3 - -ENTRYPOINT [] - - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "muon==0.1.5" "tables==3.8.0" "cellbender==0.2.1" - -LABEL org.opencontainers.image.description="Companion container for running component correction cellbender_remove_background_v0_2" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellbender_remove_background_v0_2-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "cellbender_remove_background_v0_2 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer_output) - [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --layer_output=*) - [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output=*\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_latent_rt_efficiency) - [ -n "$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_latent_rt_efficiency\': \'$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LATENT_RT_EFFICIENCY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_rt_efficiency. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_latent_rt_efficiency=*) - [ -n "$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_latent_rt_efficiency=*\': \'$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LATENT_RT_EFFICIENCY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_latent_cell_probability) - [ -n "$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_latent_cell_probability\': \'$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LATENT_CELL_PROBABILITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_cell_probability. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_latent_cell_probability=*) - [ -n "$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_latent_cell_probability=*\': \'$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_LATENT_CELL_PROBABILITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_latent_scale) - [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LATENT_SCALE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_scale. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_latent_scale=*) - [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale=*\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LATENT_SCALE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_ambient_expression) - [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_AMBIENT_EXPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_ambient_expression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_ambient_expression=*) - [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression=*\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_AMBIENT_EXPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_latent_gene_encoding) - [ -n "$VIASH_PAR_OBSM_LATENT_GENE_ENCODING" ] && ViashError Bad arguments for option \'--obsm_latent_gene_encoding\': \'$VIASH_PAR_OBSM_LATENT_GENE_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSM_LATENT_GENE_ENCODING="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_latent_gene_encoding. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_latent_gene_encoding=*) - [ -n "$VIASH_PAR_OBSM_LATENT_GENE_ENCODING" ] && ViashError Bad arguments for option \'--obsm_latent_gene_encoding=*\': \'$VIASH_PAR_OBSM_LATENT_GENE_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_LATENT_GENE_ENCODING=$(ViashRemoveFlags "$1") - shift 1 - ;; - --expected_cells) - [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECTED_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --expected_cells=*) - [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells=*\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECTED_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --total_droplets_included) - [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TOTAL_DROPLETS_INCLUDED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --total_droplets_included. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --total_droplets_included=*) - [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included=*\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TOTAL_DROPLETS_INCLUDED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --expected_cells_from_qc) - [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECTED_CELLS_FROM_QC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells_from_qc. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --expected_cells_from_qc=*) - [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc=*\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECTED_CELLS_FROM_QC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --model) - [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --model. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --model=*) - [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model=*\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --epochs) - [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --epochs=*) - [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs=*\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --low_count_threshold) - [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOW_COUNT_THRESHOLD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --low_count_threshold. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --low_count_threshold=*) - [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold=*\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOW_COUNT_THRESHOLD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --z_dim) - [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_Z_DIM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_dim. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --z_dim=*) - [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim=*\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_Z_DIM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --z_layers) - if [ -z "$VIASH_PAR_Z_LAYERS" ]; then - VIASH_PAR_Z_LAYERS="$2" - else - VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_layers. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --z_layers=*) - if [ -z "$VIASH_PAR_Z_LAYERS" ]; then - VIASH_PAR_Z_LAYERS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --training_fraction) - [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRAINING_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --training_fraction. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --training_fraction=*) - [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction=*\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRAINING_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --empty_drop_training_fraction) - [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --empty_drop_training_fraction. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --empty_drop_training_fraction=*) - [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction=*\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fpr) - if [ -z "$VIASH_PAR_FPR" ]; then - VIASH_PAR_FPR="$2" - else - VIASH_PAR_FPR="$VIASH_PAR_FPR:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fpr. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fpr=*) - if [ -z "$VIASH_PAR_FPR" ]; then - VIASH_PAR_FPR=$(ViashRemoveFlags "$1") - else - VIASH_PAR_FPR="$VIASH_PAR_FPR:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --exclude_antibody_capture) - [ -n "$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE" ] && ViashError Bad arguments for option \'--exclude_antibody_capture\': \'$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE=true - shift 1 - ;; - --learning_rate) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --learning_rate=*) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cuda) - [ -n "$VIASH_PAR_CUDA" ] && ViashError Bad arguments for option \'--cuda\': \'$VIASH_PAR_CUDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CUDA=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" 
- fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then - VIASH_PAR_LAYER_OUTPUT="corrected" -fi -if [ -z ${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY+x} ]; then - VIASH_PAR_OBS_LATENT_RT_EFFICIENCY="latent_rt_efficiency" -fi -if [ -z ${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY+x} ]; then - VIASH_PAR_OBS_LATENT_CELL_PROBABILITY="latent_cell_probability" -fi -if [ -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then - VIASH_PAR_OBS_LATENT_SCALE="latent_scale" -fi -if [ -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then - VIASH_PAR_VAR_AMBIENT_EXPRESSION="ambient_expression" -fi -if [ -z ${VIASH_PAR_OBSM_LATENT_GENE_ENCODING+x} ]; then - VIASH_PAR_OBSM_LATENT_GENE_ENCODING="cellbender_latent_gene_encoding" -fi -if [ -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then - VIASH_PAR_EXPECTED_CELLS_FROM_QC="true" -fi -if [ -z ${VIASH_PAR_MODEL+x} ]; then - VIASH_PAR_MODEL="full" -fi -if [ -z ${VIASH_PAR_EPOCHS+x} ]; then - VIASH_PAR_EPOCHS="150" -fi -if [ -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then - VIASH_PAR_LOW_COUNT_THRESHOLD="15" -fi -if [ -z ${VIASH_PAR_Z_DIM+x} ]; then - VIASH_PAR_Z_DIM="100" -fi -if [ -z ${VIASH_PAR_Z_LAYERS+x} ]; then - VIASH_PAR_Z_LAYERS="500" -fi -if [ -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then - VIASH_PAR_TRAINING_FRACTION="0.9" -fi -if [ -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then - VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="0.5" -fi -if [ -z ${VIASH_PAR_FPR+x} ]; then - VIASH_PAR_FPR="0.01" -fi -if [ -z ${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE+x} ]; then - VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE="false" -fi -if [ -z ${VIASH_PAR_CUDA+x} ]; then - VIASH_PAR_CUDA="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_EXPECTED_CELLS" ]]; then - if ! 
[[ "$VIASH_PAR_EXPECTED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--expected_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ]]; then - if ! [[ "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--total_droplets_included' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ]]; then - if ! [[ "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--expected_cells_from_qc' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ]]; then - if ! [[ "$VIASH_PAR_LOW_COUNT_THRESHOLD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--low_count_threshold' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_Z_DIM" ]]; then - if ! [[ "$VIASH_PAR_Z_DIM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--z_dim' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_Z_LAYERS" ]; then - IFS=':' - set -f - for val in $VIASH_PAR_Z_LAYERS; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--z_layers' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_TRAINING_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--training_fraction' has to be a double. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--empty_drop_training_fraction' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_FPR" ]; then - IFS=':' - set -f - for val in $VIASH_PAR_FPR; do - if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--fpr' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE" ]]; then - if ! [[ "$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--exclude_antibody_capture' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then - if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CUDA" ]]; then - if ! [[ "$VIASH_PAR_CUDA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--cuda' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_MODEL" ]; then - VIASH_PAR_MODEL_CHOICES=("simple:ambient:swapping:full") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_MODEL_CHOICES[*]}:" =~ ":$VIASH_PAR_MODEL:" ]]; then - ViashError '--model' specified value of \'$VIASH_PAR_MODEL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. 
- exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellbender_remove_background_v0_2-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -import tempfile -import subprocess -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_latent_rt_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_latent_cell_probability': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_latent_gene_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_LATENT_GENE_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_LATENT_GENE_ENCODING//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'expected_cells_from_qc': $( if [ ! 
-z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'epochs': $( if [ ! -z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), - 'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), - 'exclude_antibody_capture': $( if [ ! -z ${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'cuda': $( if [ ! -z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -from helper import anndata_from_h5 - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Performing log transformation on modality %s", mod) -data = mdata.mod[mod] - -# with pathlib.Path(meta["temp_dir"]) / "cellbender" as temp_dir: -# os.mkdir(temp_dir) -with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir: - # construct paths within tempdir - input_file = os.path.join(temp_dir, "input.h5ad") - output_file = os.path.join(temp_dir, "output.h5") - - logger.info("Creating AnnData input file for CellBender: '%s'", input_file) - data.write_h5ad(input_file) - - logger.info("Constructing CellBender command") - cmd_pars = [ - "cellbender", "remove-background", - "--input", input_file, - "--output", output_file - ] - - extra_args = [ - ("--expected-cells", "expected_cells", True), - ("--total-droplets-included", "total_droplets_included", True), - ("--model", "model", True), - ("--epochs", "epochs", True), - ("--cuda", "cuda", False), - ("--low-count-threshold", "low_count_threshold", True), - ("--z-dim", "z_dim", True), - ("--z-layers", "z_layers", True), - ("--training-fraction", "training_fraction", True), - ("--exclude-antibody-capture", 
"exclude_antibody_capture", False), - ("--learning-rate", "learning_rate", True), - ("--empty-drop-training-fraction", "empty_drop_training_fraction", True), - ] - for (flag, name, is_kwarg) in extra_args: - if par[name]: - values = par[name] if isinstance(par[name], list) else [par[name]] - cmd_pars += [flag] + [str(val) for val in values] if is_kwarg else [flag] - - if par["expected_cells_from_qc"] and "metrics_cellranger" in data.uns: - assert par["expected_cells"] is None, "If min_counts is defined, expected_cells should be undefined" - assert par["total_droplets_included"] is None, "If min_counts is defined, expected_cells should be undefined" - met = data.uns["metrics_cellranger"] - col_name = "Estimated Number of Cells" - assert col_name in met.columns, "%s should be a column in .obs[metrics_cellranger]" - est_cells = met[col_name].values[0] - logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5) - cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5*est_cells)] - - logger.info("Running CellBender: '%s'", ' '.join(cmd_pars)) - out = subprocess.check_output(cmd_pars).decode("utf-8") - - logger.info("Reading CellBender 10xh5 output file: '%s'", output_file) - # have to use custom read_10x_h5 function for now - # will be fixed when https://github.com/scverse/scanpy/pull/2344 is merged - # adata_out = sc.read_10x_h5(output_file, gex_only=False) - adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False) - - logger.info("Copying X output to MuData") - data.layers[par["layer_output"]] = adata_out.X - - logger.info("Copying .obs output to MuData") - obs_store = { - "obs_latent_rt_efficiency": "latent_RT_efficiency", - "obs_latent_cell_probability": "latent_cell_probability", - "obs_latent_scale": "latent_scale" - } - for to_name, from_name in obs_store.items(): - if par[to_name]: - if from_name in adata_out.obs: - data.obs[par[to_name]] = adata_out.obs[from_name] - # when 
using unfiltered data, the values will be in uns instead of obs - elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: - vec = np.zeros(data.n_obs) - vec[adata_out.uns['barcode_indices_for_latents']] = adata_out.uns[from_name] - data.obs[par[to_name]] = vec - - logger.info("Copying .var output to MuData") - var_store = { "var_ambient_expression": "ambient_expression" } - for to_name, from_name in var_store.items(): - if par[to_name]: - data.var[par[to_name]] = adata_out.var[from_name] - - logger.info("Copying obsm_latent_gene_encoding output to MuData") - obsm_store = { "obsm_latent_gene_encoding": "latent_gene_encoding" } - for to_name, from_name in obsm_store.items(): - if par[to_name]: - if from_name in adata_out.obsm: - data.obsm[par[to_name]] = adata_out.obsm[from_name] - elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: - matrix_to_store = adata_out.uns[from_name] - number_of_obs = data.X.shape[0] - latent_space_sparse = csr_matrix((number_of_obs, par['z_dim']), - dtype=adata_out.uns[from_name].dtype) - obs_rows_in_space_representation = adata_out.uns['barcode_indices_for_latents'] - latent_space_sparse[obs_rows_in_space_representation] = adata_out.uns[from_name] - data.obsm[par[to_name]] = latent_space_sparse - else: - raise RuntimeError("Requested to save latent gene encoding, but the data is either missing " - "from cellbender output or in an incorrect format.") - - -logger.info("Writing to file %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/correction/cellbender_remove_background_v0_2/helper.py b/target/docker/correction/cellbender_remove_background_v0_2/helper.py deleted file mode 100644 index 479dd56f596..00000000000 --- a/target/docker/correction/cellbender_remove_background_v0_2/helper.py +++ /dev/null @@ -1,143 +0,0 @@ -# This file is copied from https://github.com/broadinstitute/CellBender/issues/128#issuecomment-1175336065 -# to solve an issue with scanpy not being able to read in the 10x h5 files produced by cellbender. -# -# Note: If something doesn't work in this helper function, it may be interesting to -# take a look at the comments by Dries: https://github.com/openpipelines-bio/openpipeline/pull/115 -# I'm not going to apply them for now -- if it ain't broke, don't fix it. -import tables -import numpy as np -import scipy.sparse as sp -import anndata -from typing import Dict - - -def anndata_from_h5(file: str, - analyzed_barcodes_only: bool = True) -> 'anndata.AnnData': - """Load an output h5 file into an AnnData object for downstream work. - - Args: - file: The h5 file - analyzed_barcodes_only: False to load all barcodes, so that the size of - the AnnData object will match the size of the input raw count matrix. - True to load a limited set of barcodes: only those analyzed by the - algorithm. 
This allows relevant latent variables to be loaded - properly into adata.obs and adata.obsm, rather than adata.uns. - - Returns: - adata: The anndata object, populated with inferred latent variables - and metadata. - - """ - - d = dict_from_h5(file) - X = sp.csc_matrix((d.pop('data'), d.pop('indices'), d.pop('indptr')), - shape=d.pop('shape')).transpose().tocsr() - - # check and see if we have barcode index annotations, and if the file is filtered - barcode_key = [k for k in d.keys() if (('barcode' in k) and ('ind' in k))] - if len(barcode_key) > 0: - max_barcode_ind = d[barcode_key[0]].max() - filtered_file = (max_barcode_ind >= X.shape[0]) - else: - filtered_file = True - - if analyzed_barcodes_only: - if filtered_file: - # filtered file being read, so we don't need to subset - print('Assuming we are loading a "filtered" file that contains only cells.') - pass - elif 'barcode_indices_for_latents' in d.keys(): - X = X[d['barcode_indices_for_latents'], :] - d['barcodes'] = d['barcodes'][d['barcode_indices_for_latents']] - elif 'barcodes_analyzed_inds' in d.keys(): - X = X[d['barcodes_analyzed_inds'], :] - d['barcodes'] = d['barcodes'][d['barcodes_analyzed_inds']] - else: - print('Warning: analyzed_barcodes_only=True, but the key ' - '"barcodes_analyzed_inds" or "barcode_indices_for_latents" ' - 'is missing from the h5 file. ' - 'Will output all barcodes, and proceed as if ' - 'analyzed_barcodes_only=False') - - # Construct the anndata object. - adata = anndata.AnnData(X=X, - obs={'barcode': d.pop('barcodes').astype(str)}, - var={'gene_name': (d.pop('gene_names') if 'gene_names' in d.keys() - else d.pop('name')).astype(str)}, - dtype=X.dtype) - adata.obs.set_index('barcode', inplace=True) - adata.var.set_index('gene_name', inplace=True) - - # For CellRanger v2 legacy format, "gene_ids" was called "genes"... 
rename this - if 'genes' in d.keys(): - d['id'] = d.pop('genes') - - # For purely aesthetic purposes, rename "id" to "gene_id" - if 'id' in d.keys(): - d['gene_id'] = d.pop('id') - - # If genomes are empty, try to guess them based on gene_id - if 'genome' in d.keys(): - if np.array([s.decode() == '' for s in d['genome']]).all(): - if '_' in d['gene_id'][0].decode(): - print('Genome field blank, so attempting to guess genomes based on gene_id prefixes') - d['genome'] = np.array([s.decode().split('_')[0] for s in d['gene_id']], dtype=str) - - # Add other information to the anndata object in the appropriate slot. - _fill_adata_slots_automatically(adata, d) - - # Add a special additional field to .var if it exists. - if 'features_analyzed_inds' in adata.uns.keys(): - adata.var['cellbender_analyzed'] = [True if (i in adata.uns['features_analyzed_inds']) - else False for i in range(adata.shape[1])] - - if analyzed_barcodes_only: - for col in adata.obs.columns[adata.obs.columns.str.startswith('barcodes_analyzed') - | adata.obs.columns.str.startswith('barcode_indices')]: - try: - del adata.obs[col] - except Exception: - pass - else: - # Add a special additional field to .obs if all barcodes are included. 
- if 'barcodes_analyzed_inds' in adata.uns.keys(): - adata.obs['cellbender_analyzed'] = [True if (i in adata.uns['barcodes_analyzed_inds']) - else False for i in range(adata.shape[0])] - - return adata - - -def dict_from_h5(file: str) -> Dict[str, np.ndarray]: - """Read in everything from an h5 file and put into a dictionary.""" - d = {} - with tables.open_file(file) as f: - # read in everything - for array in f.walk_nodes("/", "Array"): - d[array.name] = array.read() - return d - - -def _fill_adata_slots_automatically(adata, d): - """Add other information to the adata object in the appropriate slot.""" - - for key, value in d.items(): - try: - if value is None: - continue - value = np.asarray(value) - if len(value.shape) == 0: - adata.uns[key] = value - elif value.shape[0] == adata.shape[0]: - if (len(value.shape) < 2) or (value.shape[1] < 2): - adata.obs[key] = value - else: - adata.obsm[key] = value - elif value.shape[0] == adata.shape[1]: - if value.dtype.name.startswith('bytes'): - adata.var[key] = value.astype(str) - else: - adata.var[key] = value - else: - adata.uns[key] = value - except Exception: - print('Unable to load data into AnnData: ', key, value, type(value)) \ No newline at end of file diff --git a/target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py b/target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/dataflow/concat/.config.vsh.yaml 
b/target/docker/dataflow/concat/.config.vsh.yaml deleted file mode 100644 index 3f0e4975e9e..00000000000 --- a/target/docker/dataflow/concat/.config.vsh.yaml +++ /dev/null @@ -1,222 +0,0 @@ -functionality: - name: "concat" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Paths to the different samples to be concatenated." - info: null - example: - - "sample_paths" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "string" - name: "--input_id" - description: "Names of the different samples that have to be concatenated. Must\ - \ be specified when using '--mode move'.\nIn this case, the ids will be used\ - \ for the columns names of the dataframes registring the conflicts.\nIf specified,\ - \ must be of same length as `--input`.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_sample_name" - description: "Name of the .obs key under which to add the sample names." - info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--other_axis_mode" - description: "How to handle the merging of other axis (var, obs, ...).\n\n - None:\ - \ keep no data\n - same: only keep elements of the matrices which are the same\ - \ in each of the samples\n - unique: only keep elements for which there is only\ - \ 1 possible value (1 value that can occur in multiple samples)\n - first: keep\ - \ the annotation from the first sample\n - only: keep elements that show up\ - \ in only one of the objects (1 unique element in only 1 sample)\n - move: identical\ - \ to 'same', but moving the conflicting values to .varm or .obsm\n" - info: null - default: - - "move" - required: false - choices: - - "same" - - "unique" - - "first" - - "only" - - "concat" - - "move" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Concatenates several uni-modal samples in .h5mu files into a single\ - \ file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - - type: "file" - path: "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - 
target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "pandas~=2.1.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - - "muon" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/concat" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/concat/concat" - viash_version: "0.7.5" - 
git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/dataflow/concat/concat b/target/docker/dataflow/concat/concat deleted file mode 100755 index d274156f9f5..00000000000 --- a/target/docker/dataflow/concat/concat +++ /dev/null @@ -1,1386 +0,0 @@ -#!/usr/bin/env bash - -# concat 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return 
the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="concat" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "concat 0.12.3" - echo "" - echo "Concatenates several uni-modal samples in .h5mu files into a single file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: sample_paths" - echo " Paths to the different samples to be concatenated." 
- echo "" - echo " --input_id" - echo " type: string, multiple values allowed" - echo " Names of the different samples that have to be concatenated. Must be" - echo " specified when using '--mode move'." - echo " In this case, the ids will be used for the columns names of the" - echo " dataframes registring the conflicts." - echo " If specified, must be of same length as \`--input\`." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obs_sample_name" - echo " type: string" - echo " default: sample_id" - echo " Name of the .obs key under which to add the sample names." - echo "" - echo " --other_axis_mode" - echo " type: string" - echo " default: move" - echo " choices: [ same, unique, first, only, concat, move ]" - echo " How to handle the merging of other axis (var, obs, ...)." - echo " - None: keep no data" - echo " - same: only keep elements of the matrices which are the same in each" - echo " of the samples" - echo " - unique: only keep elements for which there is only 1 possible value" - echo " (1 value that can occur in multiple samples)" - echo " - first: keep the annotation from the first sample" - echo " - only: keep elements that show up in only one of the objects (1 unique" - echo " element in only 1 sample)" - echo " - move: identical to 'same', but moving the conflicting values to .varm" - echo " or .obsm" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! 
command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "pandas~=2.1.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component dataflow concat" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - 
-VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-concat-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "concat 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID="$2" - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id=*) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_sample_name) - [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_SAMPLE_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_sample_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_sample_name=*) - [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name=*\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_SAMPLE_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --other_axis_mode) - [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OTHER_AXIS_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --other_axis_mode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --other_axis_mode=*) - [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode=*\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OTHER_AXIS_MODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then - VIASH_PAR_OBS_SAMPLE_NAME="sample_id" -fi -if [ -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then - VIASH_PAR_OTHER_AXIS_MODE="move" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." 
- exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_OTHER_AXIS_MODE" ]; then - VIASH_PAR_OTHER_AXIS_MODE_CHOICES=("same:unique:first:only:concat:move") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OTHER_AXIS_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_OTHER_AXIS_MODE:" ]]; then - ViashError '--other_axis_mode' specified value of \'$VIASH_PAR_OTHER_AXIS_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dataflow_concat:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_concat:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_concat:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-concat-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -import anndata -import mudata as mu -import pandas as pd -import numpy as np -from collections.abc import Iterable -from multiprocessing import Pool -from pathlib import Path -from h5py import File as H5File -from typing import Literal -import shutil - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_sample_name': $( if [ ! -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then echo "r'${VIASH_PAR_OBS_SAMPLE_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'other_axis_mode': $( if [ ! -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then echo "r'${VIASH_PAR_OTHER_AXIS_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) - -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion - -# from compress_h5mu import compress_h5mu -from h5py import Group, Dataset -from typing import Union -from functools import partial - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # 
Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -# START TEMPORARY WORKAROUND setup_logger -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def indexes_unique(indices: Iterable[pd.Index]) -> bool: - combined_indices = indices[0].append(indices[1:]) - return combined_indices.is_unique - -def check_observations_unique(samples: Iterable[anndata.AnnData]) -> 
None: - observation_ids = [sample.obs.index for sample in samples] - if not indexes_unique(observation_ids): - raise ValueError("Observations are not unique across samples.") - - -def nunique(row): - unique = pd.unique(row) - unique_without_na = pd.core.dtypes.missing.remove_na_arraylike(unique) - return len(unique_without_na) > 1 - -def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) -> bool: - """ - Check if any row contains duplicate values, that are not NA. - """ - numpy_array = frame.to_numpy() - with Pool(n_processes) as pool: - is_duplicated = pool.map(nunique, iter(numpy_array)) - return any(is_duplicated) - -def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \\ - -> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]: - """ - Merge matrices by combining columns that have the same name. - Columns that contain conflicting values (e.i. the columns have different values), - are not merged, but instead moved to a new dataframe. 
- """ - column_names = set(column_name for var in matrices.values() for column_name in var) - logger.debug('Trying to concatenate columns: %s.', ",".join(column_names)) - if not column_names: - return {}, pd.DataFrame(index=align_to) - conflicts, concatenated_matrix = \\ - split_conflicts_and_concatenated_columns(n_processes, - matrices, - column_names, - align_to) - concatenated_matrix = cast_to_writeable_dtype(concatenated_matrix) - conflicts = {conflict_name: cast_to_writeable_dtype(conflict_df) - for conflict_name, conflict_df in conflicts.items()} - return conflicts, concatenated_matrix - -def get_first_non_na_value_vector(df): - numpy_arr = df.to_numpy() - n_rows, n_cols = numpy_arr.shape - col_index = pd.isna(numpy_arr).argmin(axis=1) - flat_index = n_cols * np.arange(n_rows) + col_index - return pd.Series(numpy_arr.ravel()[flat_index], index=df.index, name=df.columns[0]) - -def split_conflicts_and_concatenated_columns(n_processes: int, - matrices: dict[str, pd.DataFrame], - column_names: Iterable[str], - align_to: pd.Index | None = None) -> \\ - tuple[dict[str, pd.DataFrame], pd.DataFrame]: - """ - Retrieve columns with the same name from a list of dataframes which are - identical across all the frames (ignoring NA values). - Columns which are not the same are regarded as 'conflicts', - which are stored in seperate dataframes, one per columns - with the same name that store conflicting values. - """ - conflicts = {} - concatenated_matrix = [] - for column_name in column_names: - columns = {input_id: var[column_name] - for input_id, var in matrices.items() - if column_name in var} - assert columns, "Some columns should have been found." 
- concatenated_columns = pd.concat(columns.values(), axis=1, - join="outer", sort=False) - if any_row_contains_duplicate_values(n_processes, concatenated_columns): - concatenated_columns.columns = columns.keys() # Use the sample id as column name - if align_to is not None: - concatenated_columns = concatenated_columns.reindex(align_to, copy=False) - conflicts[f'conflict_{column_name}'] = concatenated_columns - else: - unique_values = get_first_non_na_value_vector(concatenated_columns) - concatenated_matrix.append(unique_values) - if not concatenated_matrix: - return conflicts, pd.DataFrame(index=align_to) - concatenated_matrix = pd.concat(concatenated_matrix, join="outer", - axis=1, sort=False) - if align_to is not None: - concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False) - return conflicts, concatenated_matrix - -def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame: - """ - Cast the dataframe to dtypes that can be written by mudata. - """ - # dtype inferral workfs better with np.nan - result = result.replace({pd.NA: np.nan}) - - # MuData supports nullable booleans and ints - # ie. \`IntegerArray\` and \`BooleanArray\` - result = result.convert_dtypes(infer_objects=True, - convert_integer=True, - convert_string=False, - convert_boolean=True, - convert_floating=False) - - # Convert leftover 'object' columns to string - # However, na values are supported, so convert all values except NA's to string - object_cols = result.select_dtypes(include='object').columns.values - for obj_col in object_cols: - result[obj_col] = result[obj_col].where(result[obj_col].isna(), result[obj_col].astype(str)).astype('category') - return result - -def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnData], output: anndata.AnnData) \\ - -> anndata.AnnData: - """ - Merge .var and .obs matrices of the anndata objects. Columns are merged - when the values (excl NA) are the same in each of the matrices. 
- Conflicting columns are moved to a separate dataframe (one dataframe for each column, - containing all the corresponding column from each sample). - """ - matrices_to_parse = ("var", "obs") - for matrix_name in matrices_to_parse: - matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()} - output_index = getattr(output, matrix_name).index - align_to = output_index if matrix_name == "var" else None - conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to) - if concatenated_matrix.empty: - concatenated_matrix.index = output_index - # Write the conflicts to the output - for conflict_name, conflict_data in conflicts.items(): - getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data - - # Set other annotation matrices in the output - setattr(output, matrix_name, concatenated_matrix) - - return output - - -def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str | Path], - other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData: - - concat_modes = { - "move": None, - } - other_axis_mode_to_apply = concat_modes.get(other_axis_mode, other_axis_mode) - - mod_data = {} - for input_id, input_file in zip(input_ids, input_files): - try: - mod_data[input_id] = mu.read_h5ad(input_file, mod=mod) - except KeyError as e: # Modality does not exist for this sample, skip it - if f"Unable to open object '{mod}' doesn't exist" not in str(e): - raise e - pass - check_observations_unique(mod_data.values()) - - concatenated_data = anndata.concat(mod_data.values(), join='outer', merge=other_axis_mode_to_apply) - - if other_axis_mode == "move": - concatenated_data = split_conflicts_modalities(n_processes, mod_data, concatenated_data) - - return concatenated_data - -def concatenate_modalities(n_processes: int, modalities: list[str], input_files: Path | str, - other_axis_mode: str, output_file: Path | str, - compression: Literal['gzip'] | Literal['lzf'], - input_ids: tuple[str] | None = 
None) -> None: - """ - Join the modalities together into a single multimodal sample. - """ - logger.info('Concatenating samples.') - output_file, input_files = Path(output_file), [Path(input_file) for input_file in input_files] - output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") - output_file_uncompressed.touch() - # Create empty mudata file - mdata = mu.MuData({modality: anndata.AnnData() for modality in modalities}) - mdata.write(output_file_uncompressed, compression=compression) - - for mod_name in modalities: - new_mod = concatenate_modality(n_processes, mod_name, - input_files, other_axis_mode, - input_ids) - logger.info("Writing out modality '%s' to '%s' with compression '%s'.", - mod_name, output_file_uncompressed, compression) - mu.write_h5ad(output_file_uncompressed, data=new_mod, mod=mod_name) - - if compression: - compress_h5mu(output_file_uncompressed, output_file, compression=compression) - output_file_uncompressed.unlink() - else: - shutil.move(output_file_uncompressed, output_file) - - logger.info("Concatenation successful.") - -def main() -> None: - # Get a list of all possible modalities - mods = set() - for path in par["input"]: - try: - with H5File(path, 'r') as f_root: - mods = mods | set(f_root["mod"].keys()) - except OSError: - raise OSError(f"Failed to load {path}. 
Is it a valid h5 file?") - - input_ids = None - if par["input_id"]: - input_ids: tuple[str] = tuple(i.strip() for i in par["input_id"]) - if len(input_ids) != len(par["input"]): - raise ValueError("The number of sample names must match the number of sample files.") - - if len(set(input_ids)) != len(input_ids): - raise ValueError("The sample names should be unique.") - - logger.info("\\nConcatenating data from paths:\\n\\t%s", - "\\n\\t".join(par["input"])) - - if par["other_axis_mode"] == "move" and not input_ids: - raise ValueError("--mode 'move' requires --input_ids.") - - n_processes = meta["cpus"] if meta["cpus"] else 1 - concatenate_modalities(n_processes, - list(mods), - par["input"], - par["other_axis_mode"], - par["output"], - par["output_compression"], - input_ids=input_ids) - - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/dataflow/concat/setup_logger.py b/target/docker/dataflow/concat/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/dataflow/concat/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/dataflow/merge/.config.vsh.yaml b/target/docker/dataflow/merge/.config.vsh.yaml deleted file mode 100644 index 65b92df4ab3..00000000000 --- a/target/docker/dataflow/merge/.config.vsh.yaml +++ /dev/null @@ -1,175 +0,0 @@ -functionality: - name: "merge" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Paths to the single-modality .h5mu files that need to be combined" - info: null - default: - - "sample_paths" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Path to the output file." 
- info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Combine one or more single-modality .h5mu files together into one\ - \ .h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu" - - type: "file" - path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "pandas~=2.0.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false 
- transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/merge" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/merge/merge" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/dataflow/merge/merge b/target/docker/dataflow/merge/merge deleted file mode 100755 index 680184215dc..00000000000 --- a/target/docker/dataflow/merge/merge +++ /dev/null @@ -1,1051 +0,0 @@ -#!/usr/bin/env bash - -# merge 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. 
The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 
-VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="merge" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "merge 0.12.3" - echo "" - echo "Combine one or more single-modality .h5mu files together into one .h5mu file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " default: sample_paths" - echo " Paths to the single-modality .h5mu files that need to be combined" - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " default: output.h5mu" - echo " Path to the output file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! 
command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "pandas~=2.0.0" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component dataflow merge" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - 
-VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-merge-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "merge 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="output.h5mu" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." 
- exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dataflow_merge:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_merge:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_merge:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-merge-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -import mudata as md -import pandas as pd -import numpy as np - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - logger.info('Reading input files %s', ",".join(par["input"])) - input_samples = [md.read_h5mu(path) for path in par["input"]] - - logger.info('Merging into single object.') - sample_modalities = {} - for input_sample in input_samples: - for mod_name, mod_data in input_sample.mod.items(): - if mod_name in sample_modalities: - raise ValueError(f"Modality '{mod_name}' was found in more than 1 sample.") - sample_modalities[mod_name] = mod_data - - merged = md.MuData(sample_modalities) - merged.update() - for df_attr in ("var", "obs"): - df = getattr(merged, df_attr) - df = df.replace({pd.NA: np.nan}, inplace=False) - - # MuData supports nullable booleans and ints - # ie. 
\`IntegerArray\` and \`BooleanArray\` - df = df.convert_dtypes(infer_objects=True, - convert_integer=True, - convert_string=False, - convert_boolean=True, - convert_floating=False) - - # Convert leftover 'object' columns to string - object_cols = df.select_dtypes(include='object').columns.values - for obj_col in object_cols: - df[obj_col].astype(str).astype('category') - setattr(merged, df_attr, df) - - merged.write_h5mu(par["output"], compression=par["output_compression"]) - logger.info('Finished') - - -if __name__ == '__main__': - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/dataflow/merge/setup_logger.py b/target/docker/dataflow/merge/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/dataflow/merge/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/dataflow/split_modalities/.config.vsh.yaml b/target/docker/dataflow/split_modalities/.config.vsh.yaml deleted file mode 100644 index a32bc5f622b..00000000000 --- a/target/docker/dataflow/split_modalities/.config.vsh.yaml +++ /dev/null @@ -1,214 +0,0 @@ -functionality: - name: "split_modalities" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to a single .h5mu file." 
- info: null - default: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory containing multiple h5mu files." - info: null - example: - - "/path/to/output" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_types" - description: "A csv containing the base filename and modality type per output\ - \ file." - info: null - example: - - "types.csv" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--compression" - description: "The compression format to be used on the final h5mu object." - info: null - default: - - "gzip" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Split the modalities from a single .h5mu multimodal sample into seperate\ - \ .h5mu files. 
\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - 
container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/split_modalities" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/split_modalities/split_modalities" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/dataflow/split_modalities/setup_logger.py b/target/docker/dataflow/split_modalities/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/dataflow/split_modalities/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/dataflow/split_modalities/split_modalities b/target/docker/dataflow/split_modalities/split_modalities deleted file mode 100755 index 3fa63d3502b..00000000000 --- a/target/docker/dataflow/split_modalities/split_modalities +++ /dev/null @@ -1,1065 +0,0 @@ -#!/usr/bin/env bash - -# split_modalities 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) -# * Robrecht Cannoodt (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the 
verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="split_modalities" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "split_modalities 0.12.3" - echo "" - echo "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu" - echo "files." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " default: sample_path" - echo " Path to a single .h5mu file." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/output" - echo " Output directory containing multiple h5mu files." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --output_types" - echo " type: file, required parameter, output, file must exist" - echo " example: types.csv" - echo " A csv containing the base filename and modality type per output file." - echo "" - echo " --compression" - echo " type: string" - echo " default: gzip" - echo " The compression format to be used on the final h5mu object." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component dataflow split_modalities" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-split_modalities-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "split_modalities 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_types) - [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_TYPES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_types. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_types=*) - [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types=*\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_TYPES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --compression) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --compression=*) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then - ViashError '--output_types' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then - VIASH_PAR_COMPRESSION="gzip" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_TYPES")" ) - VIASH_PAR_OUTPUT_TYPES=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_TYPES") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_TYPES" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-split_modalities-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -import mudata as md -from sys import stdout -from pathlib import Path -import pandas as pd - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_types': $( if [ ! -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_TYPES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main() -> None: - output_dir = Path(par["output"]) - if not output_dir.is_dir(): - output_dir.mkdir(parents=True) - - logger.info('Reading input file %s', par['input']) - sample = md.read_h5mu(par["input"].strip()) - input_file = Path(par["input"]) - - logger.info('Creating output types csv') - - names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" - for mod_name in sample.mod.keys() } - df = pd.DataFrame({"name": list(names.keys()), "filename": list(names.values())}) - df.to_csv(par["output_types"], index=False) - - 
logger.info('Splitting up modalities %s', ", ".join(sample.mod.keys())) - for mod_name, mod in sample.mod.items(): - new_sample = md.MuData({mod_name: mod}) - logger.info('Writing to %s', names[mod_name]) - new_sample.write_h5mu(output_dir / names[mod_name], compression=par["output_compression"]) - - logger.info("Finished") - - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ]; then - VIASH_PAR_OUTPUT_TYPES=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_TYPES") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ ! -e "$VIASH_PAR_OUTPUT_TYPES" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_TYPES' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/demux/bcl2fastq/.config.vsh.yaml b/target/docker/demux/bcl2fastq/.config.vsh.yaml deleted file mode 100644 index 9d0adfe6947..00000000000 --- a/target/docker/demux/bcl2fastq/.config.vsh.yaml +++ /dev/null @@ -1,169 +0,0 @@ -functionality: - name: "bcl2fastq" - namespace: "demux" - version: "0.12.3" - authors: - - name: "Toni Verbeiren" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - github: "tverbeiren" - linkedin: "verbeiren" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist and CEO" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - - "--runfolder_dir" - description: "Input run directory" - info: null - example: - - "bcl_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--sample_sheet" - alternatives: - - "-s" - description: "Pointer to the sample sheet" - info: null - example: - - "SampleSheet.csv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory containig fastq files" - info: null - example: - - "fastq_dir" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reports" - description: "Reports directory" - info: null - example: - - "reports_dir" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--ignore_missing" - info: null - direction: "input" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Convert bcl files to fastq files using 
bcl2fastq.\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_bcl/bcl" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/bcl2fastq:2.20" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl2fastq" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl2fastq/bcl2fastq" 
- viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/demux/bcl2fastq/bcl2fastq b/target/docker/demux/bcl2fastq/bcl2fastq deleted file mode 100755 index 81fdbb33bc7..00000000000 --- a/target/docker/demux/bcl2fastq/bcl2fastq +++ /dev/null @@ -1,1028 +0,0 @@ -#!/usr/bin/env bash - -# bcl2fastq 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Toni Verbeiren (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 
's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="bcl2fastq" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcl2fastq 0.12.3" - echo "" - echo "Convert bcl files to fastq files using bcl2fastq." 
- echo "" - echo "Arguments:" - echo " -i, --runfolder_dir, --input" - echo " type: file, required parameter, file must exist" - echo " example: bcl_dir" - echo " Input run directory" - echo "" - echo " -s, --sample_sheet" - echo " type: file, required parameter, file must exist" - echo " example: SampleSheet.csv" - echo " Pointer to the sample sheet" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: fastq_dir" - echo " Output directory containig fastq files" - echo "" - echo " --reports" - echo " type: file, output, file must exist" - echo " example: reports_dir" - echo " Reports directory" - echo "" - echo " --ignore_missing" - echo " type: boolean_true" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? 
- fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/bcl2fastq:2.20 - -ENTRYPOINT [] - - -RUN : -LABEL org.opencontainers.image.authors="Toni Verbeiren" -LABEL org.opencontainers.image.description="Companion container for running component demux bcl2fastq" -LABEL org.opencontainers.image.created="2024-01-25T10:14:00Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bcl2fastq-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "bcl2fastq 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --runfolder_dir) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--runfolder_dir\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --runfolder_dir. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_sheet) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_sheet. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_sheet=*) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet=*\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET=$(ViashRemoveFlags "$1") - shift 1 - ;; - -s) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reports) - [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REPORTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reports. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reports=*) - [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports=*\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REPORTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --ignore_missing) - [ -n "$VIASH_PAR_IGNORE_MISSING" ] && ViashError Bad arguments for option \'--ignore_missing\': \'$VIASH_PAR_IGNORE_MISSING\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_IGNORE_MISSING=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then - ViashError '--sample_sheet' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_IGNORE_MISSING+x} ]; then - VIASH_PAR_IGNORE_MISSING="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ] && [ ! -e "$VIASH_PAR_SAMPLE_SHEET" ]; then - ViashError "Input file '$VIASH_PAR_SAMPLE_SHEET' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_IGNORE_MISSING" ]]; then - if ! [[ "$VIASH_PAR_IGNORE_MISSING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--ignore_missing' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -d "$(dirname "$VIASH_PAR_REPORTS")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_REPORTS")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SAMPLE_SHEET")" ) - VIASH_PAR_SAMPLE_SHEET=$(ViashAutodetectMount "$VIASH_PAR_SAMPLE_SHEET") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_REPORTS" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REPORTS")" ) - VIASH_PAR_REPORTS=$(ViashAutodetectMount "$VIASH_PAR_REPORTS") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_REPORTS" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bcl2fastq-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\"'\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) -$( if [ ! -z ${VIASH_PAR_IGNORE_MISSING+x} ]; then echo "${VIASH_PAR_IGNORE_MISSING}" | sed "s#'#'\"'\"'#g;s#.*#par_ignore_missing='&'#" ; else echo "# par_ignore_missing="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -exo pipefail - -extra_params=() - -# Handle reports stored separate -if [ ! -z "\$par_reports" ]; then - extra_params+=("--reports-dir" "\$par_reports") -fi - -# Handle the boolean flag -if [ "\$par_ignore_missing" == "true" ]; then - extra_params+=("--ignore-missing-control" "--ignore-missing-bcl" "--ignore-missing-filter") -fi - -# Run the actual command -bcl2fastq \\ - --runfolder-dir "\$par_input" \\ - --sample-sheet "\$par_sample_sheet" \\ - --output-dir "\$par_output" \\ - "\${extra_params[@]}" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then - VIASH_PAR_SAMPLE_SHEET=$(ViashStripAutomount "$VIASH_PAR_SAMPLE_SHEET") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_PAR_REPORTS" ]; then - VIASH_PAR_REPORTS=$(ViashStripAutomount "$VIASH_PAR_REPORTS") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -e "$VIASH_PAR_REPORTS" ]; then - ViashError "Output file '$VIASH_PAR_REPORTS' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/demux/bcl_convert/.config.vsh.yaml b/target/docker/demux/bcl_convert/.config.vsh.yaml deleted file mode 100644 index c682f50ee63..00000000000 --- a/target/docker/demux/bcl_convert/.config.vsh.yaml +++ /dev/null @@ -1,189 +0,0 @@ -functionality: - name: "bcl_convert" - namespace: "demux" - version: "0.12.3" - authors: - - name: "Toni Verbeiren" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - github: "tverbeiren" - linkedin: "verbeiren" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist and CEO" - - name: "Marijke Van Moerbeke" - roles: - - "author" - info: - role: "Contributor" - links: - github: "mvanmoerbeke" - orcid: "0000-0002-3097-5621" - linkedin: "marijke-van-moerbeke-84303a34" - organizations: - - name: "OpenAnalytics" - href: "https://www.openanalytics.eu" - role: "Statistical Consultant" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input run directory" - info: null - example: - - "bcl_dir" 
- must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--sample_sheet" - alternatives: - - "-s" - description: "Pointer to the sample sheet" - info: null - example: - - "bcl_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory containig fastq files" - info: null - example: - - "fastq_dir" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reports" - description: "Reports directory" - info: null - example: - - "reports_dir" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--test_mode" - description: "Should bcl-convert be run in test mode (using --first-tile-only)?" 
- info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Convert bcl files to fastq files using bcl-convert.\nInformation about\ - \ upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\n\ - and https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_bcl/bcl2" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/bclconvert:3.10" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - 
cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl_convert" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl_convert/bcl_convert" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/demux/bcl_convert/bcl_convert b/target/docker/demux/bcl_convert/bcl_convert deleted file mode 100755 index 3217d86dd53..00000000000 --- a/target/docker/demux/bcl_convert/bcl_convert +++ /dev/null @@ -1,1033 +0,0 @@ -#!/usr/bin/env bash - -# bcl_convert 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Toni Verbeiren (author, maintainer) -# * Marijke Van Moerbeke (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on 
the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="bcl_convert" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bcl_convert 0.12.3" - echo "" - echo "Convert bcl files to fastq files using bcl-convert." - echo "Information about upgrading from bcl2fastq via" - echo "https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html" - echo "and" - echo "https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: bcl_dir" - echo " Input run directory" - echo "" - echo " -s, --sample_sheet" - echo " type: file, required parameter, file must exist" - echo " example: bcl_dir" - echo " Pointer to the sample sheet" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: fastq_dir" - echo " Output directory containig fastq files" - echo "" - echo " --reports" - echo " type: file, output, file must exist" - echo " example: reports_dir" - echo " Reports directory" - echo "" - echo " --test_mode" - echo " type: boolean" - echo " default: false" - echo " Should bcl-convert be run in test mode (using --first-tile-only)?" 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/bclconvert:3.10 - -ENTRYPOINT [] - - -RUN : -LABEL org.opencontainers.image.authors="Toni Verbeiren, Marijke Van Moerbeke" -LABEL org.opencontainers.image.description="Companion container for running component demux bcl_convert" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bcl_convert-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "bcl_convert 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --sample_sheet) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_sheet. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_sheet=*) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet=*\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET=$(ViashRemoveFlags "$1") - shift 1 - ;; - -s) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reports) - [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REPORTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reports. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reports=*) - [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports=*\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REPORTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --test_mode) - [ -n "$VIASH_PAR_TEST_MODE" ] && ViashError Bad arguments for option \'--test_mode\': \'$VIASH_PAR_TEST_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TEST_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --test_mode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --test_mode=*) - [ -n "$VIASH_PAR_TEST_MODE" ] && ViashError Bad arguments for option \'--test_mode=*\': \'$VIASH_PAR_TEST_MODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_TEST_MODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then - ViashError '--sample_sheet' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_TEST_MODE+x} ]; then - VIASH_PAR_TEST_MODE="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ] && [ ! -e "$VIASH_PAR_SAMPLE_SHEET" ]; then - ViashError "Input file '$VIASH_PAR_SAMPLE_SHEET' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_TEST_MODE" ]]; then - if ! [[ "$VIASH_PAR_TEST_MODE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--test_mode' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -d "$(dirname "$VIASH_PAR_REPORTS")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_REPORTS")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SAMPLE_SHEET")" ) - VIASH_PAR_SAMPLE_SHEET=$(ViashAutodetectMount "$VIASH_PAR_SAMPLE_SHEET") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_REPORTS" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REPORTS")" ) - VIASH_PAR_REPORTS=$(ViashAutodetectMount "$VIASH_PAR_REPORTS") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_REPORTS" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bcl_convert-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\"'\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) -$( if [ ! -z ${VIASH_PAR_TEST_MODE+x} ]; then echo "${VIASH_PAR_TEST_MODE}" | sed "s#'#'\"'\"'#g;s#.*#par_test_mode='&'#" ; else echo "# par_test_mode="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -[ -d "\$par_output" ] || mkdir -p "\$par_output" - -bcl-convert \\ - --force \\ - --bcl-input-directory "\$par_input" \\ - --output-directory "\$par_output" \\ - --sample-sheet "\$par_sample_sheet" \\ - --first-tile-only \$par_test_mode - -if [ ! -z "\$par_reports" ]; then - echo "Moving reports to its own location" - mv "\$par_output"/Reports "\$par_reports" -else - echo "Leaving reports alone" -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then - VIASH_PAR_SAMPLE_SHEET=$(ViashStripAutomount "$VIASH_PAR_SAMPLE_SHEET") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_REPORTS" ]; then - VIASH_PAR_REPORTS=$(ViashStripAutomount "$VIASH_PAR_REPORTS") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -e "$VIASH_PAR_REPORTS" ]; then - ViashError "Output file '$VIASH_PAR_REPORTS' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/demux/cellranger_mkfastq/.config.vsh.yaml b/target/docker/demux/cellranger_mkfastq/.config.vsh.yaml deleted file mode 100644 index 193a6e627b8..00000000000 --- a/target/docker/demux/cellranger_mkfastq/.config.vsh.yaml +++ /dev/null @@ -1,207 +0,0 @@ -functionality: - name: "cellranger_mkfastq" - namespace: "demux" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Samuel D'Souza" - roles: - - "author" - info: - role: "Contributor" - links: - github: "srdsam" - linkedin: "samuel-d-souza-887023150/" - organizations: - - name: "Chan Zuckerberg Biohub" - href: "https://www.czbiohub.org" - role: "Data Engineer" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: 
"0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Path to the (untarred) BCL files. Expects 'RunParameters.xml' at\ - \ './'." - info: null - example: - - "/path/to/bcl" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--sample_sheet" - description: "The path to the sample sheet." - info: null - example: - - "SampleSheet.csv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "The folder to store the demux results" - info: null - example: - - "/path/to/output" - default: - - "fastqs" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reports" - description: "Reports directory" - info: null - example: - - "reports_dir" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Demultiplex raw sequencing data" - usage: "cellranger_mkfastq \\\n --input /path/to/bcl \\\n --sample_sheet SampleSheet.csv\ - \ \\\n --output /path/to/output\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_bcl" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: 
"ghcr.io/data-intuitive/cellranger:6.1" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt-get update && apt-get upgrade -y" - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/cellranger_mkfastq" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: 
"https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq b/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq deleted file mode 100755 index 785ee6aab85..00000000000 --- a/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq +++ /dev/null @@ -1,1026 +0,0 @@ -#!/usr/bin/env bash - -# cellranger_mkfastq 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Samuel D'Souza (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function 
ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. 
-function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="cellranger_mkfastq" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellranger_mkfastq 0.12.3" - echo "" - echo "Demultiplex raw sequencing data" - echo "" - echo "Usage:" - echo "cellranger_mkfastq \\" - echo " --input /path/to/bcl \\" - echo " --sample_sheet SampleSheet.csv \\" - echo " --output /path/to/output" - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: /path/to/bcl" - echo " Path to the (untarred) BCL files. Expects 'RunParameters.xml' at './'." - echo "" - echo " --sample_sheet" - echo " type: file, required parameter, file must exist" - echo " example: SampleSheet.csv" - echo " The path to the sample sheet." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " default: fastqs" - echo " example: /path/to/output" - echo " The folder to store the demux results" - echo "" - echo " --reports" - echo " type: file, output, file must exist" - echo " example: reports_dir" - echo " Reports directory" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
- exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/cellranger:6.1 - -ENTRYPOINT [] - - -RUN apt-get update && apt-get upgrade -y -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Samuel D'Souza, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component demux cellranger_mkfastq" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code 
$? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_mkfastq-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "cellranger_mkfastq 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sample_sheet) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_sheet. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_sheet=*) - [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet=*\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SHEET=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reports) - [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REPORTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reports. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reports=*) - [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports=*\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REPORTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - 
kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then - ViashError '--sample_sheet' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ] && [ ! -e "$VIASH_PAR_SAMPLE_SHEET" ]; then - ViashError "Input file '$VIASH_PAR_SAMPLE_SHEET' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -d "$(dirname "$VIASH_PAR_REPORTS")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_REPORTS")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SAMPLE_SHEET")" ) - VIASH_PAR_SAMPLE_SHEET=$(ViashAutodetectMount "$VIASH_PAR_SAMPLE_SHEET") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_REPORTS" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REPORTS")" ) - VIASH_PAR_REPORTS=$(ViashAutodetectMount "$VIASH_PAR_REPORTS") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_REPORTS" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellranger_mkfastq-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\"'\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -# if par_input not is a folder, untar first -if [ ! -d "\$par_input" ]; then - echo "Assuming input is a tar.gz, untarring" - input_dir="\$tmpdir/bcl" - mkdir -p "\$input_dir" - tar -xzf "\$par_input" -C "\$input_dir" --strip-components=1 -else - input_dir="\$par_input" -fi - - -# add additional params -extra_params=( ) - -if [ ! -z "\$meta_cpus" ]; then - extra_params+=( "--localcores=\$meta_cpus" ) -fi -if [ ! -z "\$meta_memory_gb" ]; then - # always keep 2gb for the OS itself - memory_gb=\`python -c "print(int('\$meta_memory_gb') - 2)"\` - extra_params+=( "--localmem=\$memory_gb" ) -fi - - -echo "Running cellranger demux" - -id=myoutput - -cellranger mkfastq \\ - --id "\$id" \\ - --csv "\$par_sample_sheet" \\ - --run "\$par_input" \\ - "\${extra_params[@]}" \\ - --disable-ui \\ - --output-dir "\$par_output" - -# Move reports to their own output location -if [ ! 
-z "\$par_reports" ]; then - echo "Moving reports its own location" - mv "\$par_output"/Reports "\$par_reports" -else - echo "Leaving reports alone" -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then - VIASH_PAR_SAMPLE_SHEET=$(ViashStripAutomount "$VIASH_PAR_SAMPLE_SHEET") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_REPORTS" ]; then - VIASH_PAR_REPORTS=$(ViashStripAutomount "$VIASH_PAR_REPORTS") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -e "$VIASH_PAR_REPORTS" ]; then - ViashError "Output file '$VIASH_PAR_REPORTS' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/demux/cellranger_mkfastq/setup_logger.py b/target/docker/demux/cellranger_mkfastq/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/demux/cellranger_mkfastq/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/dimred/pca/.config.vsh.yaml b/target/docker/dimred/pca/.config.vsh.yaml deleted file mode 100644 index 09d7e977c32..00000000000 --- a/target/docker/dimred/pca/.config.vsh.yaml +++ /dev/null @@ -1,253 +0,0 @@ -functionality: - name: "pca" - namespace: "dimred" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - description: "Use specified layer for expression values instead of the .X object\ - \ from the modality." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_input" - description: "Column name in .var matrix that will be used to select which genes\ - \ to run the PCA on." - info: null - example: - - "filter_with_hvg" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting embedding." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--varm_output" - description: "In which .varm slot to store the resulting loadings matrix." - info: null - default: - - "pca_loadings" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_output" - description: "In which .uns slot to store the resulting variance objects." - info: null - default: - - "pca_variance" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_components" - description: "Number of principal components to compute. Defaults to 50, or 1\ - \ - minimum dimension size of selected representation." 
- info: null - example: - - 25 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--overwrite" - description: "Allow overwriting .obsm, .varm and .uns slots." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Computes PCA coordinates, loadings and variance decomposition. Uses\ - \ the implementation of scikit-learn [Pedregosa11].\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" 
- mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dimred/pca/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/pca" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/pca/pca" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/dimred/pca/pca b/target/docker/dimred/pca/pca deleted file mode 100755 index 028c6e95b8f..00000000000 --- a/target/docker/dimred/pca/pca +++ /dev/null @@ -1,1188 +0,0 @@ -#!/usr/bin/env bash - -# pca 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="pca" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "pca 0.12.3" - echo "" - echo "Computes PCA coordinates, loadings and variance decomposition. Uses the" - echo "implementation of scikit-learn [Pedregosa11]." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --layer" - echo " type: string" - echo " Use specified layer for expression values instead of the .X object from" - echo " the modality." - echo "" - echo " --var_input" - echo " type: string" - echo " example: filter_with_hvg" - echo " Column name in .var matrix that will be used to select which genes to" - echo " run the PCA on." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_pca" - echo " In which .obsm slot to store the resulting embedding." 
- echo "" - echo " --varm_output" - echo " type: string" - echo " default: pca_loadings" - echo " In which .varm slot to store the resulting loadings matrix." - echo "" - echo " --uns_output" - echo " type: string" - echo " default: pca_variance" - echo " In which .uns slot to store the resulting variance objects." - echo "" - echo " --num_components" - echo " type: integer" - echo " example: 25" - echo " Number of principal components to compute. Defaults to 50, or 1 -" - echo " minimum dimension size of selected representation." - echo "" - echo " --overwrite" - echo " type: boolean_true" - echo " Allow overwriting .obsm, .varm and .uns slots." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? 
# returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." 
- else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Dries De Maeyer" -LABEL org.opencontainers.image.description="Companion container for running component dimred pca" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-pca-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "pca 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer=*) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_input) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_input=*) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --varm_output) - [ -n "$VIASH_PAR_VARM_OUTPUT" ] && ViashError Bad arguments for option \'--varm_output\': \'$VIASH_PAR_VARM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --varm_output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --varm_output=*) - [ -n "$VIASH_PAR_VARM_OUTPUT" ] && ViashError Bad arguments for option \'--varm_output=*\': \'$VIASH_PAR_VARM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --uns_output) - [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --uns_output=*) - [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output=*\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --num_components) - [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUM_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_components. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --num_components=*) - [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components=*\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUM_COMPONENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --overwrite) - [ -n "$VIASH_PAR_OVERWRITE" ] && ViashError Bad arguments for option \'--overwrite\': \'$VIASH_PAR_OVERWRITE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OVERWRITE=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_pca" -fi -if [ -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then - VIASH_PAR_VARM_OUTPUT="pca_loadings" -fi -if [ -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then - VIASH_PAR_UNS_OUTPUT="pca_variance" -fi -if [ -z ${VIASH_PAR_OVERWRITE+x} ]; then - VIASH_PAR_OVERWRITE="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_NUM_COMPONENTS" ]]; then - if ! [[ "$VIASH_PAR_NUM_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--num_components' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OVERWRITE" ]]; then - if ! [[ "$VIASH_PAR_OVERWRITE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--overwrite' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dimred_pca:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_pca:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_pca:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-pca-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import scanpy as sc -import mudata as mu -import sys -from anndata import AnnData - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'uns_output': $( if [ ! 
-z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -mdata = mu.read_h5mu(par["input"]) - -logger.info("Computing PCA components for modality '%s'", par['modality']) -data = mdata.mod[par['modality']] -if par['layer'] and par['layer'] not in data.layers: - raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.") -layer = data.X if not par['layer'] else data.layers[par['layer']] -adata_input_layer = AnnData(layer) -adata_input_layer.var.index = data.var.index - -use_highly_variable = False -if par["var_input"]: - if not par["var_input"] in data.var.columns: - raise ValueError(f"Requested to use .var column {par['var_input']} " - "as a selection of genes to run the PCA on, " - f"but the column is not available for modality {par['modality']}") - use_highly_variable = True - adata_input_layer.var['highly_variable'] = data.var[par["var_input"]] - -# run pca -output_adata = sc.tl.pca( - adata_input_layer, - n_comps=par["num_components"], - copy=True, - use_highly_variable=use_highly_variable -) - -# store output in specific objects - -check_exist_dict = { - "obsm_output": ("obs"), - "varm_output": ("varm"), - "uns_output": ("uns") -} -for parameter_name, field in check_exist_dict.items(): - if par[parameter_name] in 
getattr(data, field): - if not par["overwrite"]: - raise ValueError(f"Requested to create field {par[parameter_name]} in .{field} " - f"for modality {par['modality']}, but field already exists.") - del getattr(data, field)[par[parameter_name]] - -data.obsm[par["obsm_output"]] = output_adata.obsm['X_pca'] -data.varm[par["varm_output"]] = output_adata.varm['PCs'] -data.uns[par["uns_output"]] = { "variance": output_adata.uns['pca']['variance'], - "variance_ratio": output_adata.uns['pca']['variance_ratio'] } - - -logger.info("Writing to %s.", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/dimred/pca/setup_logger.py b/target/docker/dimred/pca/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/dimred/pca/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/dimred/umap/.config.vsh.yaml b/target/docker/dimred/umap/.config.vsh.yaml deleted file mode 100644 index c67e3e8cc0b..00000000000 --- a/target/docker/dimred/umap/.config.vsh.yaml +++ /dev/null @@ -1,312 +0,0 @@ -functionality: - name: "umap" - namespace: "dimred" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_neighbors" - description: "The `.uns` neighbors slot as output by the `find_neighbors` component." 
- info: null - default: - - "neighbors" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "The pre/postfix under which to store the UMAP results." - info: null - default: - - "umap" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "double" - name: "--min_dist" - description: "The effective minimum distance between embedded points. Smaller\ - \ values will result in a more clustered/clumped embedding where nearby points\ - \ on the manifold are drawn closer together, while larger values will result\ - \ on a more even dispersal of points. The value should be set relative to\ - \ the spread value, which determines the scale at which embedded points will\ - \ be spread out." - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--spread" - description: "The effective scale of embedded points. In combination with `min_dist`\ - \ this determines how clustered/clumped the embedded points are." 
- info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_components" - description: "The number of dimensions of the embedding." - info: null - default: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_iter" - description: "The number of iterations (epochs) of the optimization. Called\ - \ `n_epochs` in the original UMAP. Default is set to 500 if neighbors['connectivities'].shape[0]\ - \ <= 10000, else 200." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alpha" - description: "The initial learning rate for the embedding optimization." - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--gamma" - description: "Weighting applied to negative samples in low dimensional embedding\ - \ optimization. Values higher than one will result in greater weight being\ - \ given to negative samples." - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--negative_sample_rate" - description: "The number of negative edge/1-simplex samples to use per positive\ - \ edge/1-simplex sample in optimizing the low dimensional embedding." - info: null - default: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--init_pos" - description: "How to initialize the low dimensional embedding. Called `init`\ - \ in the original UMAP. 
Options are:\n\n* Any key from `.obsm`\n* `'paga'`:\ - \ positions from `paga()`\n* `'spectral'`: use a spectral embedding of the\ - \ graph\n* `'random'`: assign initial embedding positions at random.\n" - info: null - default: - - "spectral" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "UMAP (Uniform Manifold Approximation and Projection) is a manifold\ - \ learning technique suitable for visualizing high-dimensional data. Besides tending\ - \ to be faster than tSNE, it optimizes the embedding such that it best reflects\ - \ the topology of the data, which we represent throughout Scanpy using a neighborhood\ - \ graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances\ - \ in the embedding such that these best match the distribution of distances in\ - \ the high-dimensional space. 
We use the implementation of umap-learn [McInnes18].\ - \ For a few comparisons of UMAP with tSNE, see this preprint.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 
500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dimred/umap/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/umap" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/umap/umap" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/dimred/umap/setup_logger.py b/target/docker/dimred/umap/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/dimred/umap/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/dimred/umap/umap b/target/docker/dimred/umap/umap deleted file mode 100755 index 7b88be04993..00000000000 --- a/target/docker/dimred/umap/umap +++ /dev/null @@ -1,1306 +0,0 @@ -#!/usr/bin/env bash - -# umap 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="umap" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "umap 0.12.3" - echo "" - echo "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning" - echo "technique suitable for visualizing high-dimensional data. Besides tending to be" - echo "faster than tSNE, it optimizes the embedding such that it best reflects the" - echo "topology of the data, which we represent throughout Scanpy using a neighborhood" - echo "graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor" - echo "distances in the embedding such that these best match the distribution of" - echo "distances in the high-dimensional space. We use the implementation of umap-learn" - echo "[McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --uns_neighbors" - echo " type: string" - echo " default: neighbors" - echo " The \`.uns\` neighbors slot as output by the \`find_neighbors\` component." 
- echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: umap" - echo " The pre/postfix under which to store the UMAP results." - echo "" - echo "Arguments:" - echo " --min_dist" - echo " type: double" - echo " default: 0.5" - echo " The effective minimum distance between embedded points. Smaller values" - echo " will result in a more clustered/clumped embedding where nearby points on" - echo " the manifold are drawn closer together, while larger values will result" - echo " on a more even dispersal of points. The value should be set relative to" - echo " the spread value, which determines the scale at which embedded points" - echo " will be spread out." - echo "" - echo " --spread" - echo " type: double" - echo " default: 1.0" - echo " The effective scale of embedded points. In combination with \`min_dist\`" - echo " this determines how clustered/clumped the embedded points are." - echo "" - echo " --num_components" - echo " type: integer" - echo " default: 2" - echo " The number of dimensions of the embedding." - echo "" - echo " --max_iter" - echo " type: integer" - echo " The number of iterations (epochs) of the optimization. Called \`n_epochs\`" - echo " in the original UMAP. Default is set to 500 if" - echo " neighbors['connectivities'].shape[0] <= 10000, else 200." - echo "" - echo " --alpha" - echo " type: double" - echo " default: 1.0" - echo " The initial learning rate for the embedding optimization." - echo "" - echo " --gamma" - echo " type: double" - echo " default: 1.0" - echo " Weighting applied to negative samples in low dimensional embedding" - echo " optimization. 
Values higher than one will result in greater weight being" - echo " given to negative samples." - echo "" - echo " --negative_sample_rate" - echo " type: integer" - echo " default: 5" - echo " The number of negative edge/1-simplex samples to use per positive" - echo " edge/1-simplex sample in optimizing the low dimensional embedding." - echo "" - echo " --init_pos" - echo " type: string" - echo " default: spectral" - echo " How to initialize the low dimensional embedding. Called \`init\` in the" - echo " original UMAP. Options are:" - echo " * Any key from \`.obsm\`" - echo " * \`'paga'\`: positions from \`paga()\`" - echo " * \`'spectral'\`: use a spectral embedding of the graph" - echo " * \`'random'\`: assign initial embedding positions at random." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? 
- fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Dries De Maeyer" -LABEL org.opencontainers.image.description="Companion container for running component dimred umap" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-umap-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "umap 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --uns_neighbors) - [ -n "$VIASH_PAR_UNS_NEIGHBORS" ] && ViashError Bad arguments for option \'--uns_neighbors\': \'$VIASH_PAR_UNS_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_neighbors. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --uns_neighbors=*) - [ -n "$VIASH_PAR_UNS_NEIGHBORS" ] && ViashError Bad arguments for option \'--uns_neighbors=*\': \'$VIASH_PAR_UNS_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_UNS_NEIGHBORS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_dist) - [ -n "$VIASH_PAR_MIN_DIST" ] && ViashError Bad arguments for option \'--min_dist\': \'$VIASH_PAR_MIN_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_DIST="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_dist. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_dist=*) - [ -n "$VIASH_PAR_MIN_DIST" ] && ViashError Bad arguments for option \'--min_dist=*\': \'$VIASH_PAR_MIN_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_DIST=$(ViashRemoveFlags "$1") - shift 1 - ;; - --spread) - [ -n "$VIASH_PAR_SPREAD" ] && ViashError Bad arguments for option \'--spread\': \'$VIASH_PAR_SPREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SPREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --spread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --spread=*) - [ -n "$VIASH_PAR_SPREAD" ] && ViashError Bad arguments for option \'--spread=*\': \'$VIASH_PAR_SPREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SPREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --num_components) - [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUM_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_components. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --num_components=*) - [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components=*\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUM_COMPONENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_iter) - [ -n "$VIASH_PAR_MAX_ITER" ] && ViashError Bad arguments for option \'--max_iter\': \'$VIASH_PAR_MAX_ITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_ITER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_iter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_iter=*) - [ -n "$VIASH_PAR_MAX_ITER" ] && ViashError Bad arguments for option \'--max_iter=*\': \'$VIASH_PAR_MAX_ITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_ITER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alpha) - [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALPHA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alpha=*) - [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha=*\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALPHA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gamma) - [ -n "$VIASH_PAR_GAMMA" ] && ViashError Bad arguments for option \'--gamma\': \'$VIASH_PAR_GAMMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GAMMA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gamma. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gamma=*) - [ -n "$VIASH_PAR_GAMMA" ] && ViashError Bad arguments for option \'--gamma=*\': \'$VIASH_PAR_GAMMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GAMMA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --negative_sample_rate) - [ -n "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" ] && ViashError Bad arguments for option \'--negative_sample_rate\': \'$VIASH_PAR_NEGATIVE_SAMPLE_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NEGATIVE_SAMPLE_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --negative_sample_rate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --negative_sample_rate=*) - [ -n "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" ] && ViashError Bad arguments for option \'--negative_sample_rate=*\': \'$VIASH_PAR_NEGATIVE_SAMPLE_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NEGATIVE_SAMPLE_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --init_pos) - [ -n "$VIASH_PAR_INIT_POS" ] && ViashError Bad arguments for option \'--init_pos\': \'$VIASH_PAR_INIT_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INIT_POS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --init_pos. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --init_pos=*) - [ -n "$VIASH_PAR_INIT_POS" ] && ViashError Bad arguments for option \'--init_pos=*\': \'$VIASH_PAR_INIT_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INIT_POS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_UNS_NEIGHBORS+x} ]; then - VIASH_PAR_UNS_NEIGHBORS="neighbors" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="umap" -fi -if [ -z ${VIASH_PAR_MIN_DIST+x} ]; then - VIASH_PAR_MIN_DIST="0.5" -fi -if [ -z ${VIASH_PAR_SPREAD+x} ]; then - VIASH_PAR_SPREAD="1.0" -fi -if [ -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then - VIASH_PAR_NUM_COMPONENTS="2" -fi -if [ -z ${VIASH_PAR_ALPHA+x} ]; then - VIASH_PAR_ALPHA="1.0" -fi -if [ -z ${VIASH_PAR_GAMMA+x} ]; then - VIASH_PAR_GAMMA="1.0" -fi -if [ -z ${VIASH_PAR_NEGATIVE_SAMPLE_RATE+x} ]; then - VIASH_PAR_NEGATIVE_SAMPLE_RATE="5" -fi -if [ -z ${VIASH_PAR_INIT_POS+x} ]; then - VIASH_PAR_INIT_POS="spectral" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MIN_DIST" ]]; then - if ! [[ "$VIASH_PAR_MIN_DIST" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_dist' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SPREAD" ]]; then - if ! [[ "$VIASH_PAR_SPREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--spread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NUM_COMPONENTS" ]]; then - if ! [[ "$VIASH_PAR_NUM_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--num_components' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_ITER" ]]; then - if ! [[ "$VIASH_PAR_MAX_ITER" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_iter' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALPHA" ]]; then - if ! [[ "$VIASH_PAR_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--alpha' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_GAMMA" ]]; then - if ! [[ "$VIASH_PAR_GAMMA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--gamma' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" ]]; then - if ! [[ "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--negative_sample_rate' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dimred_umap:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_umap:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_umap:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-umap-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import scanpy as sc -import mudata as mu -import sys -import anndata as ad - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'uns_neighbors': $( if [ ! -z ${VIASH_PAR_UNS_NEIGHBORS+x} ]; then echo "r'${VIASH_PAR_UNS_NEIGHBORS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'min_dist': $( if [ ! -z ${VIASH_PAR_MIN_DIST+x} ]; then echo "float(r'${VIASH_PAR_MIN_DIST//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'spread': $( if [ ! -z ${VIASH_PAR_SPREAD+x} ]; then echo "float(r'${VIASH_PAR_SPREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_iter': $( if [ ! -z ${VIASH_PAR_MAX_ITER+x} ]; then echo "int(r'${VIASH_PAR_MAX_ITER//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'gamma': $( if [ ! -z ${VIASH_PAR_GAMMA+x} ]; then echo "float(r'${VIASH_PAR_GAMMA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'negative_sample_rate': $( if [ ! -z ${VIASH_PAR_NEGATIVE_SAMPLE_RATE+x} ]; then echo "int(r'${VIASH_PAR_NEGATIVE_SAMPLE_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'init_pos': $( if [ ! -z ${VIASH_PAR_INIT_POS+x} ]; then echo "r'${VIASH_PAR_INIT_POS//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s", par["input"]) -mdata = mu.read_h5mu(par["input"]) - -logger.info("Computing UMAP for modality '%s'", par['modality']) -data = mdata.mod[par['modality']] - -if par['uns_neighbors'] not in data.uns: - raise ValueError(f"'{par['uns_neighbors']}' was not found in .mod['{par['modality']}'].uns.") - -# create temporary AnnData -# ... because sc.tl.umap doesn't allow to choose -# the obsm output slot -# ... 
also we can see scanpy is a data format dependency hell -neigh_key = par["uns_neighbors"] -temp_uns = { neigh_key: data.uns[neigh_key] } -conn_key = temp_uns[neigh_key]['connectivities_key'] -dist_key = temp_uns[neigh_key]['distances_key'] -temp_obsp = { - conn_key: data.obsp[conn_key], - dist_key: data.obsp[dist_key], -} -pca_key = temp_uns[neigh_key]['params']['use_rep'] -temp_obsm = { - pca_key: data.obsm[pca_key] -} - -temp_adata = ad.AnnData( - obsm=temp_obsm, - obsp=temp_obsp, - uns=temp_uns, - shape=data.shape -) - -sc.tl.umap( - temp_adata, - min_dist=par["min_dist"], - spread=par["spread"], - n_components=par["num_components"], - maxiter=par["max_iter"], - alpha=par["alpha"], - gamma=par["gamma"], - negative_sample_rate=par["negative_sample_rate"], - init_pos=par["init_pos"], - neighbors_key=neigh_key -) - -data.obsm[par['obsm_output']] = temp_adata.obsm['X_umap'] - -logger.info("Writing to %s.", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/download/download_file/.config.vsh.yaml b/target/docker/download/download_file/.config.vsh.yaml deleted file mode 100644 index 86a0c8b5142..00000000000 --- a/target/docker/download/download_file/.config.vsh.yaml +++ /dev/null @@ -1,138 +0,0 @@ -functionality: - name: "download_file" - namespace: "download" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "string" - name: "--input" - description: "URL to a file to download." - info: null - example: - - "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Path where to store output." 
- info: null - example: - - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--verbose" - alternatives: - - "-v" - description: "Increase verbosity" - info: null - direction: "input" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Download a file.\n" - usage: "download_file \\\n --input https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5\ - \ \\\n --output output_rna.h5\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "bash:5.1.16" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus 
= 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/download/download_file/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/download/download_file" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/download/download_file/download_file" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/download/download_file/download_file b/target/docker/download/download_file/download_file deleted file mode 100755 index c95c6155173..00000000000 --- a/target/docker/download/download_file/download_file +++ /dev/null @@ -1,931 +0,0 @@ -#!/usr/bin/env bash - -# download_file 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 
1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="download_file" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "download_file 0.12.3" - echo "" - echo "Download a file." - echo "" - echo "Usage:" - echo "download_file \\" - echo " --input" - echo "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - echo "\\" - echo " --output output_rna.h5" - echo "" - echo "Arguments:" - echo " --input" - echo " type: string, required parameter" - echo " example:" - echo "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - echo " URL to a file to download." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - echo " Path where to store output." - echo "" - echo " -v, --verbose" - echo " type: boolean_true" - echo " Increase verbosity" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! 
command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM bash:5.1.16 - -ENTRYPOINT [] - - -RUN : -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component download download_file" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-download_file-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "download_file 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --verbose) - [ -n "$VIASH_PAR_VERBOSE" ] && ViashError Bad arguments for option \'--verbose\': \'$VIASH_PAR_VERBOSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VERBOSE=true - shift 1 - ;; - -v) - [ -n "$VIASH_PAR_VERBOSE" ] && ViashError Bad arguments for option \'-v\': \'$VIASH_PAR_VERBOSE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_VERBOSE=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/download_download_file:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/download_download_file:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_download_file:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_download_file:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_VERBOSE+x} ]; then - VIASH_PAR_VERBOSE="false" -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_VERBOSE" ]]; then - if ! [[ "$VIASH_PAR_VERBOSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--verbose' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/download_download_file:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_download_file:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_download_file:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-download_file-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\"'\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=() - -if [ "\$par_verbose" != "true" ]; then - extra_params+=("--quiet") -fi - -wget "\$par_input" -O "\$par_output" "\${extra_params[@]}" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/download/sync_test_resources/.config.vsh.yaml b/target/docker/download/sync_test_resources/.config.vsh.yaml deleted file mode 100644 index c6815328d22..00000000000 --- a/target/docker/download/sync_test_resources/.config.vsh.yaml +++ /dev/null @@ -1,170 +0,0 @@ -functionality: - name: "sync_test_resources" - namespace: "download" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "string" - name: "--input" - alternatives: - - "-i" - description: "Path to the S3 bucket to sync from." - info: null - default: - - "s3://openpipelines-data" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Path to the test resource directory." 
- info: null - default: - - "resources_test" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--quiet" - description: "Displays the operations that would be performed using the specified\ - \ command without actually running them." - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--dryrun" - description: "Does not display the operations performed from the specified command." - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--delete" - description: "Files that exist in the destination but not in the source are deleted\ - \ during sync." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--exclude" - description: "Exclude all files or objects from the command that matches the specified\ - \ pattern." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Synchronise the test resources from s3://openpipelines-data to resources_test" - usage: "sync_test_resources\nsync_test_resources --input s3://openpipelines-data\ - \ --output resources_test\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "amazon/aws-cli:2.11.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "yum" - packages: - - "procps" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - 
id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/download/sync_test_resources" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/download/sync_test_resources/sync_test_resources" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/download/sync_test_resources/sync_test_resources b/target/docker/download/sync_test_resources/sync_test_resources deleted file mode 100755 index 16ef6e1ce1c..00000000000 --- a/target/docker/download/sync_test_resources/sync_test_resources +++ /dev/null @@ -1,1018 +0,0 @@ -#!/usr/bin/env bash - -# sync_test_resources 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see 
https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="sync_test_resources" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "sync_test_resources 0.12.3" - echo "" - echo "Synchronise the test resources from s3://openpipelines-data to resources_test" - echo "" - echo "Usage:" - echo "sync_test_resources" - echo "sync_test_resources --input s3://openpipelines-data --output resources_test" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: string" - echo " default: s3://openpipelines-data" - echo " Path to the S3 bucket to sync from." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " default: resources_test" - echo " Path to the test resource directory." - echo "" - echo " --quiet" - echo " type: boolean_true" - echo " Displays the operations that would be performed using the specified" - echo " command without actually running them." 
- echo "" - echo " --dryrun" - echo " type: boolean_true" - echo " Does not display the operations performed from the specified command." - echo "" - echo " --delete" - echo " type: boolean_true" - echo " Files that exist in the destination but not in the source are deleted" - echo " during sync." - echo "" - echo " --exclude" - echo " type: string, multiple values allowed" - echo " Exclude all files or objects from the command that matches the specified" - echo " pattern." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM amazon/aws-cli:2.11.0 - -ENTRYPOINT [] - - -RUN yum install -y procps && \ - yum clean all && \ - rm -rf /var/cache/yum - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component download sync_test_resources" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-sync_test_resources-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "sync_test_resources 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quiet) - [ -n "$VIASH_PAR_QUIET" ] && ViashError Bad arguments for option \'--quiet\': \'$VIASH_PAR_QUIET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUIET=true - shift 1 - ;; - --dryrun) - [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DRYRUN=true - shift 1 - ;; - --delete) - [ -n "$VIASH_PAR_DELETE" ] && ViashError Bad arguments for option \'--delete\': \'$VIASH_PAR_DELETE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DELETE=true - shift 1 - ;; - --exclude) - if [ -z "$VIASH_PAR_EXCLUDE" ]; then - VIASH_PAR_EXCLUDE="$2" - else - VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --exclude=*) - if [ -z "$VIASH_PAR_EXCLUDE" ]; then - VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_INPUT+x} ]; then - VIASH_PAR_INPUT="s3://openpipelines-data" -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="resources_test" -fi -if [ -z ${VIASH_PAR_QUIET+x} ]; then - VIASH_PAR_QUIET="false" -fi -if [ -z ${VIASH_PAR_DRYRUN+x} ]; then - VIASH_PAR_DRYRUN="false" -fi -if [ -z ${VIASH_PAR_DELETE+x} ]; then - VIASH_PAR_DELETE="false" -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_QUIET" ]]; then - if ! [[ "$VIASH_PAR_QUIET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--quiet' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DRYRUN" ]]; then - if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DELETE" ]]; then - if ! [[ "$VIASH_PAR_DELETE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--delete' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-sync_test_resources-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\"'\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) -$( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "${VIASH_PAR_DRYRUN}" | sed "s#'#'\"'\"'#g;s#.*#par_dryrun='&'#" ; else echo "# par_dryrun="; fi ) -$( if [ ! -z ${VIASH_PAR_DELETE+x} ]; then echo "${VIASH_PAR_DELETE}" | sed "s#'#'\"'\"'#g;s#.*#par_delete='&'#" ; else echo "# par_delete="; fi ) -$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=( ) - -if [ "\$par_quiet" == "true" ]; then - extra_params+=( "--quiet" ) -fi -if [ "\$par_dryrun" == "true" ]; then - extra_params+=( "--dryrun" ) -fi -if [ "\$par_delete" == "true" ]; then - extra_params+=( "--delete" ) -fi - -if [ ! -z \${par_exclude+x} ]; then - IFS=":" - for var in \$par_exclude; do - unset IFS - extra_params+=( "--exclude" "\$var" ) - done -fi - - -# Disable the use of the Amazon EC2 instance metadata service (IMDS). -# see https://florian.ec/blog/github-actions-awscli-errors/ -# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 -export AWS_EC2_METADATA_DISABLED=true - -aws s3 sync "\$par_input" "\$par_output" --no-sign-request "\${extra_params[@]}" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/files/make_params/.config.vsh.yaml b/target/docker/files/make_params/.config.vsh.yaml deleted file mode 100644 index 8b3297655b7..00000000000 --- a/target/docker/files/make_params/.config.vsh.yaml +++ /dev/null @@ -1,220 +0,0 @@ -functionality: - name: "make_params" - namespace: "files" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--base_dir" - description: "Base directory to search recursively" - info: null - example: - - "/path/to/dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--pattern" - 
description: "An optional regular expression. Only file names which match the\ - \ regular expression will be matched." - info: null - example: - - "*.fastq.gz" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_dirname_drop" - description: "For every matched file, the parent directory will be traversed N\ - \ times." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_basename_id" - description: "The unique identifiers will consist of at least N dirnames." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--id_name" - description: "The name for storing the identifier field in the yaml." - info: null - default: - - "id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--path_name" - description: "The name for storing the path field in the yaml." - info: null - default: - - "path" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--group_name" - description: "Top level name for the group of entries." - info: null - example: - - "param_list" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output YAML file." - info: null - example: - - "params.yaml" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "r_script" - path: "script.R" - is_executable: true - description: "Looks for files in a directory and turn it in a params file." 
- test_resources: - - type: "bash_script" - path: "test_make_params.sh" - is_executable: true - - type: "file" - path: "../../../src" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/randpy:r4.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/files/make_params/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/files/make_params" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/files/make_params/make_params" - viash_version: 
"0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/files/make_params/make_params b/target/docker/files/make_params/make_params deleted file mode 100755 index c59e9ee4a20..00000000000 --- a/target/docker/files/make_params/make_params +++ /dev/null @@ -1,1100 +0,0 @@ -#!/usr/bin/env bash - -# make_params 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (maintainer, author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function 
ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="make_params" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "make_params 0.12.3" - echo "" - echo "Looks for files in a directory and turn it in a params file." 
- echo "" - echo "Arguments:" - echo " --base_dir" - echo " type: file, required parameter, file must exist" - echo " example: /path/to/dir" - echo " Base directory to search recursively" - echo "" - echo " --pattern" - echo " type: string, required parameter" - echo " example: *.fastq.gz" - echo " An optional regular expression. Only file names which match the regular" - echo " expression will be matched." - echo "" - echo " --n_dirname_drop" - echo " type: integer" - echo " default: 0" - echo " For every matched file, the parent directory will be traversed N times." - echo "" - echo " --n_basename_id" - echo " type: integer" - echo " default: 0" - echo " The unique identifiers will consist of at least N dirnames." - echo "" - echo " --id_name" - echo " type: string" - echo " default: id" - echo " The name for storing the identifier field in the yaml." - echo "" - echo " --path_name" - echo " type: string" - echo " default: path" - echo " The name for storing the path field in the yaml." - echo "" - echo " --group_name" - echo " type: string" - echo " example: param_list" - echo " Top level name for the group of entries." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: params.yaml" - echo " Output YAML file." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/randpy:r4.0 - -ENTRYPOINT [] - - -RUN : -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component files make_params" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-make_params-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "make_params 0.12.3" - exit - ;; - --base_dir) - [ -n "$VIASH_PAR_BASE_DIR" ] && ViashError Bad arguments for option \'--base_dir\': \'$VIASH_PAR_BASE_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BASE_DIR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --base_dir. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --base_dir=*) - [ -n "$VIASH_PAR_BASE_DIR" ] && ViashError Bad arguments for option \'--base_dir=*\': \'$VIASH_PAR_BASE_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BASE_DIR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --pattern) - [ -n "$VIASH_PAR_PATTERN" ] && ViashError Bad arguments for option \'--pattern\': \'$VIASH_PAR_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PATTERN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --pattern. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --pattern=*) - [ -n "$VIASH_PAR_PATTERN" ] && ViashError Bad arguments for option \'--pattern=*\': \'$VIASH_PAR_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PATTERN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_dirname_drop) - [ -n "$VIASH_PAR_N_DIRNAME_DROP" ] && ViashError Bad arguments for option \'--n_dirname_drop\': \'$VIASH_PAR_N_DIRNAME_DROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_DIRNAME_DROP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_dirname_drop. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --n_dirname_drop=*) - [ -n "$VIASH_PAR_N_DIRNAME_DROP" ] && ViashError Bad arguments for option \'--n_dirname_drop=*\': \'$VIASH_PAR_N_DIRNAME_DROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_DIRNAME_DROP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_basename_id) - [ -n "$VIASH_PAR_N_BASENAME_ID" ] && ViashError Bad arguments for option \'--n_basename_id\': \'$VIASH_PAR_N_BASENAME_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_BASENAME_ID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_basename_id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_basename_id=*) - [ -n "$VIASH_PAR_N_BASENAME_ID" ] && ViashError Bad arguments for option \'--n_basename_id=*\': \'$VIASH_PAR_N_BASENAME_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_BASENAME_ID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --id_name) - [ -n "$VIASH_PAR_ID_NAME" ] && ViashError Bad arguments for option \'--id_name\': \'$VIASH_PAR_ID_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --id_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --id_name=*) - [ -n "$VIASH_PAR_ID_NAME" ] && ViashError Bad arguments for option \'--id_name=*\': \'$VIASH_PAR_ID_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --path_name) - [ -n "$VIASH_PAR_PATH_NAME" ] && ViashError Bad arguments for option \'--path_name\': \'$VIASH_PAR_PATH_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PATH_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --path_name. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --path_name=*) - [ -n "$VIASH_PAR_PATH_NAME" ] && ViashError Bad arguments for option \'--path_name=*\': \'$VIASH_PAR_PATH_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PATH_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --group_name) - [ -n "$VIASH_PAR_GROUP_NAME" ] && ViashError Bad arguments for option \'--group_name\': \'$VIASH_PAR_GROUP_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GROUP_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --group_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --group_name=*) - [ -n "$VIASH_PAR_GROUP_NAME" ] && ViashError Bad arguments for option \'--group_name=*\': \'$VIASH_PAR_GROUP_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GROUP_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/files_make_params:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/files_make_params:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/files_make_params:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/files_make_params:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_BASE_DIR+x} ]; then - ViashError '--base_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_PATTERN+x} ]; then - ViashError '--pattern' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_N_DIRNAME_DROP+x} ]; then - VIASH_PAR_N_DIRNAME_DROP="0" -fi -if [ -z ${VIASH_PAR_N_BASENAME_ID+x} ]; then - VIASH_PAR_N_BASENAME_ID="0" -fi -if [ -z ${VIASH_PAR_ID_NAME+x} ]; then - VIASH_PAR_ID_NAME="id" -fi -if [ -z ${VIASH_PAR_PATH_NAME+x} ]; then - VIASH_PAR_PATH_NAME="path" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_BASE_DIR" ] && [ ! -e "$VIASH_PAR_BASE_DIR" ]; then - ViashError "Input file '$VIASH_PAR_BASE_DIR' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_N_DIRNAME_DROP" ]]; then - if ! [[ "$VIASH_PAR_N_DIRNAME_DROP" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_dirname_drop' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_BASENAME_ID" ]]; then - if ! [[ "$VIASH_PAR_N_BASENAME_ID" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_basename_id' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_BASE_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BASE_DIR")" ) - VIASH_PAR_BASE_DIR=$(ViashAutodetectMount "$VIASH_PAR_BASE_DIR") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/files_make_params:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/files_make_params:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/files_make_params:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-make_params-XXXXXX").R -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -library(dplyr) -library(purrr) - -## VIASH START -# The following code has been auto-generated by Viash. -# treat warnings as errors -.viash_orig_warn <- options(warn = 2) - -par <- list( - "base_dir" = $( if [ ! -z ${VIASH_PAR_BASE_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_BASE_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "pattern" = $( if [ ! 
-z ${VIASH_PAR_PATTERN+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATTERN" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "n_dirname_drop" = $( if [ ! -z ${VIASH_PAR_N_DIRNAME_DROP+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_DIRNAME_DROP" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "n_basename_id" = $( if [ ! -z ${VIASH_PAR_N_BASENAME_ID+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_BASENAME_ID" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "id_name" = $( if [ ! -z ${VIASH_PAR_ID_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "path_name" = $( if [ ! -z ${VIASH_PAR_PATH_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATH_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "group_name" = $( if [ ! -z ${VIASH_PAR_GROUP_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_GROUP_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ) -) -meta <- list( - "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "temp_dir" = $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_pb" = $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) -) - - -# restore original warn setting -options(.viash_orig_warn) -rm(.viash_orig_warn) - -## VIASH END - -cat("> Listing files of base dir ", par\$base_dir, "\\n", sep = "") -paths <- list.files( - normalizePath(par\$base_dir), - pattern = par\$pattern, - recursive = TRUE, - full.names = TRUE -) - -cat("> Traversing up ", par\$n_dirname_apply, " times\\n", sep = "") -for (i in seq_len(par\$n_dirname_drop)) { - paths <- dirname(paths) %>% unique() -} - -# removing /viash_automount in case we're inside a docker container -paths <- gsub("^/viash_automount", "", paths) - -cat("> Checking whether basenames are unique\\n") -i <- par\$n_basename_id -maxi <- strsplit(paths, "/") %>% map_int(length) %>% max - -regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\$") -ids <- gsub("/", "_", gsub(regex, "\\\\1", paths)) - -cat("> Printing first five rows\\n") -print(tibble(id = ids, path = paths) %>% head(5)) -cat("\\n") - -while (i < maxi && any(duplicated(ids))) { - i <- i + 1 - cat("Duplicated ids detected, combining with ", i, " dirnames in an attempt to get unique ids.\\n") - regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\$") - ids <- gsub("/", "_", gsub(regex, "\\\\1", paths)) - - cat("> Printing first five rows\\n") - print(tibble(id = ids, path = paths) %>% head(5)) - cat("\\n") -} - -cat("> Transforming into list of items\\n") -par_list <- map2( - ids, paths, - function(id, input) { - setNames(list(id, input), c(par\$id_name, par\$path_name)) - } -) - -if (!is.null(par\$group_name)) { - par_list <- setNames(list(par_list), par\$group_name) -} - -cat("> Writing as YAML\\n") -yaml::write_yaml(par_list, par\$output) -VIASHMAIN -Rscript "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! 
-z "$VIASH_PAR_BASE_DIR" ]; then - VIASH_PAR_BASE_DIR=$(ViashStripAutomount "$VIASH_PAR_BASE_DIR") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/filter/delimit_fraction/.config.vsh.yaml b/target/docker/filter/delimit_fraction/.config.vsh.yaml deleted file mode 100644 index d16f34cc3ee..00000000000 --- a/target/docker/filter/delimit_fraction/.config.vsh.yaml +++ /dev/null @@ -1,241 +0,0 @@ -functionality: - name: "delimit_fraction" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" 
- multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - info: null - example: - - "raw_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_fraction_column" - description: "Name of column from .var dataframe selecting\na column that contains\ - \ floating point values between 0 and 1.\n" - info: null - example: - - "fraction_mitochondrial" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_name_filter" - description: "In which .obs slot to store a boolean array corresponding to which\ - \ observations should be removed." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "double" - name: "--min_fraction" - description: "Min fraction for an observation to be retained (True in output)." - info: null - default: - - 0.0 - required: false - min: 0.0 - max: 1.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_fraction" - description: "Max fraction for an observation to be retained (True in output)." 
- info: null - default: - - 1.0 - required: false - min: 0.0 - max: 1.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Turns a column containing values between 0 and 1 into a boolean column\ - \ based on thresholds.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: 
"memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/delimit_fraction" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/delimit_fraction/delimit_fraction" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/filter/delimit_fraction/delimit_fraction b/target/docker/filter/delimit_fraction/delimit_fraction deleted file mode 100755 index 4c18c9a5133..00000000000 --- a/target/docker/filter/delimit_fraction/delimit_fraction +++ /dev/null @@ -1,1207 +0,0 @@ -#!/usr/bin/env bash - -# delimit_fraction 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: 
ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="delimit_fraction" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "delimit_fraction 0.12.3" - echo "" - echo "Turns a column containing values between 0 and 1 into a boolean column based on" - echo "thresholds." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --layer" - echo " type: string" - echo " example: raw_counts" - echo "" - echo " --obs_fraction_column" - echo " type: string, required parameter" - echo " example: fraction_mitochondrial" - echo " Name of column from .var dataframe selecting" - echo " a column that contains floating point values between 0 and 1." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obs_name_filter" - echo " type: string, required parameter" - echo " In which .obs slot to store a boolean array corresponding to which" - echo " observations should be removed." 
- echo "" - echo "Arguments:" - echo " --min_fraction" - echo " type: double" - echo " default: 0.0" - echo " min: 0.0" - echo " max: 1.0" - echo " Min fraction for an observation to be retained (True in output)." - echo "" - echo " --max_fraction" - echo " type: double" - echo " default: 1.0" - echo " min: 0.0" - echo " max: 1.0" - echo " Max fraction for an observation to be retained (True in output)." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component filter delimit_fraction" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-delimit_fraction-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "delimit_fraction 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer=*) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_fraction_column) - [ -n "$VIASH_PAR_OBS_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--obs_fraction_column\': \'$VIASH_PAR_OBS_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_FRACTION_COLUMN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_fraction_column. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_fraction_column=*) - [ -n "$VIASH_PAR_OBS_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--obs_fraction_column=*\': \'$VIASH_PAR_OBS_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_FRACTION_COLUMN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_name_filter) - [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_NAME_FILTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_filter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_name_filter=*) - [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter=*\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_NAME_FILTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_fraction) - [ -n "$VIASH_PAR_MIN_FRACTION" ] && ViashError Bad arguments for option \'--min_fraction\': \'$VIASH_PAR_MIN_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_fraction. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_fraction=*) - [ -n "$VIASH_PAR_MIN_FRACTION" ] && ViashError Bad arguments for option \'--min_fraction=*\': \'$VIASH_PAR_MIN_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_fraction) - [ -n "$VIASH_PAR_MAX_FRACTION" ] && ViashError Bad arguments for option \'--max_fraction\': \'$VIASH_PAR_MAX_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MAX_FRACTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_fraction. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_fraction=*) - [ -n "$VIASH_PAR_MAX_FRACTION" ] && ViashError Bad arguments for option \'--max_fraction=*\': \'$VIASH_PAR_MAX_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_FRACTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OBS_FRACTION_COLUMN+x} ]; then - ViashError '--obs_fraction_column' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then - ViashError '--obs_name_filter' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_MIN_FRACTION+x} ]; then - VIASH_PAR_MIN_FRACTION="0.0" -fi -if [ -z ${VIASH_PAR_MAX_FRACTION+x} ]; then - VIASH_PAR_MAX_FRACTION="1.0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MIN_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_MIN_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_fraction' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_MIN_FRACTION '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--min_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_MIN_FRACTION -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--min_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--min_fraction' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_MIN_FRACTION '<=' 1.0 | bc` -eq 1 ]]; then - ViashError '--min_fraction' has to be less than or equal to 1.0. 
Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_MIN_FRACTION -v n2=1.0 'BEGIN { print (n1 <= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--min_fraction' has be less than or equal to 1.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--min_fraction' specifies a maximum value but the value was not verified as neither \'bc\' or \'awk\' are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_MAX_FRACTION" ]]; then - if ! [[ "$VIASH_PAR_MAX_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--max_fraction' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_MAX_FRACTION '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--max_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_MAX_FRACTION -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--max_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--max_fraction' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_MAX_FRACTION '<=' 1.0 | bc` -eq 1 ]]; then - ViashError '--max_fraction' has to be less than or equal to 1.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_MAX_FRACTION -v n2=1.0 'BEGIN { print (n1 <= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--max_fraction' has be less than or equal to 1.0. Use "--help" to get more information on the parameters. 
- exit 1 - fi - else - ViashWarning '--max_fraction' specifies a maximum value but the value was not verified as neither \'bc\' or \'awk\' are present on the system. - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-delimit_fraction-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' - -import mudata as mu -import numpy as np -import sys -from operator import le, ge -from pandas.api.types import is_float_dtype - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_fraction_column': $( if [ ! -z ${VIASH_PAR_OBS_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OBS_FRACTION_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'min_fraction': $( if [ ! -z ${VIASH_PAR_MIN_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MIN_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_fraction': $( if [ ! -z ${VIASH_PAR_MAX_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MAX_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input data") -mdata = mu.read_h5mu(par["input"]) - -mdata.var_names_make_unique() - -mod = par['modality'] -logger.info("Processing modality %s.", mod) -data = mdata.mod[mod] - -logger.info("\\tUnfiltered data: %s", data) - -logger.info("\\tComputing aggregations.") - -def apply_filter_to_mask(mask, base, filter, comparator): - new_filt = np.ravel(comparator(base, filter)) - num_removed = np.sum(np.invert(new_filt) & mask) - mask &= new_filt - return num_removed, mask - -try: - fraction = data.obs[par['obs_fraction_column']] 
-except KeyError: - raise ValueError(f"Could not find column '{par['obs_fraction_column']}'") -if not is_float_dtype(fraction): - raise ValueError(f"Column '{par['obs_fraction_column']}' does not contain float datatype.") -if fraction.max() > 1: - raise ValueError(f"Column '{par['obs_fraction_column']}' contains values > 1.") -if fraction.min() < 0: - raise ValueError(f"Column '{par['obs_fraction_column']}' contains values < 0.") - - -# Filter cells -filters = (("min_fraction", fraction, ge, "\\tRemoving %s cells with <%s percentage mitochondrial reads."), - ("max_fraction", fraction, le, "\\tRemoving %s cells with >%s percentage mitochondrial reads."), - ) - -keep_cells = np.repeat(True, data.n_obs) -for filter_name_or_value, base, comparator, message in filters: - try: - filter = par[filter_name_or_value] - except KeyError: - filter = filter_name_or_value - if filter is not None: - num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, filter, comparator) - logger.info(message, num_removed, filter) - -data.obs[par["obs_name_filter"]] = keep_cells - -logger.info("\\tFiltered data: %s", data) -logger.info("Writing output data to %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/filter/delimit_fraction/setup_logger.py b/target/docker/filter/delimit_fraction/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/filter/delimit_fraction/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/filter/do_filter/.config.vsh.yaml b/target/docker/filter/do_filter/.config.vsh.yaml deleted file mode 100644 index 3b73a95236b..00000000000 --- a/target/docker/filter/do_filter/.config.vsh.yaml +++ /dev/null @@ -1,202 +0,0 @@ -functionality: - name: "do_filter" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - 
type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_filter" - description: "Which .obs columns to use to filter the observations by." - info: null - example: - - "filter_with_x" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_filter" - description: "Which .var columns to use to filter the observations by." - info: null - example: - - "filter_with_x" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Remove observations and variables based on specified .obs and .var\ - \ columns.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 
128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/do_filter" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/do_filter/do_filter" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/filter/do_filter/do_filter b/target/docker/filter/do_filter/do_filter deleted file mode 100755 index bd18ba5afe6..00000000000 --- a/target/docker/filter/do_filter/do_filter +++ /dev/null @@ -1,1056 +0,0 @@ -#!/usr/bin/env bash - -# do_filter 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer, contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# 
usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="do_filter" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "do_filter 0.12.3" - echo "" - echo "Remove observations and variables based on specified .obs and .var columns." - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --obs_filter" - echo " type: string, multiple values allowed" - echo " example: filter_with_x" - echo " Which .obs columns to use to filter the observations by." - echo "" - echo " --var_filter" - echo " type: string, multiple values allowed" - echo " example: filter_with_x" - echo " Which .var columns to use to filter the observations by." - echo "" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component filter do_filter" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-do_filter-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "do_filter 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_filter) - if [ -z "$VIASH_PAR_OBS_FILTER" ]; then - VIASH_PAR_OBS_FILTER="$2" - else - VIASH_PAR_OBS_FILTER="$VIASH_PAR_OBS_FILTER:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_filter. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obs_filter=*) - if [ -z "$VIASH_PAR_OBS_FILTER" ]; then - VIASH_PAR_OBS_FILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OBS_FILTER="$VIASH_PAR_OBS_FILTER:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --var_filter) - if [ -z "$VIASH_PAR_VAR_FILTER" ]; then - VIASH_PAR_VAR_FILTER="$2" - else - VIASH_PAR_VAR_FILTER="$VIASH_PAR_VAR_FILTER:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_filter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_filter=*) - if [ -z "$VIASH_PAR_VAR_FILTER" ]; then - VIASH_PAR_VAR_FILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_VAR_FILTER="$VIASH_PAR_VAR_FILTER:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 
1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_do_filter:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_do_filter:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_do_filter:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-do_filter-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -import numpy as np -import sys - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_filter': $( if [ ! -z ${VIASH_PAR_OBS_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_FILTER//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'var_filter': $( if [ ! -z ${VIASH_PAR_VAR_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_FILTER//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s", par['input']) -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Processing modality '%s'", mod) - -obs_filt = np.repeat(True, mdata.mod[mod].n_obs) -var_filt = np.repeat(True, mdata.mod[mod].n_vars) - -par["obs_filter"] = par["obs_filter"] if par["obs_filter"] else [] -par["var_filter"] = par["var_filter"] if par["var_filter"] else [] - -for obs_name in par["obs_filter"]: - logger.info("Filtering modality '%s' observations by .obs['%s']", mod, obs_name) - if not obs_name in mdata.mod[mod].obs: - raise ValueError(f".mod[{mod}].obs[{obs_name}] does not exist.") - if obs_name in mdata.mod[mod].obs: - obs_filt &= mdata.mod[mod].obs[obs_name] - -for var_name in par["var_filter"]: - logger.info("Filtering modality '%s' variables by .var['%s']", mod, var_name) - if not var_name in mdata.mod[mod].var: - raise ValueError(f".mod[{mod}].var[{var_name}] does not exist.") - if var_name in mdata.mod[mod].var: - 
var_filt &= mdata.mod[mod].var[var_name] - -mdata.mod[mod] = mdata.mod[mod][obs_filt, var_filt].copy() - -logger.info("Writing h5mu to file %s.", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/filter/do_filter/setup_logger.py b/target/docker/filter/do_filter/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/filter/do_filter/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/filter/filter_with_counts/.config.vsh.yaml b/target/docker/filter/filter_with_counts/.config.vsh.yaml deleted file mode 100644 index 199eeea87cc..00000000000 --- a/target/docker/filter/filter_with_counts/.config.vsh.yaml +++ /dev/null @@ -1,295 +0,0 @@ -functionality: - name: "filter_with_counts" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - 
direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - info: null - example: - - "raw_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--do_subset" - description: "Whether to subset before storing the output." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--obs_name_filter" - description: "In which .obs slot to store a boolean array corresponding to which\ - \ observations should be removed." - info: null - default: - - "filter_with_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_name_filter" - description: "In which .var slot to store a boolean array corresponding to which\ - \ variables should be removed." - info: null - default: - - "filter_with_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: "--min_counts" - description: "Minimum number of counts captured per cell." 
- info: null - example: - - 200 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_counts" - description: "Maximum number of counts captured per cell." - info: null - example: - - 5000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_genes_per_cell" - description: "Minimum of non-zero values per cell." - info: null - example: - - 200 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_genes_per_cell" - description: "Maximum of non-zero values per cell." - info: null - example: - - 1500000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells_per_gene" - description: "Minimum of non-zero values per gene." - info: null - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Filter scRNA-seq data based on the primary QC metrics. 
\nThis is based\ - \ on both the UMI counts, the gene counts \nand the mitochondrial genes (genes\ - \ starting with mt/MT).\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - 
cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_counts" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_counts/filter_with_counts" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/filter/filter_with_counts/filter_with_counts b/target/docker/filter/filter_with_counts/filter_with_counts deleted file mode 100755 index 68ef1ee30bd..00000000000 --- a/target/docker/filter/filter_with_counts/filter_with_counts +++ /dev/null @@ -1,1241 +0,0 @@ -#!/usr/bin/env bash - -# filter_with_counts 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (author) -# * Robrecht Cannoodt (maintainer, author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on 
the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="filter_with_counts" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "filter_with_counts 0.12.3" - echo "" - echo "Filter scRNA-seq data based on the primary QC metrics." - echo "This is based on both the UMI counts, the gene counts" - echo "and the mitochondrial genes (genes starting with mt/MT)." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --layer" - echo " type: string" - echo " example: raw_counts" - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --do_subset" - echo " type: boolean_true" - echo " Whether to subset before storing the output." - echo "" - echo " --obs_name_filter" - echo " type: string" - echo " default: filter_with_counts" - echo " In which .obs slot to store a boolean array corresponding to which" - echo " observations should be removed." 
- echo "" - echo " --var_name_filter" - echo " type: string" - echo " default: filter_with_counts" - echo " In which .var slot to store a boolean array corresponding to which" - echo " variables should be removed." - echo "" - echo "Arguments:" - echo " --min_counts" - echo " type: integer" - echo " example: 200" - echo " Minimum number of counts captured per cell." - echo "" - echo " --max_counts" - echo " type: integer" - echo " example: 5000000" - echo " Maximum number of counts captured per cell." - echo "" - echo " --min_genes_per_cell" - echo " type: integer" - echo " example: 200" - echo " Minimum of non-zero values per cell." - echo "" - echo " --max_genes_per_cell" - echo " type: integer" - echo " example: 1500000" - echo " Maximum of non-zero values per cell." - echo "" - echo " --min_cells_per_gene" - echo " type: integer" - echo " example: 3" - echo " Minimum of non-zero values per gene." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. 
Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component filter filter_with_counts" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_with_counts-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "filter_with_counts 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer=*) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --do_subset) - [ -n "$VIASH_PAR_DO_SUBSET" ] && ViashError Bad arguments for option \'--do_subset\': \'$VIASH_PAR_DO_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DO_SUBSET=true - shift 1 - ;; - --obs_name_filter) - [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_NAME_FILTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_filter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_name_filter=*) - [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter=*\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_NAME_FILTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_name_filter) - [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_NAME_FILTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_name_filter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_name_filter=*) - [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter=*\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_NAME_FILTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_counts) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_counts=*) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_counts) - [ -n "$VIASH_PAR_MAX_COUNTS" ] && ViashError Bad arguments for option \'--max_counts\': \'$VIASH_PAR_MAX_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_counts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_counts=*) - [ -n "$VIASH_PAR_MAX_COUNTS" ] && ViashError Bad arguments for option \'--max_counts=*\': \'$VIASH_PAR_MAX_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_genes_per_cell) - [ -n "$VIASH_PAR_MIN_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--min_genes_per_cell\': \'$VIASH_PAR_MIN_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_GENES_PER_CELL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_genes_per_cell. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_genes_per_cell=*) - [ -n "$VIASH_PAR_MIN_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--min_genes_per_cell=*\': \'$VIASH_PAR_MIN_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_GENES_PER_CELL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_genes_per_cell) - [ -n "$VIASH_PAR_MAX_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--max_genes_per_cell\': \'$VIASH_PAR_MAX_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_GENES_PER_CELL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_genes_per_cell. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --max_genes_per_cell=*) - [ -n "$VIASH_PAR_MAX_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--max_genes_per_cell=*\': \'$VIASH_PAR_MAX_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_GENES_PER_CELL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells_per_gene) - [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_PER_GENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_per_gene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells_per_gene=*) - [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene=*\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_PER_GENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_DO_SUBSET+x} ]; then - VIASH_PAR_DO_SUBSET="false" -fi -if [ -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then - VIASH_PAR_OBS_NAME_FILTER="filter_with_counts" -fi -if [ -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then - VIASH_PAR_VAR_NAME_FILTER="filter_with_counts" -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_DO_SUBSET" ]]; then - if ! [[ "$VIASH_PAR_DO_SUBSET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--do_subset' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MAX_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_GENES_PER_CELL" ]]; then - if ! [[ "$VIASH_PAR_MIN_GENES_PER_CELL" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_genes_per_cell' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_GENES_PER_CELL" ]]; then - if ! [[ "$VIASH_PAR_MAX_GENES_PER_CELL" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_genes_per_cell' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS_PER_GENE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells_per_gene' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! 
-z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_with_counts-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' - -import mudata as mu -import numpy as np -import sys -from operator import le, ge, gt - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'obs_name_filter': $( if [ ! 
-z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_counts': $( if [ ! -z ${VIASH_PAR_MAX_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MAX_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_CELL//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MAX_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MAX_GENES_PER_CELL//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cells_per_gene': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_PER_GENE//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input data") -mdata = mu.read_h5mu(par["input"]) - -mdata.var_names_make_unique() - -mod = par['modality'] -logger.info("Processing modality %s.", mod) -data = mdata.mod[mod] - -logger.info("\\tUnfiltered data: %s", data) - -logger.info("\\tComputing aggregations.") -n_counts_per_cell = np.ravel(np.sum(data.X, axis=1)) -n_cells_per_gene = np.sum(data.X > 0, axis=0) -n_genes_per_cell = np.sum(data.X > 0, axis=1) - -def apply_filter_to_mask(mask, base, filter, comparator): - new_filt = np.ravel(comparator(base, filter)) - 
num_removed = np.sum(np.invert(new_filt) & mask) - mask &= new_filt - return num_removed, mask - -# Filter genes -keep_genes = np.repeat(True, data.n_vars) -if par["min_cells_per_gene"] is not None: - num_removed, keep_genes = apply_filter_to_mask(keep_genes, - n_cells_per_gene, - par['min_cells_per_gene'], - ge) - logger.info("\\tRemoving %s genes with non-zero values in <%s cells.", - num_removed, par['min_cells_per_gene']) - -# Filter cells -filters = (("min_genes_per_cell", n_genes_per_cell, ge, "\\tRemoving %s cells with non-zero values in <%s genes."), - ("max_genes_per_cell", n_genes_per_cell, le, "\\tRemoving %s cells with non-zero values in >%s genes."), - ("min_counts", n_counts_per_cell, ge, "\\tRemoving %s cells with <%s total counts."), - ("max_counts", n_counts_per_cell, le, "\\tRemoving %s cells with >%s total counts."), - (0, np.sum(data[:,keep_genes].X, axis=1), gt, "\\tRemoving %s cells with %s counts")) - -keep_cells = np.repeat(True, data.n_obs) -for filter_name_or_value, base, comparator, message in filters: - try: - filter = par[filter_name_or_value] - except KeyError: - filter = filter_name_or_value - if filter is not None: - num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, filter, comparator) - logger.info(message, num_removed, filter) - -if par["obs_name_filter"] is not None: - data.obs[par["obs_name_filter"]] = keep_cells -if par["var_name_filter"] is not None: - data.var[par["var_name_filter"]] = keep_genes - -if par["do_subset"]: - mdata.mod[mod] = data[keep_cells, keep_genes] - -logger.info("\\tFiltered data: %s", data) -logger.info("Writing output data to %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/filter/filter_with_counts/setup_logger.py b/target/docker/filter/filter_with_counts/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/filter/filter_with_counts/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/filter/filter_with_hvg/.config.vsh.yaml b/target/docker/filter/filter_with_hvg/.config.vsh.yaml deleted file mode 100644 index 4a0d796bf0c..00000000000 --- a/target/docker/filter/filter_with_hvg/.config.vsh.yaml +++ /dev/null @@ -1,352 +0,0 @@ -functionality: - name: "filter_with_hvg" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - 
organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - description: "use adata.layers[layer] for expression values instead of adata.X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_name_filter" - description: "In which .var slot to store a boolean array corresponding to which\ - \ observations should be filtered out." 
- info: null - default: - - "filter_with_hvg" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--varm_name" - description: "In which .varm slot to store additional metadata." - info: null - default: - - "hvg" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--do_subset" - description: "Whether to subset before storing the output." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--flavor" - description: "Choose the flavor for identifying highly variable genes. For the\ - \ dispersion based methods\nin their default workflows, Seurat passes the cutoffs\ - \ whereas Cell Ranger passes n_top_genes.\n" - info: null - default: - - "seurat" - required: false - choices: - - "seurat" - - "cell_ranger" - - "seurat_v3" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_top_genes" - description: "Number of highly-variable genes to keep. Mandatory if flavor='seurat_v3'." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_mean" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." - info: null - default: - - 0.0125 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_mean" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." 
- info: null - default: - - 3.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_disp" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_disp" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.\ - \ Default is +inf." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--span" - description: "The fraction of the data (cells) used when estimating the variance\ - \ in the loess model fit if flavor='seurat_v3'." - info: null - default: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_bins" - description: "Number of bins for binning the mean gene expression. Normalization\ - \ is done with respect to each bin. If just a single gene falls into a bin,\ - \ the normalized dispersion is artificially set to 1." - info: null - default: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch_key" - description: "If specified, highly-variable genes are selected within each batch\ - \ separately and merged. This simple \nprocess avoids the selection of batch-specific\ - \ genes and acts as a lightweight batch correction method. \nFor all flavors,\ - \ genes are first sorted by how many batches they are a HVG. For dispersion-based\ - \ flavors \nties are broken by normalized dispersion. 
If flavor = 'seurat_v3',\ - \ ties are broken by the median (across\nbatches) rank based on within-batch\ - \ normalized variance.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\n\ - Expects logarithmized data, except when flavor='seurat_v3' in which count data\ - \ is expected.\n\nDepending on flavor, this reproduces the R-implementations of\ - \ Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the\ - \ dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion\ - \ is obtained by scaling with the mean and standard deviation of the dispersions\ - \ for genes falling into a given bin for mean expression of genes. This means\ - \ that for each bin of mean expression, highly variable genes are selected.\n\n\ - For [Stuart19], a normalized variance for each gene is computed. First, the data\ - \ are standardized (i.e., z-score normalization per feature) with a regularized\ - \ standard deviation. Next, the normalized variance is computed as the variance\ - \ of each gene after the transformation. 
Genes are ranked by the normalized variance.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scikit-misc" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: 
"/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_hvg" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_hvg/filter_with_hvg" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/filter/filter_with_hvg/filter_with_hvg b/target/docker/filter/filter_with_hvg/filter_with_hvg deleted file mode 100755 index b0031c94d3a..00000000000 --- a/target/docker/filter/filter_with_hvg/filter_with_hvg +++ /dev/null @@ -1,1407 +0,0 @@ -#!/usr/bin/env bash - -# filter_with_hvg 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (contributor) -# * Robrecht Cannoodt (maintainer, contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events 
depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="filter_with_hvg" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "filter_with_hvg 0.12.3" - echo "" - echo "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19]." - echo "" - echo "Expects logarithmized data, except when flavor='seurat_v3' in which count data" - echo "is expected." - echo "" - echo "Depending on flavor, this reproduces the R-implementations of Seurat [Satija15]," - echo "Cell Ranger [Zheng17], and Seurat v3 [Stuart19]." - echo "" - echo "For the dispersion-based methods ([Satija15] and [Zheng17]), the normalized" - echo "dispersion is obtained by scaling with the mean and standard deviation of the" - echo "dispersions for genes falling into a given bin for mean expression of genes." - echo "This means that for each bin of mean expression, highly variable genes are" - echo "selected." - echo "" - echo "For [Stuart19], a normalized variance for each gene is computed. First, the data" - echo "are standardized (i.e., z-score normalization per feature) with a regularized" - echo "standard deviation. Next, the normalized variance is computed as the variance of" - echo "each gene after the transformation. Genes are ranked by the normalized variance." 
- echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --layer" - echo " type: string" - echo " use adata.layers[layer] for expression values instead of adata.X." - echo "" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --var_name_filter" - echo " type: string" - echo " default: filter_with_hvg" - echo " In which .var slot to store a boolean array corresponding to which" - echo " observations should be filtered out." - echo "" - echo " --varm_name" - echo " type: string" - echo " default: hvg" - echo " In which .varm slot to store additional metadata." - echo "" - echo " --do_subset" - echo " type: boolean_true" - echo " Whether to subset before storing the output." - echo "" - echo " --flavor" - echo " type: string" - echo " default: seurat" - echo " choices: [ seurat, cell_ranger, seurat_v3 ]" - echo " Choose the flavor for identifying highly variable genes. For the" - echo " dispersion based methods" - echo " in their default workflows, Seurat passes the cutoffs whereas Cell" - echo " Ranger passes n_top_genes." - echo "" - echo " --n_top_genes" - echo " type: integer" - echo " Number of highly-variable genes to keep. Mandatory if" - echo " flavor='seurat_v3'." - echo "" - echo " --min_mean" - echo " type: double" - echo " default: 0.0125" - echo " If n_top_genes is defined, this and all other cutoffs for the means and" - echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." 
- echo "" - echo " --max_mean" - echo " type: double" - echo " default: 3.0" - echo " If n_top_genes is defined, this and all other cutoffs for the means and" - echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." - echo "" - echo " --min_disp" - echo " type: double" - echo " default: 0.5" - echo " If n_top_genes is defined, this and all other cutoffs for the means and" - echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." - echo "" - echo " --max_disp" - echo " type: double" - echo " If n_top_genes is defined, this and all other cutoffs for the means and" - echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." - echo " Default is +inf." - echo "" - echo " --span" - echo " type: double" - echo " default: 0.3" - echo " The fraction of the data (cells) used when estimating the variance in" - echo " the loess model fit if flavor='seurat_v3'." - echo "" - echo " --n_bins" - echo " type: integer" - echo " default: 20" - echo " Number of bins for binning the mean gene expression. Normalization is" - echo " done with respect to each bin. If just a single gene falls into a bin," - echo " the normalized dispersion is artificially set to 1." - echo "" - echo " --obs_batch_key" - echo " type: string" - echo " If specified, highly-variable genes are selected within each batch" - echo " separately and merged. This simple" - echo " process avoids the selection of batch-specific genes and acts as a" - echo " lightweight batch correction method." - echo " For all flavors, genes are first sorted by how many batches they are a" - echo " HVG. For dispersion-based flavors" - echo " ties are broken by normalized dispersion. If flavor = 'seurat_v3', ties" - echo " are broken by the median (across" - echo " batches) rank based on within-batch normalized variance." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9 - -ENTRYPOINT [] - - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scikit-misc" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component filter filter_with_hvg" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_with_hvg-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "filter_with_hvg 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer=*) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_name_filter) - [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_NAME_FILTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_name_filter. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --var_name_filter=*) - [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter=*\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_NAME_FILTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --varm_name) - [ -n "$VIASH_PAR_VARM_NAME" ] && ViashError Bad arguments for option \'--varm_name\': \'$VIASH_PAR_VARM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARM_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --varm_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --varm_name=*) - [ -n "$VIASH_PAR_VARM_NAME" ] && ViashError Bad arguments for option \'--varm_name=*\': \'$VIASH_PAR_VARM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARM_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --do_subset) - [ -n "$VIASH_PAR_DO_SUBSET" ] && ViashError Bad arguments for option \'--do_subset\': \'$VIASH_PAR_DO_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DO_SUBSET=true - shift 1 - ;; - --flavor) - [ -n "$VIASH_PAR_FLAVOR" ] && ViashError Bad arguments for option \'--flavor\': \'$VIASH_PAR_FLAVOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FLAVOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --flavor. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --flavor=*) - [ -n "$VIASH_PAR_FLAVOR" ] && ViashError Bad arguments for option \'--flavor=*\': \'$VIASH_PAR_FLAVOR\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_FLAVOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_top_genes) - [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TOP_GENES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_top_genes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_top_genes=*) - [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes=*\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TOP_GENES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_mean) - [ -n "$VIASH_PAR_MIN_MEAN" ] && ViashError Bad arguments for option \'--min_mean\': \'$VIASH_PAR_MIN_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_MEAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_mean. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_mean=*) - [ -n "$VIASH_PAR_MIN_MEAN" ] && ViashError Bad arguments for option \'--min_mean=*\': \'$VIASH_PAR_MIN_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_MEAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_mean) - [ -n "$VIASH_PAR_MAX_MEAN" ] && ViashError Bad arguments for option \'--max_mean\': \'$VIASH_PAR_MAX_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_MEAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_mean. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_mean=*) - [ -n "$VIASH_PAR_MAX_MEAN" ] && ViashError Bad arguments for option \'--max_mean=*\': \'$VIASH_PAR_MAX_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MAX_MEAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_disp) - [ -n "$VIASH_PAR_MIN_DISP" ] && ViashError Bad arguments for option \'--min_disp\': \'$VIASH_PAR_MIN_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_DISP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_disp. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_disp=*) - [ -n "$VIASH_PAR_MIN_DISP" ] && ViashError Bad arguments for option \'--min_disp=*\': \'$VIASH_PAR_MIN_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_DISP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_disp) - [ -n "$VIASH_PAR_MAX_DISP" ] && ViashError Bad arguments for option \'--max_disp\': \'$VIASH_PAR_MAX_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DISP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_disp. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_disp=*) - [ -n "$VIASH_PAR_MAX_DISP" ] && ViashError Bad arguments for option \'--max_disp=*\': \'$VIASH_PAR_MAX_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DISP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --span) - [ -n "$VIASH_PAR_SPAN" ] && ViashError Bad arguments for option \'--span\': \'$VIASH_PAR_SPAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SPAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --span. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --span=*) - [ -n "$VIASH_PAR_SPAN" ] && ViashError Bad arguments for option \'--span=*\': \'$VIASH_PAR_SPAN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SPAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_bins) - [ -n "$VIASH_PAR_N_BINS" ] && ViashError Bad arguments for option \'--n_bins\': \'$VIASH_PAR_N_BINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_BINS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_bins. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_bins=*) - [ -n "$VIASH_PAR_N_BINS" ] && ViashError Bad arguments for option \'--n_bins=*\': \'$VIASH_PAR_N_BINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_BINS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_batch_key) - [ -n "$VIASH_PAR_OBS_BATCH_KEY" ] && ViashError Bad arguments for option \'--obs_batch_key\': \'$VIASH_PAR_OBS_BATCH_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH_KEY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch_key. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_batch_key=*) - [ -n "$VIASH_PAR_OBS_BATCH_KEY" ] && ViashError Bad arguments for option \'--obs_batch_key=*\': \'$VIASH_PAR_OBS_BATCH_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_BATCH_KEY=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then - VIASH_PAR_VAR_NAME_FILTER="filter_with_hvg" -fi -if [ -z ${VIASH_PAR_VARM_NAME+x} ]; then - VIASH_PAR_VARM_NAME="hvg" -fi -if [ -z ${VIASH_PAR_DO_SUBSET+x} ]; then - VIASH_PAR_DO_SUBSET="false" -fi -if [ -z ${VIASH_PAR_FLAVOR+x} ]; then - VIASH_PAR_FLAVOR="seurat" -fi -if [ -z ${VIASH_PAR_MIN_MEAN+x} ]; then - VIASH_PAR_MIN_MEAN="0.0125" -fi -if [ -z ${VIASH_PAR_MAX_MEAN+x} ]; then - VIASH_PAR_MAX_MEAN="3.0" -fi -if [ -z ${VIASH_PAR_MIN_DISP+x} ]; then - VIASH_PAR_MIN_DISP="0.5" -fi -if [ -z ${VIASH_PAR_SPAN+x} ]; then - VIASH_PAR_SPAN="0.3" -fi -if [ -z ${VIASH_PAR_N_BINS+x} ]; then - VIASH_PAR_N_BINS="20" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_DO_SUBSET" ]]; then - if ! [[ "$VIASH_PAR_DO_SUBSET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--do_subset' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_TOP_GENES" ]]; then - if ! [[ "$VIASH_PAR_N_TOP_GENES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_top_genes' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_MEAN" ]]; then - if ! [[ "$VIASH_PAR_MIN_MEAN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_mean' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_MEAN" ]]; then - if ! [[ "$VIASH_PAR_MAX_MEAN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--max_mean' has to be a double. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_DISP" ]]; then - if ! [[ "$VIASH_PAR_MIN_DISP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_disp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_DISP" ]]; then - if ! [[ "$VIASH_PAR_MAX_DISP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--max_disp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SPAN" ]]; then - if ! [[ "$VIASH_PAR_SPAN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--span' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_BINS" ]]; then - if ! [[ "$VIASH_PAR_N_BINS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_bins' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_FLAVOR" ]; then - VIASH_PAR_FLAVOR_CHOICES=("seurat:cell_ranger:seurat_v3") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_FLAVOR_CHOICES[*]}:" =~ ":$VIASH_PAR_FLAVOR:" ]]; then - ViashError '--flavor' specified value of \'$VIASH_PAR_FLAVOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_with_hvg-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import scanpy as sc -import mudata as mu -import numpy as np -import sys -import re - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'varm_name': $( if [ ! -z ${VIASH_PAR_VARM_NAME+x} ]; then echo "r'${VIASH_PAR_VARM_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'do_subset': $( if [ ! 
-z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'flavor': $( if [ ! -z ${VIASH_PAR_FLAVOR+x} ]; then echo "r'${VIASH_PAR_FLAVOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_mean': $( if [ ! -z ${VIASH_PAR_MIN_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MIN_MEAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_mean': $( if [ ! -z ${VIASH_PAR_MAX_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MAX_MEAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_disp': $( if [ ! -z ${VIASH_PAR_MIN_DISP+x} ]; then echo "float(r'${VIASH_PAR_MIN_DISP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_disp': $( if [ ! -z ${VIASH_PAR_MAX_DISP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DISP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'span': $( if [ ! -z ${VIASH_PAR_SPAN+x} ]; then echo "float(r'${VIASH_PAR_SPAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_bins': $( if [ ! -z ${VIASH_PAR_N_BINS+x} ]; then echo "int(r'${VIASH_PAR_N_BINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'obs_batch_key': $( if [ ! -z ${VIASH_PAR_OBS_BATCH_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -mod = par['modality'] -logger.info(f"Processing modality '%s'", mod) -data = mdata.mod[mod] - -# Workaround for issue -# https://github.com/scverse/scanpy/issues/2239 -# https://github.com/scverse/scanpy/issues/2181 -if 
par['flavor'] != "seurat_v3": - # This component requires log normalized data when flavor is not seurat_v3 - # We assume that the data is correctly normalized but scanpy will look at - # .uns to check the transformations performed on the data. - # To prevent scanpy from automatically tranforming the counts when they are - # already transformed, we set the appropriate values to .uns. - if 'log1p' not in data.uns: - logger.warning("When flavor is not set to 'seurat_v3', " - "the input data for this component must be log-transformed. " - "However, the 'log1p' dictionairy in .uns has not been set. " - "This is fine if you did not log transform your data with scanpy." - "Otherwise, please check if you are providing log transformed " - "data using --layer.") - data.uns['log1p'] = {'base': None} - elif 'log1p' in data.uns and 'base' not in data.uns['log1p']: - data.uns['log1p']['base'] = None - -logger.info("\\tUnfiltered data: %s", data) - -logger.info("\\tComputing hvg") -# construct arguments -hvg_args = { - 'adata': data, - 'n_top_genes': par["n_top_genes"], - 'min_mean': par["min_mean"], - 'max_mean': par["max_mean"], - 'min_disp': par["min_disp"], - 'span': par["span"], - 'n_bins': par["n_bins"], - 'flavor': par["flavor"], - 'subset': False, - 'inplace': False, - 'layer': par['layer'], -} - -optional_parameters = { - "max_disp": "max_disp", - "obs_batch_key": "batch_key", - "n_top_genes": "n_top_genes" -} -# only add parameter if it's passed -for par_name, dest_name in optional_parameters.items(): - if par.get(par_name): - hvg_args[dest_name] = par[par_name] - -# scanpy does not do this check, although it is stated in the documentation -if par['flavor'] == "seurat_v3" and not par['n_top_genes']: - raise ValueError("When flavor is set to 'seurat_v3', you are required to set 'n_top_genes'.") - -if par["layer"] and not par['layer'] in data.layers: - raise ValueError(f"Layer '{par['layer']}' not found in layers for modality '{mod}'. 
" - f"Found layers are: {','.join(data.layers)}") -# call function -try: - out = sc.pp.highly_variable_genes(**hvg_args) - if par['obs_batch_key'] is not None: - assert (out.index == data.var.index).all(), "Expected output index values to be equivalent to the input index" -except ValueError as err: - if str(err) == "cannot specify integer \`bins\` when input data contains infinity": - err.args = ("Cannot specify integer \`bins\` when input data contains infinity. " - "Perhaps input data has not been log normalized?",) - if re.search("Bin edges must be unique:", str(err)): - raise RuntimeError("Scanpy failed to calculate hvg. The error " - "returned by scanpy (see above) could be the " - "result from trying to use this component on unfiltered data.") from err - raise err - -out.index = data.var.index -logger.info("\\tStoring output into .var") -if par.get("var_name_filter", None) is not None: - data.var[par["var_name_filter"]] = out["highly_variable"] - -if par.get("varm_name", None) is not None and 'mean_bin' in out: - # drop mean_bin as mudata/anndata doesn't support tuples - data.varm[par["varm_name"]] = out.drop("mean_bin", axis=1) - -if par["do_subset"]: - keep_feats = np.ravel(data.var[par["var_name_filter"]]) - mdata.mod[mod] = data[:,keep_feats] - -logger.info("Writing h5mu to file") -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/filter/filter_with_hvg/setup_logger.py b/target/docker/filter/filter_with_hvg/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/filter/filter_with_hvg/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/filter/filter_with_scrublet/.config.vsh.yaml b/target/docker/filter/filter_with_scrublet/.config.vsh.yaml deleted file mode 100644 index 3412282dd29..00000000000 --- a/target/docker/filter/filter_with_scrublet/.config.vsh.yaml +++ /dev/null @@ -1,304 +0,0 @@ -functionality: - name: "filter_with_scrublet" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" 
- organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_name_filter" - description: "In which .obs slot to store a boolean array corresponding to which\ - \ observations should be filtered out." - info: null - default: - - "filter_with_scrublet" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--do_subset" - description: "Whether to subset before storing the output." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--obs_name_doublet_score" - description: "Name of the doublet scores column in the obs slot of the returned\ - \ object." 
- info: null - default: - - "scrublet_doublet_score" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_counts" - description: "The number of minimal UMI counts per cell that have to be present\ - \ for initial cell detection." - info: null - default: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells" - description: "The number of cells in which UMIs for a gene were detected." - info: null - default: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_gene_variablity_percent" - description: "Used for gene filtering prior to PCA. Keep the most highly variable\ - \ genes (in the top min_gene_variability_pctl percentile), as measured by the\ - \ v-statistic [Klein et al., Cell 2015]." - info: null - default: - - 85.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_pca_components" - description: "Number of principal components to use during PCA dimensionality\ - \ reduction." - info: null - default: - - 30 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--distance_metric" - description: "The distance metric used for computing similarities." 
- info: null - default: - - "euclidean" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--allow_automatic_threshold_detection_fail" - description: "When scrublet fails to automatically determine the double score\ - \ threshold, \nallow the component to continue and set the output columns to\ - \ NA.\n" - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Doublet detection using the Scrublet method (Wolock, Lopez and Klein,\ - \ 2019).\nThe method tests for potential doublets by using the expression profiles\ - \ of\ncells to generate synthetic potential doubles which are tested against cells.\ - \ \nThe method returns a \"doublet score\" on which it calls for potential doublets.\n\ - \nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\ - \nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells\ - \ Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6%\ - \ ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000\ - \ ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n\ - \ ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - run_args: - - "--env NUMBA_CACHE_DIR=/tmp" - target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scrublet" - - "annoy==1.16.3" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_scrublet" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_scrublet/filter_with_scrublet" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git 
a/target/docker/filter/filter_with_scrublet/filter_with_scrublet b/target/docker/filter/filter_with_scrublet/filter_with_scrublet deleted file mode 100755 index dfcfe09f054..00000000000 --- a/target/docker/filter/filter_with_scrublet/filter_with_scrublet +++ /dev/null @@ -1,1260 +0,0 @@ -#!/usr/bin/env bash - -# filter_with_scrublet 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries De Maeyer (contributor) -# * Robrecht Cannoodt (maintainer, contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash 
file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="filter_with_scrublet" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "filter_with_scrublet 0.12.3" - echo "" - echo "Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019)." - echo "The method tests for potential doublets by using the expression profiles of" - echo "cells to generate synthetic potential doubles which are tested against cells." - echo "The method returns a \"doublet score\" on which it calls for potential doublets." 
- echo "" - echo "For the source code please visit https://github.com/AllonKleinLab/scrublet." - echo "" - echo "For 10x we expect the doublet rates to be:" - echo " Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered" - echo " ~0.4% ~800 ~500" - echo " ~0.8% ~1,600 ~1,000" - echo " ~1.6% ~3,200 ~2,000" - echo " ~2.3% ~4,800 ~3,000" - echo " ~3.1% ~6,400 ~4,000" - echo " ~3.9% ~8,000 ~5,000" - echo " ~4.6% ~9,600 ~6,000" - echo " ~5.4% ~11,200 ~7,000" - echo " ~6.1% ~12,800 ~8,000" - echo " ~6.9% ~14,400 ~9,000" - echo " ~7.6% ~16,000 ~10,000" - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obs_name_filter" - echo " type: string" - echo " default: filter_with_scrublet" - echo " In which .obs slot to store a boolean array corresponding to which" - echo " observations should be filtered out." - echo "" - echo " --do_subset" - echo " type: boolean_true" - echo " Whether to subset before storing the output." - echo "" - echo " --obs_name_doublet_score" - echo " type: string" - echo " default: scrublet_doublet_score" - echo " Name of the doublet scores column in the obs slot of the returned" - echo " object." - echo "" - echo " --min_counts" - echo " type: integer" - echo " default: 2" - echo " The number of minimal UMI counts per cell that have to be present for" - echo " initial cell detection." 
- echo "" - echo " --min_cells" - echo " type: integer" - echo " default: 3" - echo " The number of cells in which UMIs for a gene were detected." - echo "" - echo " --min_gene_variablity_percent" - echo " type: double" - echo " default: 85.0" - echo " Used for gene filtering prior to PCA. Keep the most highly variable" - echo " genes (in the top min_gene_variability_pctl percentile), as measured by" - echo " the v-statistic [Klein et al., Cell 2015]." - echo "" - echo " --num_pca_components" - echo " type: integer" - echo " default: 30" - echo " Number of principal components to use during PCA dimensionality" - echo " reduction." - echo "" - echo " --distance_metric" - echo " type: string" - echo " default: euclidean" - echo " The distance metric used for computing similarities." - echo "" - echo " --allow_automatic_threshold_detection_fail" - echo " type: boolean_true" - echo " When scrublet fails to automatically determine the double score" - echo " threshold," - echo " allow the component to continue and set the output columns to NA." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps build-essential && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scrublet" "annoy==1.16.3" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component filter filter_with_scrublet" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL 
org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_with_scrublet-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! 
"$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - 
ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "filter_with_scrublet 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_name_filter) - [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_NAME_FILTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_filter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_name_filter=*) - [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter=*\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_NAME_FILTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --do_subset) - [ -n "$VIASH_PAR_DO_SUBSET" ] && ViashError Bad arguments for option \'--do_subset\': \'$VIASH_PAR_DO_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_DO_SUBSET=true - shift 1 - ;; - --obs_name_doublet_score) - [ -n "$VIASH_PAR_OBS_NAME_DOUBLET_SCORE" ] && ViashError Bad arguments for option \'--obs_name_doublet_score\': \'$VIASH_PAR_OBS_NAME_DOUBLET_SCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_NAME_DOUBLET_SCORE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_doublet_score. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_name_doublet_score=*) - [ -n "$VIASH_PAR_OBS_NAME_DOUBLET_SCORE" ] && ViashError Bad arguments for option \'--obs_name_doublet_score=*\': \'$VIASH_PAR_OBS_NAME_DOUBLET_SCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_NAME_DOUBLET_SCORE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_counts) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_counts=*) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --min_cells=*) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_gene_variablity_percent) - [ -n "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" ] && ViashError Bad arguments for option \'--min_gene_variablity_percent\': \'$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_gene_variablity_percent. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_gene_variablity_percent=*) - [ -n "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" ] && ViashError Bad arguments for option \'--min_gene_variablity_percent=*\': \'$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --num_pca_components) - [ -n "$VIASH_PAR_NUM_PCA_COMPONENTS" ] && ViashError Bad arguments for option \'--num_pca_components\': \'$VIASH_PAR_NUM_PCA_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUM_PCA_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_pca_components. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --num_pca_components=*) - [ -n "$VIASH_PAR_NUM_PCA_COMPONENTS" ] && ViashError Bad arguments for option \'--num_pca_components=*\': \'$VIASH_PAR_NUM_PCA_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_NUM_PCA_COMPONENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --distance_metric) - [ -n "$VIASH_PAR_DISTANCE_METRIC" ] && ViashError Bad arguments for option \'--distance_metric\': \'$VIASH_PAR_DISTANCE_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DISTANCE_METRIC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --distance_metric. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --distance_metric=*) - [ -n "$VIASH_PAR_DISTANCE_METRIC" ] && ViashError Bad arguments for option \'--distance_metric=*\': \'$VIASH_PAR_DISTANCE_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DISTANCE_METRIC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --allow_automatic_threshold_detection_fail) - [ -n "$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL" ] && ViashError Bad arguments for option \'--allow_automatic_threshold_detection_fail\': \'$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm --env NUMBA_CACHE_DIR=/tmp -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0'" - docker run --entrypoint=bash -i --rm --env NUMBA_CACHE_DIR=/tmp -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then - VIASH_PAR_OBS_NAME_FILTER="filter_with_scrublet" -fi -if [ -z ${VIASH_PAR_DO_SUBSET+x} ]; then - VIASH_PAR_DO_SUBSET="false" -fi -if [ -z ${VIASH_PAR_OBS_NAME_DOUBLET_SCORE+x} ]; then - VIASH_PAR_OBS_NAME_DOUBLET_SCORE="scrublet_doublet_score" -fi -if [ -z ${VIASH_PAR_MIN_COUNTS+x} ]; then - VIASH_PAR_MIN_COUNTS="2" -fi -if [ -z ${VIASH_PAR_MIN_CELLS+x} ]; then - VIASH_PAR_MIN_CELLS="3" -fi -if [ -z ${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT+x} ]; then - VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT="85.0" -fi -if [ -z ${VIASH_PAR_NUM_PCA_COMPONENTS+x} ]; then - VIASH_PAR_NUM_PCA_COMPONENTS="30" -fi -if [ -z ${VIASH_PAR_DISTANCE_METRIC+x} ]; then - VIASH_PAR_DISTANCE_METRIC="euclidean" -fi -if [ -z ${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL+x} ]; then - VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_DO_SUBSET" ]]; then - if ! [[ "$VIASH_PAR_DO_SUBSET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--do_subset' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" ]]; then - if ! [[ "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_gene_variablity_percent' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NUM_PCA_COMPONENTS" ]]; then - if ! [[ "$VIASH_PAR_NUM_PCA_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--num_pca_components' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL" ]]; then - if ! [[ "$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--allow_automatic_threshold_detection_fail' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm --env NUMBA_CACHE_DIR=/tmp ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm --env NUMBA_CACHE_DIR=/tmp ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm --env NUMBA_CACHE_DIR=/tmp ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_with_scrublet-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import scrublet as scr -import mudata as mu -import numpy as np -import sys -import pandas as pd - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'obs_name_doublet_score': $( if [ ! 
-z ${VIASH_PAR_OBS_NAME_DOUBLET_SCORE+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_DOUBLET_SCORE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_gene_variablity_percent': $( if [ ! -z ${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT+x} ]; then echo "float(r'${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'num_pca_components': $( if [ ! -z ${VIASH_PAR_NUM_PCA_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_PCA_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'distance_metric': $( if [ ! -z ${VIASH_PAR_DISTANCE_METRIC+x} ]; then echo "r'${VIASH_PAR_DISTANCE_METRIC//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'allow_automatic_threshold_detection_fail': $( if [ ! -z ${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL+x} ]; then echo "r'${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par['input']) -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Processing modality '%s'.", mod) -data = mdata.mod[mod] - -logger.info("\\tRunning scrublet") -scrub = scr.Scrublet(data.X) - -doublet_scores, predicted_doublets = scrub.scrub_doublets( - min_counts=par["min_counts"], - min_cells=par["min_cells"], - 
min_gene_variability_pctl=par["min_gene_variablity_percent"], - n_prin_comps=par["num_pca_components"], - distance_metric=par["distance_metric"], - use_approx_neighbors=False -) - -try: - keep_cells = np.invert(predicted_doublets) -except TypeError: - if par['allow_automatic_threshold_detection_fail']: - # Scrublet might not throw an error and return None if it fails to detect doublets... - logger.info("\\tScrublet could not automatically detect the doublet score threshold. Setting output columns to NA.") - keep_cells = np.nan - doublet_scores = np.nan - else: - raise RuntimeError("Scrublet could not automatically detect the doublet score threshold. " - "--allow_automatic_threshold_detection_fail can be used to ignore this failure " - "and set the corresponding output columns to NA.") - -logger.info("\\tStoring output into .obs") -if par["obs_name_doublet_score"] is not None: - data.obs[par["obs_name_doublet_score"]] = doublet_scores - data.obs[par["obs_name_doublet_score"]] = data.obs[par["obs_name_doublet_score"]].astype("float64") -if par["obs_name_filter"] is not None: - data.obs[par["obs_name_filter"]] = keep_cells - data.obs[par["obs_name_filter"]] = data.obs[par["obs_name_filter"]].astype(pd.BooleanDtype()) - -if par["do_subset"]: - if pd.api.types.is_scalar(keep_cells) and pd.isna(keep_cells): - logger.warning("Not subsetting beacuse doublets were not predicted") - else: - mdata.mod[mod] = data[keep_cells, :] - -logger.info("Writing h5mu to %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/filter/filter_with_scrublet/setup_logger.py b/target/docker/filter/filter_with_scrublet/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/filter/filter_with_scrublet/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/filter/remove_modality/.config.vsh.yaml b/target/docker/filter/remove_modality/.config.vsh.yaml deleted file mode 100644 index d09eeffbd9c..00000000000 --- a/target/docker/filter/remove_modality/.config.vsh.yaml +++ /dev/null @@ -1,171 +0,0 @@ -functionality: - name: "remove_modality" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: 
"https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Remove a modality from a .h5mu file\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: 
false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/remove_modality" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/remove_modality/remove_modality" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/filter/remove_modality/remove_modality b/target/docker/filter/remove_modality/remove_modality deleted file mode 100755 index 47c305cbe2c..00000000000 --- a/target/docker/filter/remove_modality/remove_modality +++ /dev/null @@ -1,972 +0,0 @@ -#!/usr/bin/env bash - -# remove_modality 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and 
is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see 
https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="remove_modality" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "remove_modality 0.12.3" - echo "" - echo "Remove a modality from a .h5mu file" - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string, required parameter, multiple values allowed" - echo "" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component filter remove_modality" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-remove_modality-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "remove_modality 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - if [ -z "$VIASH_PAR_MODALITY" ]; then - VIASH_PAR_MODALITY="$2" - else - VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - if [ -z "$VIASH_PAR_MODALITY" ]; then - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - else - VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - ViashError '--modality' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-remove_modality-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from mudata import read_h5mu, MuData - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - - -input_mudata = read_h5mu(par['input']) -new_mods = {mod_name: mod for mod_name, mod - in input_mudata.mod.items() - if mod_name not in par['modality']} - -new_mudata = MuData(new_mods) -new_mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/filter/subset_h5mu/.config.vsh.yaml b/target/docker/filter/subset_h5mu/.config.vsh.yaml deleted file mode 100644 index bfd2021f189..00000000000 --- a/target/docker/filter/subset_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,187 +0,0 @@ -functionality: - name: "subset_h5mu" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--number_of_observations" - description: "Number of observations to be selected from the h5mu file." 
- info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Create a subset of a mudata file by selecting the first number of\ - \ observations\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - 
mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/subset_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/subset_h5mu/subset_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/filter/subset_h5mu/setup_logger.py b/target/docker/filter/subset_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/filter/subset_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/filter/subset_h5mu/subset_h5mu b/target/docker/filter/subset_h5mu/subset_h5mu deleted file mode 100755 index b43a9d3a3c1..00000000000 --- a/target/docker/filter/subset_h5mu/subset_h5mu +++ /dev/null @@ -1,994 +0,0 @@ -#!/usr/bin/env bash - -# subset_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. 
-# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 
-VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="subset_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "subset_h5mu 0.12.3" - echo "" - echo "Create a subset of a mudata file by selecting the first number of observations" - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --number_of_observations" - echo " type: integer" - echo " example: 5" - echo " Number of observations to be selected from the h5mu file." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component filter subset_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-subset_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "subset_h5mu 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --number_of_observations) - [ -n "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" ] && ViashError Bad arguments for option \'--number_of_observations\': \'$VIASH_PAR_NUMBER_OF_OBSERVATIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUMBER_OF_OBSERVATIONS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --number_of_observations. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --number_of_observations=*) - [ -n "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" ] && ViashError Bad arguments for option \'--number_of_observations=*\': \'$VIASH_PAR_NUMBER_OF_OBSERVATIONS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_NUMBER_OF_OBSERVATIONS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" ]]; then - if ! [[ "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--number_of_observations' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-subset_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'number_of_observations': $( if [ ! -z ${VIASH_PAR_NUMBER_OF_OBSERVATIONS+x} ]; then echo "int(r'${VIASH_PAR_NUMBER_OF_OBSERVATIONS//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -if __name__ == "__main__": - # read data - data = mudata.read(par["input"]) - - # subset data - if par["modality"]: - data.mod[par["modality"]] = data.mod[par["modality"]][:par["number_of_observations"]] - else: - data = data[:par["number_of_observations"]] - - # write data - data.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/integrate/harmonypy/.config.vsh.yaml b/target/docker/integrate/harmonypy/.config.vsh.yaml deleted file mode 100644 index ccec2aa8068..00000000000 --- a/target/docker/integrate/harmonypy/.config.vsh.yaml +++ /dev/null @@ -1,240 +0,0 @@ -functionality: - name: "harmonypy" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "Which .obsm slot to use as a starting PCA embedding." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_pca_integrated" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--theta" - description: "Diversity clustering penalty parameter. Specify for each variable\ - \ in group.by.vars. theta=0 does not encourage any diversity. Larger values\ - \ of theta result in more diverse clusters." - info: null - default: - - 2.0 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_covariates" - description: "The .obs field(s) that define the covariate(s) to regress out." 
- info: null - example: - - "batch" - - "sample" - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Performs Harmony integration based as described in https://github.com/immunogenomics/harmony.\ - \ Based on an implementation in python from https://github.com/slowkow/harmonypy" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "harmonypy~=0.0.6" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: 
"memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/harmonypy" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/harmonypy/harmonypy" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/integrate/harmonypy/harmonypy b/target/docker/integrate/harmonypy/harmonypy deleted file mode 100755 index f5f252f0a9e..00000000000 --- a/target/docker/integrate/harmonypy/harmonypy +++ /dev/null @@ -1,1099 +0,0 @@ -#!/usr/bin/env bash - -# harmonypy 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) -# * Robrecht Cannoodt (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the 
verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="harmonypy" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "harmonypy 0.12.3" - echo "" - echo "Performs Harmony integration based as described in" - echo "https://github.com/immunogenomics/harmony. Based on an implementation in python" - echo "from https://github.com/slowkow/harmonypy" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --obsm_input" - echo " type: string" - echo " default: X_pca" - echo " Which .obsm slot to use as a starting PCA embedding." - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_pca_integrated" - echo " In which .obsm slot to store the resulting integrated embedding." - echo "" - echo " --theta" - echo " type: double, multiple values allowed" - echo " default: 2.0" - echo " Diversity clustering penalty parameter. 
Specify for each variable in" - echo " group.by.vars. theta=0 does not encourage any diversity. Larger values" - echo " of theta result in more diverse clusters." - echo "" - echo " --obs_covariates" - echo " type: string, required parameter, multiple values allowed" - echo " example: batch:sample" - echo " The .obs field(s) that define the covariate(s) to regress out." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "harmonypy~=0.0.6" - -LABEL org.opencontainers.image.authors="Dries Schaumont, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component integrate harmonypy" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-harmonypy-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "harmonypy 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_input) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obsm_input=*) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --theta) - if [ -z "$VIASH_PAR_THETA" ]; then - VIASH_PAR_THETA="$2" - else - VIASH_PAR_THETA="$VIASH_PAR_THETA:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --theta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --theta=*) - if [ -z "$VIASH_PAR_THETA" ]; then - VIASH_PAR_THETA=$(ViashRemoveFlags "$1") - else - VIASH_PAR_THETA="$VIASH_PAR_THETA:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --obs_covariates) - if [ -z "$VIASH_PAR_OBS_COVARIATES" ]; then - VIASH_PAR_OBS_COVARIATES="$2" - else - VIASH_PAR_OBS_COVARIATES="$VIASH_PAR_OBS_COVARIATES:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_covariates. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obs_covariates=*) - if [ -z "$VIASH_PAR_OBS_COVARIATES" ]; then - VIASH_PAR_OBS_COVARIATES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OBS_COVARIATES="$VIASH_PAR_OBS_COVARIATES:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OBS_COVARIATES+x} ]; then - ViashError '--obs_covariates' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then - VIASH_PAR_OBSM_INPUT="X_pca" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_pca_integrated" -fi -if [ -z ${VIASH_PAR_THETA+x} ]; then - VIASH_PAR_THETA="2.0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [ -n "$VIASH_PAR_THETA" ]; then - IFS=':' - set -f - for val in $VIASH_PAR_THETA; do - if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--theta' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-harmonypy-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata -from harmonypy import run_harmony - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'theta': $( if [ ! -z ${VIASH_PAR_THETA+x} ]; then echo "list(map(float, r'${VIASH_PAR_THETA//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), - 'obs_covariates': $( if [ ! -z ${VIASH_PAR_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_OBS_COVARIATES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - - -def main(): - mdata = mudata.read(par["input"].strip()) - mod_name = par['modality'] - mod = mdata.mod[mod_name] - pca_embedding = mod.obsm[par['obsm_input']] - metadata = mod.obs - ho = run_harmony(pca_embedding, metadata, par['obs_covariates'], theta=par['theta']) - mod.obsm[par["obsm_output"]] = ho.Z_corr.T - mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/integrate/scanorama/.config.vsh.yaml b/target/docker/integrate/scanorama/.config.vsh.yaml deleted file mode 100644 index 76c83623e16..00000000000 --- a/target/docker/integrate/scanorama/.config.vsh.yaml +++ /dev/null @@ -1,283 +0,0 @@ -functionality: - name: "scanorama" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" 
- description: "Output .h5mu file" - info: null - default: - - "output.h5ad" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: "Column name discriminating between your batches." - info: null - default: - - "batch" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "Basis obsm slot to run scanorama on." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "The name of the field in adata.obsm where the integrated embeddings\ - \ will be stored after running this function. Defaults to X_scanorama." - info: null - default: - - "X_scanorama" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--knn" - description: "Number of nearest neighbors to use for matching." - info: null - default: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--batch_size" - description: "The batch size used in the alignment vector computation. Useful\ - \ when integrating very large (>100k samples) datasets. Set to large value that\ - \ runs within available memory." - info: null - default: - - 5000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--sigma" - description: "Correction smoothing parameter on Gaussian kernel." 
- info: null - default: - - 15.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--approx" - description: "Use approximate nearest neighbors with Python annoy; greatly speeds\ - \ up matching runtime." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alpha" - description: "Alignment score minimum cutoff" - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Use Scanorama to integrate different experiments.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scanorama" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: 
"memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scanorama" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scanorama/scanorama" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/integrate/scanorama/scanorama b/target/docker/integrate/scanorama/scanorama deleted file mode 100755 index 1d9ebd61fad..00000000000 --- a/target/docker/integrate/scanorama/scanorama +++ /dev/null @@ -1,1187 +0,0 @@ -#!/usr/bin/env bash - -# scanorama 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (author) -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the 
verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scanorama" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scanorama 0.12.3" - echo "" - echo "Use Scanorama to integrate different experiments." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5ad" - echo " Output .h5mu file" - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obs_batch" - echo " type: string" - echo " default: batch" - echo " Column name discriminating between your batches." - echo "" - echo " --obsm_input" - echo " type: string" - echo " default: X_pca" - echo " Basis obsm slot to run scanorama on." - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_scanorama" - echo " The name of the field in adata.obsm where the integrated embeddings will" - echo " be stored after running this function. Defaults to X_scanorama." 
- echo "" - echo " --knn" - echo " type: integer" - echo " default: 20" - echo " Number of nearest neighbors to use for matching." - echo "" - echo " --batch_size" - echo " type: integer" - echo " default: 5000" - echo " The batch size used in the alignment vector computation. Useful when" - echo " integrating very large (>100k samples) datasets. Set to large value that" - echo " runs within available memory." - echo "" - echo " --sigma" - echo " type: double" - echo " default: 15.0" - echo " Correction smoothing parameter on Gaussian kernel." - echo "" - echo " --approx" - echo " type: boolean" - echo " default: true" - echo " Use approximate nearest neighbors with Python annoy; greatly speeds up" - echo " matching runtime." - echo "" - echo " --alpha" - echo " type: double" - echo " default: 0.1" - echo " Alignment score minimum cutoff" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. 
-# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? 
- else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps build-essential && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scanorama" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component integrate scanorama" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scanorama-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scanorama 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_batch) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obs_batch=*) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_input) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_input=*) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --knn) - [ -n "$VIASH_PAR_KNN" ] && ViashError Bad arguments for option \'--knn\': \'$VIASH_PAR_KNN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_KNN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --knn. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --knn=*) - [ -n "$VIASH_PAR_KNN" ] && ViashError Bad arguments for option \'--knn=*\': \'$VIASH_PAR_KNN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_KNN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --batch_size) - [ -n "$VIASH_PAR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--batch_size\': \'$VIASH_PAR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BATCH_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --batch_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --batch_size=*) - [ -n "$VIASH_PAR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--batch_size=*\': \'$VIASH_PAR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BATCH_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sigma) - [ -n "$VIASH_PAR_SIGMA" ] && ViashError Bad arguments for option \'--sigma\': \'$VIASH_PAR_SIGMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SIGMA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sigma. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sigma=*) - [ -n "$VIASH_PAR_SIGMA" ] && ViashError Bad arguments for option \'--sigma=*\': \'$VIASH_PAR_SIGMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SIGMA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --approx) - [ -n "$VIASH_PAR_APPROX" ] && ViashError Bad arguments for option \'--approx\': \'$VIASH_PAR_APPROX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_APPROX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --approx. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --approx=*) - [ -n "$VIASH_PAR_APPROX" ] && ViashError Bad arguments for option \'--approx=*\': \'$VIASH_PAR_APPROX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_APPROX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alpha) - [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALPHA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alpha=*) - [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha=*\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALPHA=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then - VIASH_PAR_OBS_BATCH="batch" -fi -if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then - VIASH_PAR_OBSM_INPUT="X_pca" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_scanorama" -fi -if [ -z ${VIASH_PAR_KNN+x} ]; then - VIASH_PAR_KNN="20" -fi -if [ -z ${VIASH_PAR_BATCH_SIZE+x} ]; then - VIASH_PAR_BATCH_SIZE="5000" -fi -if [ -z ${VIASH_PAR_SIGMA+x} ]; then - VIASH_PAR_SIGMA="15.0" -fi -if [ -z ${VIASH_PAR_APPROX+x} ]; then - VIASH_PAR_APPROX="true" -fi -if [ -z ${VIASH_PAR_ALPHA+x} ]; then - VIASH_PAR_ALPHA="0.1" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_KNN" ]]; then - if ! [[ "$VIASH_PAR_KNN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--knn' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_BATCH_SIZE" ]]; then - if ! [[ "$VIASH_PAR_BATCH_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--batch_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SIGMA" ]]; then - if ! [[ "$VIASH_PAR_SIGMA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--sigma' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_APPROX" ]]; then - if ! [[ "$VIASH_PAR_APPROX" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--approx' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALPHA" ]]; then - if ! 
[[ "$VIASH_PAR_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--alpha' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scanorama-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'knn': $( if [ ! -z ${VIASH_PAR_KNN+x} ]; then echo "int(r'${VIASH_PAR_KNN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'batch_size': $( if [ ! -z ${VIASH_PAR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_BATCH_SIZE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sigma': $( if [ ! -z ${VIASH_PAR_SIGMA+x} ]; then echo "float(r'${VIASH_PAR_SIGMA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'approx': $( if [ ! -z ${VIASH_PAR_APPROX+x} ]; then echo "r'${VIASH_PAR_APPROX//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -from scanpy.external.pp import scanorama_integrate -from mudata import read_h5mu - -mdata = read_h5mu(par["input"]) - -mod_name = par["modality"] -mod = mdata.mod[mod_name] - -# Integration. -scanorama_integrate(mod, - key=par["obs_batch"], - basis=par["obsm_input"], - adjusted_basis=par["obsm_output"], - knn=par["knn"], - alpha=par["alpha"], - sigma=par["sigma"], - approx=par["approx"], - batch_size=par["batch_size"] ) - -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/integrate/scarches/.config.vsh.yaml b/target/docker/integrate/scarches/.config.vsh.yaml deleted file mode 100644 index e1b64d597ce..00000000000 --- a/target/docker/integrate/scarches/.config.vsh.yaml +++ /dev/null @@ -1,331 +0,0 @@ -functionality: - name: "scarches" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file to use as a query" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - description: "Path to the directory with reference model or a web link. 
For\ - \ HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--dataset_name" - description: "Name of query dataset to use as a batch name. If not set, name\ - \ of the input file is used" - info: null - default: - - "test_dataset" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--model_output" - description: "Output directory for model" - info: null - default: - - "model" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Early stopping arguments" - arguments: - - type: "boolean" - name: "--early_stopping" - description: "Whether to perform early stopping with respect to the validation\ - \ set." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--early_stopping_monitor" - description: "Metric logged during validation set epoch." - info: null - default: - - "elbo_validation" - required: false - choices: - - "elbo_validation" - - "reconstruction_loss_validation" - - "kl_local_validation" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--early_stopping_patience" - description: "Number of validation epochs with no improvement after which training\ - \ will be stopped." - info: null - default: - - 45 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--early_stopping_min_delta" - description: "Minimum change in the monitored quantity to qualify as an improvement,\ - \ i.e. an absolute change of less than min_delta, will count as no improvement." - info: null - default: - - 0.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset, defaults to (20000 / number\ - \ of cells) * 400 or 400; whichever is smallest." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--reduce_lr_on_plateau" - description: "Whether to monitor validation loss and reduce learning rate when\ - \ validation set `lr_scheduler_metric` plateaus." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_factor" - description: "Factor to reduce learning rate." 
- info: null - default: - - 0.6 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_patience" - description: "Number of epochs with no improvement after which learning rate\ - \ will be reduced." - info: null - default: - - 30.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs reference mapping with scArches" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - - type: "file" - path: "resources_test/HLCA_reference_model/HLCA_reference_model.zip" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/pytorch:23.09-py3" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scvi-tools~=1.0.3" - - "pandas~=2.1.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" 
- mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scarches" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scarches/scarches" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/integrate/scarches/scarches b/target/docker/integrate/scarches/scarches deleted file mode 100755 index 0dfe18f77c7..00000000000 --- a/target/docker/integrate/scarches/scarches +++ /dev/null @@ -1,1568 +0,0 @@ -#!/usr/bin/env bash - -# scarches 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Vladimir Shitov - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no 
something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scarches" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scarches 0.12.3" - echo "" - echo "Performs reference mapping with scArches" - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file to use as a query" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " -r, --reference" - echo " type: file, required parameter, file must exist" - echo " Path to the directory with reference model or a web link. For HLCA use" - echo " https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" - echo "" - echo " --dataset_name" - echo " type: string" - echo " default: test_dataset" - echo " Name of query dataset to use as a batch name. If not set, name of the" - echo " input file is used" - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo " --model_output" - echo " type: file, output, file must exist" - echo " default: model" - echo " Output directory for model" - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_integrated_scanvi" - echo " In which .obsm slot to store the resulting integrated embedding." - echo "" - echo "Early stopping arguments:" - echo " --early_stopping" - echo " type: boolean" - echo " Whether to perform early stopping with respect to the validation set." - echo "" - echo " --early_stopping_monitor" - echo " type: string" - echo " default: elbo_validation" - echo " choices: [ elbo_validation, reconstruction_loss_validation," - echo "kl_local_validation ]" - echo " Metric logged during validation set epoch." - echo "" - echo " --early_stopping_patience" - echo " type: integer" - echo " default: 45" - echo " min: 1" - echo " Number of validation epochs with no improvement after which training" - echo " will be stopped." - echo "" - echo " --early_stopping_min_delta" - echo " type: double" - echo " default: 0.0" - echo " min: 0.0" - echo " Minimum change in the monitored quantity to qualify as an improvement," - echo " i.e. an absolute change of less than min_delta, will count as no" - echo " improvement." - echo "" - echo "Learning parameters:" - echo " --max_epochs" - echo " type: integer, required parameter" - echo " Number of passes through the dataset, defaults to (20000 / number of" - echo " cells) * 400 or 400; whichever is smallest." - echo "" - echo " --reduce_lr_on_plateau" - echo " type: boolean" - echo " default: true" - echo " Whether to monitor validation loss and reduce learning rate when" - echo " validation set \`lr_scheduler_metric\` plateaus." - echo "" - echo " --lr_factor" - echo " type: double" - echo " default: 0.6" - echo " min: 0.0" - echo " Factor to reduce learning rate." 
- echo "" - echo " --lr_patience" - echo " type: double" - echo " default: 30.0" - echo " min: 0.0" - echo " Number of epochs with no improvement after which learning rate will be" - echo " reduced." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? 
# returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM nvcr.io/nvidia/pytorch:23.09-py3 - -ENTRYPOINT [] - - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scvi-tools~=1.0.3" "pandas~=2.1.0" - -LABEL org.opencontainers.image.authors="Vladimir Shitov" -LABEL org.opencontainers.image.description="Companion container for running component integrate scarches" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scarches-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scarches 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -r) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --dataset_name) - [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DATASET_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --dataset_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --dataset_name=*) - [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name=*\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DATASET_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --model_output) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --model_output=*) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping) - [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping=*) - [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping=*\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_monitor) - [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EARLY_STOPPING_MONITOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_monitor. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping_monitor=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor=*\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MONITOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_patience) - [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_PATIENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_patience. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping_patience=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience=*\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_PATIENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_min_delta) - [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MIN_DELTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_min_delta. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --early_stopping_min_delta=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta=*\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MIN_DELTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_epochs) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_epochs=*) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reduce_lr_on_plateau) - [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REDUCE_LR_ON_PLATEAU="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reduce_lr_on_plateau. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reduce_lr_on_plateau=*) - [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau=*\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REDUCE_LR_ON_PLATEAU=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lr_factor) - [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_FACTOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_factor. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --lr_factor=*) - [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor=*\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_FACTOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lr_patience) - [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_PATIENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_patience. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --lr_patience=*) - [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience=*\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LR_PATIENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then - ViashError '--max_epochs' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_DATASET_NAME+x} ]; then - VIASH_PAR_DATASET_NAME="test_dataset" -fi -if [ -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then - VIASH_PAR_MODEL_OUTPUT="model" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_integrated_scanvi" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then - VIASH_PAR_EARLY_STOPPING_MONITOR="elbo_validation" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then - VIASH_PAR_EARLY_STOPPING_PATIENCE="45" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then - VIASH_PAR_EARLY_STOPPING_MIN_DELTA="0.0" -fi -if [ -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then - VIASH_PAR_REDUCE_LR_ON_PLATEAU="true" -fi -if [ -z ${VIASH_PAR_LR_FACTOR+x} ]; then - VIASH_PAR_LR_FACTOR="0.6" -fi -if [ -z ${VIASH_PAR_LR_PATIENCE+x} ]; then - VIASH_PAR_LR_PATIENCE="30.0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_EARLY_STOPPING" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--early_stopping' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ]]; then - if ! 
[[ "$VIASH_PAR_EARLY_STOPPING_PATIENCE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--early_stopping_patience' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_EARLY_STOPPING_PATIENCE -lt 1 ]]; then - ViashError '--early_stopping_patience' has be more than or equal to 1. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--early_stopping_min_delta' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_EARLY_STOPPING_MIN_DELTA '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_EARLY_STOPPING_MIN_DELTA -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--early_stopping_min_delta' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ]]; then - if ! [[ "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--reduce_lr_on_plateau' has to be a boolean. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LR_FACTOR" ]]; then - if ! [[ "$VIASH_PAR_LR_FACTOR" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--lr_factor' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_LR_FACTOR '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_LR_FACTOR -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--lr_factor' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_LR_PATIENCE" ]]; then - if ! [[ "$VIASH_PAR_LR_PATIENCE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--lr_patience' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_LR_PATIENCE '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_LR_PATIENCE -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--lr_patience' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! 
[[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! 
-z "$VIASH_PAR_EARLY_STOPPING_MONITOR" ]; then - VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES=("elbo_validation:reconstruction_loss_validation:kl_local_validation") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES[*]}:" =~ ":$VIASH_PAR_EARLY_STOPPING_MONITOR:" ]]; then - ViashError '--early_stopping_monitor' specified value of \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_MODEL_OUTPUT")" ) - VIASH_PAR_MODEL_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_MODEL_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_MODEL_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_scarches:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scarches:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scarches:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scarches-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import mudata -import scvi -from torch.cuda import is_available as cuda_is_available -try: - from torch.backends.mps import is_available as mps_is_available -except ModuleNotFoundError: - # Older pytorch versions - # MacOS GPUs - def mps_is_available(): - return False - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'dataset_name': $( if [ ! -z ${VIASH_PAR_DATASET_NAME+x} ]; then echo "r'${VIASH_PAR_DATASET_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'early_stopping_patience': $( if [ ! 
-z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def _read_model_name_from_registry(model_path) -> str: - """Read registry with information about the model, return the model name""" - registry = scvi.model.base.BaseModelClass.load_registry(model_path) - return registry["model_name"] - - -def _detect_base_model(model_path): - """Read from the model's file which scvi_tools model it contains""" - - names_to_models_map = { - "AUTOZI": scvi.model.AUTOZI, - "CondSCVI": scvi.model.CondSCVI, - "DestVI": scvi.model.DestVI, - "LinearSCVI": scvi.model.LinearSCVI, - "PEAKVI": scvi.model.PEAKVI, - "SCANVI": scvi.model.SCANVI, - "SCVI": scvi.model.SCVI, - "TOTALVI": scvi.model.TOTALVI, - "MULTIVI": scvi.model.MULTIVI, - "AmortizedLDA": 
scvi.model.AmortizedLDA, - "JaxSCVI": scvi.model.JaxSCVI, - } - - return names_to_models_map[_read_model_name_from_registry(model_path)] - - -def extract_file_name(file_path): - """Return the name of the file from path to this file - - Examples - -------- - >>> extract_file_name("resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu") - pbmc_1k_protein_v3_mms - """ - slash_position = file_path.rfind("/") - dot_position = file_path.rfind(".") - - return file_path[slash_position + 1: dot_position] - - -def map_to_existing_reference(adata_query, model_path, check_val_every_n_epoch=1): - """ - A function to map the query data to the reference atlas - - Input: - * adata_query: An AnnData object with the query - * model_path: The reference model directory - - Output: - * vae_query: the trained scvi_tools model - * adata_query: The AnnData object with the query preprocessed for the mapping to the reference - """ - model = _detect_base_model(model_path) - - try: - model.prepare_query_anndata(adata_query, model_path) - except ValueError: - logger.warning("ValueError thrown when preparing adata for mapping. 
Clearing .varm field to prevent it") - adata_query.varm.clear() - model.prepare_query_anndata(adata_query, model_path) - - # Load query data into the model - vae_query = model.load_query_data( - adata_query, - model_path, - freeze_dropout=True - ) - - # Train scArches model for query mapping - vae_query.train( - max_epochs=par["max_epochs"], - early_stopping=par['early_stopping'], - early_stopping_monitor=par['early_stopping_monitor'], - early_stopping_patience=par['early_stopping_patience'], - early_stopping_min_delta=par['early_stopping_min_delta'], - check_val_every_n_epoch=check_val_every_n_epoch, - use_gpu=(cuda_is_available() or mps_is_available()) - ) - - return vae_query, adata_query - - -def _convert_object_dtypes_to_strings(adata): - """Convert object dtypes in .var and .obs to string to prevent error when saving file""" - def convert_cols(df): - object_cols = df.columns[df.dtypes == "object"] - for col in object_cols: - df[col] = df[col].astype(str) - return df - - adata.var = convert_cols(adata.var) - adata.obs = convert_cols(adata.obs) - - return adata - - -def _get_model_path(model_path: str): - """Obtain path to the directory with reference model. If the proposed \`model_path\` is a .zip archive, unzip it. 
If nesessary, convert model to the new format - - Parameters - ---------- - model_path : str - Path to a directory, where to search for the model or to a zip file containing the model - - Returns - ------- - Path to a directory with reference model in format of scvi-tools>=0.15 - """ - import os - import zipfile - import tempfile - from pathlib import Path - - if os.path.isdir(model_path) and "model.pt" in os.listdir(model_path): - # Probably, the \`model_path\` already contains model in the output format of scvi-tools>=0.15 - return model_path - - # The model either has old format or is a zip file downloaded from Zenodo - new_directory = Path(tempfile.TemporaryDirectory().name) - - if zipfile.is_zipfile(model_path): - with zipfile.ZipFile(model_path) as archive: - archive.extractall(new_directory) - model_dir = next(new_directory.glob("**/*.pt")).parent - - else: - model_dir = next(Path(model_path).glob("**/*.pt")).parent - - if "model_params.pt" in os.listdir(model_dir): - # The model is in the \`directory\`, but it was generated with scvi-tools<0.15 - # TODO: for new references (that could not be SCANVI based), we need to check the base class somehow. Reading registry does not work with models generated by scvi-tools<0.15 - # Here I assume that the reference model is for HLCA and thus is SCANVI based - converted_model_path = os.path.join(model_dir, "converted") - scvi.model.SCANVI.convert_legacy_save(model_dir, converted_model_path) - return converted_model_path - - elif "model.pt" in os.listdir(model_dir): - # Archive contained model in the new format, so just return the directory - return model_dir - - else: - raise ValueError("Cannot find model in the provided reference path. Please, provide a path or a link to the directory with reference model. 
For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip") - - -def main(): - - mdata_query = mudata.read(par["input"].strip()) - adata_query = mdata_query.mod[par["modality"]].copy() - - if "dataset" not in adata_query.obs.columns: - # Write name of the dataset as batch variable - if par["dataset_name"] is None: - logger.info("Detecting dataset name") - par["dataset_name"] = extract_file_name(par["input"]) - logger.info(f"Detected {par['dataset_name']}") - - adata_query.obs["dataset"] = par["dataset_name"] - - model_path = _get_model_path(par["reference"]) - vae_query, adata_query = map_to_existing_reference(adata_query, model_path=model_path) - model_name = _read_model_name_from_registry(model_path) - - # Save info about the used model - mdata_query.mod[par["modality"]].uns["integration_method"] = model_name - - logger.info("Trying to write latent representation") - output_key = par["obsm_output"].format(model_name=model_name) - mdata_query.mod[par["modality"]].obsm[output_key] = vae_query.get_latent_representation() - - logger.info("Converting dtypes") - mdata_query.mod[par["modality"]] = _convert_object_dtypes_to_strings(mdata_query.mod[par["modality"]]) - - logger.info("Updating mudata") - try: - mdata_query.update() # Without that error might be thrown during file saving - except KeyError: - # Sometimes this error is thrown, but then everything is magically fixed, and the file gets saved normally - # This is discussed here a bit: https://github.com/scverse/mudata/issues/27 - logger.warning("KeyError was thrown during updating mudata. Probably, the file is fixed after that, but be careful") - - logger.info("Saving h5mu file") - mdata_query.write_h5mu(par["output"].strip(), compression=par["output_compression"]) - - logger.info("Saving model") - vae_query.save(par["model_output"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" 
- -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then - VIASH_PAR_MODEL_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_MODEL_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/integrate/scarches/setup_logger.py b/target/docker/integrate/scarches/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/integrate/scarches/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/integrate/scvi/.config.vsh.yaml b/target/docker/integrate/scvi/.config.vsh.yaml deleted file mode 100644 index d52c24f5d8b..00000000000 --- a/target/docker/integrate/scvi/.config.vsh.yaml +++ /dev/null @@ -1,591 +0,0 @@ -functionality: - name: "scvi" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Malte D. 
Luecken" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "malte.luecken@helmholtz-muenchen.de" - github: "LuckyMD" - orcid: "0000-0001-7464-7921" - linkedin: "malte-l%C3%BCcken-b8b21049" - twitter: "MDLuecken" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "Group Leader" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Matthias Beyens" - roles: - - "contributor" - info: - role: "Contributor" - links: - github: "MatthiasBeyens" - orcid: "0000-0003-3304-0706" - email: "matthias.beyens@gmail.com" - linkedin: "mbeyens" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. If None, X is used" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: "Column name discriminating between your batches." 
- info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_input" - description: ".var column containing highly variable genes. By default, do not\ - \ subset genes." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_labels" - description: "Key in adata.obs for label information. Categories will automatically\ - \ be \nconverted into integer categories and saved to adata.obs['_scvi_labels'].\n\ - If None, assigns the same label to all the data.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_size_factor" - description: "Key in adata.obs for size factor information. Instead of using\ - \ library size as a size factor,\nthe provided size factor column will be\ - \ used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_categorical_covariate" - description: "Keys in adata.obs that correspond to categorical data. These covariates\ - \ can be added in\naddition to the batch covariate and are also treated as\ - \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ - \ latent space).\nThus, these should not be used for biologically-relevant\ - \ factors that you do _not_ want to correct for.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_continuous_covariate" - description: "Keys in adata.obs that correspond to continuous data. 
These covariates\ - \ can be added in\naddition to the batch covariate and are also treated as\ - \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ - \ latent space). Thus, these should not be\nused for biologically-relevant\ - \ factors that you do _not_ want to correct for.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_model" - description: "Folder where the state of the trained model will be saved to." - info: null - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_scvi_integrated" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "SCVI options" - arguments: - - type: "integer" - name: "--n_hidden_nodes" - description: "Number of nodes per hidden layer." - info: null - default: - - 128 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_dimensions_latent_space" - description: "Dimensionality of the latent space." 
- info: null - default: - - 30 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_hidden_layers" - description: "Number of hidden layers used for encoder and decoder neural-networks." - info: null - default: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--dropout_rate" - description: "Dropout rate for the neural networks." - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--dispersion" - description: "Set the behavior for the dispersion for negative binomial distributions:\n\ - - gene: dispersion parameter of negative binomial is constant per gene across\ - \ cells\n- gene-batch: dispersion can differ between different batches\n-\ - \ gene-label: dispersion can differ between different labels\n- gene-cell:\ - \ dispersion can differ for every gene in every cell\n" - info: null - default: - - "gene" - required: false - choices: - - "gene" - - "gene-batch" - - "gene-label" - - "gene-cell" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--gene_likelihood" - description: "Model used to generate the expression data from a count-based\ - \ likelihood distribution.\n- nb: Negative binomial distribution\n- zinb:\ - \ Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n" - info: null - default: - - "nb" - required: false - choices: - - "nb" - - "zinb" - - "poisson" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variational auto-encoder model options" - arguments: - - type: "string" - name: "--use_layer_normalization" - description: "Neural networks for which to enable layer normalization. 
\n" - info: null - default: - - "both" - required: false - choices: - - "encoder" - - "decoder" - - "none" - - "both" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--use_batch_normalization" - description: "Neural networks for which to enable batch normalization. \n" - info: null - default: - - "none" - required: false - choices: - - "encoder" - - "decoder" - - "none" - - "both" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_false" - name: "--encode_covariates" - description: "Whether to concatenate covariates to expression in encoder" - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--deeply_inject_covariates" - description: "Whether to concatenate covariates into output of hidden layers\ - \ in encoder/decoder. \nThis option only applies when n_layers > 1. The covariates\ - \ are concatenated to\nthe input of subsequent hidden layers.\n" - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--use_observed_lib_size" - description: "Use observed library size for RNA as scaling factor in mean of\ - \ conditional distribution.\n" - info: null - direction: "input" - dest: "par" - - name: "Early stopping arguments" - arguments: - - type: "boolean" - name: "--early_stopping" - description: "Whether to perform early stopping with respect to the validation\ - \ set." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--early_stopping_monitor" - description: "Metric logged during validation set epoch." 
- info: null - default: - - "elbo_validation" - required: false - choices: - - "elbo_validation" - - "reconstruction_loss_validation" - - "kl_local_validation" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--early_stopping_patience" - description: "Number of validation epochs with no improvement after which training\ - \ will be stopped." - info: null - default: - - 45 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--early_stopping_min_delta" - description: "Minimum change in the monitored quantity to qualify as an improvement,\ - \ i.e. an absolute change of less than min_delta, will count as no improvement." - info: null - default: - - 0.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset, defaults to (20000 / number\ - \ of cells) * 400 or 400; whichever is smallest." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--reduce_lr_on_plateau" - description: "Whether to monitor validation loss and reduce learning rate when\ - \ validation set `lr_scheduler_metric` plateaus." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_factor" - description: "Factor to reduce learning rate." - info: null - default: - - 0.6 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_patience" - description: "Number of epochs with no improvement after which learning rate\ - \ will be reduced." 
- info: null - default: - - 30.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Data validition" - arguments: - - type: "integer" - name: "--n_obs_min_count" - description: "Minimum number of cells threshold ensuring that every obs_batch\ - \ category has sufficient observations (cells) for model training." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_var_min_count" - description: "Minimum number of genes threshold ensuring that every var_input\ - \ filter has sufficient observations (genes) for model training." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../../utils/subset_vars.py" - description: "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "../../utils/subset_vars.py" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/pytorch:23.06-py3" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libpng-dev" - - "libjpeg-dev" - interactive: false - - type: "docker" - run: - - "pip install \"jax[cuda]\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n" - 
- type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - - type: "python" - user: false - packages: - - "numba~=0.57.1" - - "scvi-tools~=1.0.0" - upgrade: false - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midcpu" - - "midmem" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scvi" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scvi/scvi" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/integrate/scvi/scvi b/target/docker/integrate/scvi/scvi deleted file mode 100755 index eda834a8674..00000000000 
--- a/target/docker/integrate/scvi/scvi +++ /dev/null @@ -1,1912 +0,0 @@ -#!/usr/bin/env bash - -# scvi 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Malte D. Luecken (author) -# * Dries Schaumont (maintainer) -# * Matthias Beyens (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ 
-h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. 
-function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scvi" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scvi 0.12.3" - echo "" - echo "Performs scvi integration as done in the human lung cell atlas" - echo "https://github.com/LungCellAtlas/HLCA" - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --input_layer" - echo " type: string" - echo " Input layer to use. If None, X is used" - echo "" - echo " --obs_batch" - echo " type: string" - echo " default: sample_id" - echo " Column name discriminating between your batches." 
- echo "" - echo " --var_input" - echo " type: string" - echo " .var column containing highly variable genes. By default, do not subset" - echo " genes." - echo "" - echo " --obs_labels" - echo " type: string" - echo " Key in adata.obs for label information. Categories will automatically be" - echo " converted into integer categories and saved to" - echo " adata.obs['_scvi_labels']." - echo " If None, assigns the same label to all the data." - echo "" - echo " --obs_size_factor" - echo " type: string" - echo " Key in adata.obs for size factor information. Instead of using library" - echo " size as a size factor," - echo " the provided size factor column will be used as offset in the mean of" - echo " the likelihood." - echo " Assumed to be on linear scale." - echo "" - echo " --obs_categorical_covariate" - echo " type: string, multiple values allowed" - echo " Keys in adata.obs that correspond to categorical data. These covariates" - echo " can be added in" - echo " addition to the batch covariate and are also treated as nuisance factors" - echo " (i.e., the model tries to minimize their effects on the latent space)." - echo " Thus, these should not be used for biologically-relevant factors that" - echo " you do _not_ want to correct for." - echo "" - echo " --obs_continuous_covariate" - echo " type: string, multiple values allowed" - echo " Keys in adata.obs that correspond to continuous data. These covariates" - echo " can be added in" - echo " addition to the batch covariate and are also treated as nuisance factors" - echo " (i.e., the model tries to minimize their effects on the latent space)." - echo " Thus, these should not be" - echo " used for biologically-relevant factors that you do _not_ want to correct" - echo " for." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output h5mu file." 
- echo "" - echo " --output_model" - echo " type: file, output, file must exist" - echo " Folder where the state of the trained model will be saved to." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_scvi_integrated" - echo " In which .obsm slot to store the resulting integrated embedding." - echo "" - echo "SCVI options:" - echo " --n_hidden_nodes" - echo " type: integer" - echo " default: 128" - echo " Number of nodes per hidden layer." - echo "" - echo " --n_dimensions_latent_space" - echo " type: integer" - echo " default: 30" - echo " Dimensionality of the latent space." - echo "" - echo " --n_hidden_layers" - echo " type: integer" - echo " default: 2" - echo " Number of hidden layers used for encoder and decoder neural-networks." - echo "" - echo " --dropout_rate" - echo " type: double" - echo " default: 0.1" - echo " Dropout rate for the neural networks." - echo "" - echo " --dispersion" - echo " type: string" - echo " default: gene" - echo " choices: [ gene, gene-batch, gene-label, gene-cell ]" - echo " Set the behavior for the dispersion for negative binomial distributions:" - echo " - gene: dispersion parameter of negative binomial is constant per gene" - echo " across cells" - echo " - gene-batch: dispersion can differ between different batches" - echo " - gene-label: dispersion can differ between different labels" - echo " - gene-cell: dispersion can differ for every gene in every cell" - echo "" - echo " --gene_likelihood" - echo " type: string" - echo " default: nb" - echo " choices: [ nb, zinb, poisson ]" - echo " Model used to generate the expression data from a count-based likelihood" - echo " distribution." 
- echo " - nb: Negative binomial distribution" - echo " - zinb: Zero-inflated negative binomial distribution" - echo " - poisson: Poisson distribution" - echo "" - echo "Variational auto-encoder model options:" - echo " --use_layer_normalization" - echo " type: string" - echo " default: both" - echo " choices: [ encoder, decoder, none, both ]" - echo " Neural networks for which to enable layer normalization." - echo "" - echo " --use_batch_normalization" - echo " type: string" - echo " default: none" - echo " choices: [ encoder, decoder, none, both ]" - echo " Neural networks for which to enable batch normalization." - echo "" - echo " --encode_covariates" - echo " type: boolean_false" - echo " Whether to concatenate covariates to expression in encoder" - echo "" - echo " --deeply_inject_covariates" - echo " type: boolean_true" - echo " Whether to concatenate covariates into output of hidden layers in" - echo " encoder/decoder." - echo " This option only applies when n_layers > 1. The covariates are" - echo " concatenated to" - echo " the input of subsequent hidden layers." - echo "" - echo " --use_observed_lib_size" - echo " type: boolean_true" - echo " Use observed library size for RNA as scaling factor in mean of" - echo " conditional distribution." - echo "" - echo "Early stopping arguments:" - echo " --early_stopping" - echo " type: boolean" - echo " Whether to perform early stopping with respect to the validation set." - echo "" - echo " --early_stopping_monitor" - echo " type: string" - echo " default: elbo_validation" - echo " choices: [ elbo_validation, reconstruction_loss_validation," - echo "kl_local_validation ]" - echo " Metric logged during validation set epoch." - echo "" - echo " --early_stopping_patience" - echo " type: integer" - echo " default: 45" - echo " min: 1" - echo " Number of validation epochs with no improvement after which training" - echo " will be stopped." 
- echo "" - echo " --early_stopping_min_delta" - echo " type: double" - echo " default: 0.0" - echo " min: 0.0" - echo " Minimum change in the monitored quantity to qualify as an improvement," - echo " i.e. an absolute change of less than min_delta, will count as no" - echo " improvement." - echo "" - echo "Learning parameters:" - echo " --max_epochs" - echo " type: integer" - echo " Number of passes through the dataset, defaults to (20000 / number of" - echo " cells) * 400 or 400; whichever is smallest." - echo "" - echo " --reduce_lr_on_plateau" - echo " type: boolean" - echo " default: true" - echo " Whether to monitor validation loss and reduce learning rate when" - echo " validation set \`lr_scheduler_metric\` plateaus." - echo "" - echo " --lr_factor" - echo " type: double" - echo " default: 0.6" - echo " min: 0.0" - echo " Factor to reduce learning rate." - echo "" - echo " --lr_patience" - echo " type: double" - echo " default: 30.0" - echo " min: 0.0" - echo " Number of epochs with no improvement after which learning rate will be" - echo " reduced." - echo "" - echo "Data validition:" - echo " --n_obs_min_count" - echo " type: integer" - echo " default: 0" - echo " Minimum number of cells threshold ensuring that every obs_batch category" - echo " has sufficient observations (cells) for model training." - echo "" - echo " --n_var_min_count" - echo " type: integer" - echo " default: 0" - echo " Minimum number of genes threshold ensuring that every var_input filter" - echo " has sufficient observations (genes) for model training." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. 
See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM nvcr.io/nvidia/pytorch:23.06-py3 - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y libpng-dev libjpeg-dev && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -RUN pip install --upgrade pip && \ - pip install --no-cache-dir "numba~=0.57.1" "scvi-tools~=1.0.0" - -LABEL org.opencontainers.image.authors="Malte D. 
Luecken, Dries Schaumont, Matthias Beyens" -LABEL org.opencontainers.image.description="Companion container for running component integrate scvi" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scvi-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scvi 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_layer) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_layer=*) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_batch) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_batch=*) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_input) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_input=*) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_labels) - [ -n "$VIASH_PAR_OBS_LABELS" ] && ViashError Bad arguments for option \'--obs_labels\': \'$VIASH_PAR_OBS_LABELS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LABELS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_labels. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_labels=*) - [ -n "$VIASH_PAR_OBS_LABELS" ] && ViashError Bad arguments for option \'--obs_labels=*\': \'$VIASH_PAR_OBS_LABELS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_LABELS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_size_factor) - [ -n "$VIASH_PAR_OBS_SIZE_FACTOR" ] && ViashError Bad arguments for option \'--obs_size_factor\': \'$VIASH_PAR_OBS_SIZE_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_SIZE_FACTOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_size_factor. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obs_size_factor=*) - [ -n "$VIASH_PAR_OBS_SIZE_FACTOR" ] && ViashError Bad arguments for option \'--obs_size_factor=*\': \'$VIASH_PAR_OBS_SIZE_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_SIZE_FACTOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_categorical_covariate) - if [ -z "$VIASH_PAR_OBS_CATEGORICAL_COVARIATE" ]; then - VIASH_PAR_OBS_CATEGORICAL_COVARIATE="$2" - else - VIASH_PAR_OBS_CATEGORICAL_COVARIATE="$VIASH_PAR_OBS_CATEGORICAL_COVARIATE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_categorical_covariate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_categorical_covariate=*) - if [ -z "$VIASH_PAR_OBS_CATEGORICAL_COVARIATE" ]; then - VIASH_PAR_OBS_CATEGORICAL_COVARIATE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OBS_CATEGORICAL_COVARIATE="$VIASH_PAR_OBS_CATEGORICAL_COVARIATE:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --obs_continuous_covariate) - if [ -z "$VIASH_PAR_OBS_CONTINUOUS_COVARIATE" ]; then - VIASH_PAR_OBS_CONTINUOUS_COVARIATE="$2" - else - VIASH_PAR_OBS_CONTINUOUS_COVARIATE="$VIASH_PAR_OBS_CONTINUOUS_COVARIATE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_continuous_covariate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_continuous_covariate=*) - if [ -z "$VIASH_PAR_OBS_CONTINUOUS_COVARIATE" ]; then - VIASH_PAR_OBS_CONTINUOUS_COVARIATE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OBS_CONTINUOUS_COVARIATE="$VIASH_PAR_OBS_CONTINUOUS_COVARIATE:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_model) - [ -n "$VIASH_PAR_OUTPUT_MODEL" ] && ViashError Bad arguments for option \'--output_model\': \'$VIASH_PAR_OUTPUT_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_MODEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_model. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_model=*) - [ -n "$VIASH_PAR_OUTPUT_MODEL" ] && ViashError Bad arguments for option \'--output_model=*\': \'$VIASH_PAR_OUTPUT_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_MODEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_hidden_nodes) - [ -n "$VIASH_PAR_N_HIDDEN_NODES" ] && ViashError Bad arguments for option \'--n_hidden_nodes\': \'$VIASH_PAR_N_HIDDEN_NODES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_HIDDEN_NODES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_hidden_nodes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_hidden_nodes=*) - [ -n "$VIASH_PAR_N_HIDDEN_NODES" ] && ViashError Bad arguments for option \'--n_hidden_nodes=*\': \'$VIASH_PAR_N_HIDDEN_NODES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_HIDDEN_NODES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_dimensions_latent_space) - [ -n "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" ] && ViashError Bad arguments for option \'--n_dimensions_latent_space\': \'$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_DIMENSIONS_LATENT_SPACE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_dimensions_latent_space. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_dimensions_latent_space=*) - [ -n "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" ] && ViashError Bad arguments for option \'--n_dimensions_latent_space=*\': \'$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_DIMENSIONS_LATENT_SPACE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_hidden_layers) - [ -n "$VIASH_PAR_N_HIDDEN_LAYERS" ] && ViashError Bad arguments for option \'--n_hidden_layers\': \'$VIASH_PAR_N_HIDDEN_LAYERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_HIDDEN_LAYERS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_hidden_layers. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_hidden_layers=*) - [ -n "$VIASH_PAR_N_HIDDEN_LAYERS" ] && ViashError Bad arguments for option \'--n_hidden_layers=*\': \'$VIASH_PAR_N_HIDDEN_LAYERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_HIDDEN_LAYERS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --dropout_rate) - [ -n "$VIASH_PAR_DROPOUT_RATE" ] && ViashError Bad arguments for option \'--dropout_rate\': \'$VIASH_PAR_DROPOUT_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DROPOUT_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --dropout_rate. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --dropout_rate=*) - [ -n "$VIASH_PAR_DROPOUT_RATE" ] && ViashError Bad arguments for option \'--dropout_rate=*\': \'$VIASH_PAR_DROPOUT_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DROPOUT_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --dispersion) - [ -n "$VIASH_PAR_DISPERSION" ] && ViashError Bad arguments for option \'--dispersion\': \'$VIASH_PAR_DISPERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DISPERSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --dispersion. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --dispersion=*) - [ -n "$VIASH_PAR_DISPERSION" ] && ViashError Bad arguments for option \'--dispersion=*\': \'$VIASH_PAR_DISPERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DISPERSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gene_likelihood) - [ -n "$VIASH_PAR_GENE_LIKELIHOOD" ] && ViashError Bad arguments for option \'--gene_likelihood\': \'$VIASH_PAR_GENE_LIKELIHOOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENE_LIKELIHOOD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_likelihood. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gene_likelihood=*) - [ -n "$VIASH_PAR_GENE_LIKELIHOOD" ] && ViashError Bad arguments for option \'--gene_likelihood=*\': \'$VIASH_PAR_GENE_LIKELIHOOD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_GENE_LIKELIHOOD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --use_layer_normalization) - [ -n "$VIASH_PAR_USE_LAYER_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_layer_normalization\': \'$VIASH_PAR_USE_LAYER_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_LAYER_NORMALIZATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_layer_normalization. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --use_layer_normalization=*) - [ -n "$VIASH_PAR_USE_LAYER_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_layer_normalization=*\': \'$VIASH_PAR_USE_LAYER_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_LAYER_NORMALIZATION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --use_batch_normalization) - [ -n "$VIASH_PAR_USE_BATCH_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_batch_normalization\': \'$VIASH_PAR_USE_BATCH_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_BATCH_NORMALIZATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_batch_normalization. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --use_batch_normalization=*) - [ -n "$VIASH_PAR_USE_BATCH_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_batch_normalization=*\': \'$VIASH_PAR_USE_BATCH_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_BATCH_NORMALIZATION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --encode_covariates) - [ -n "$VIASH_PAR_ENCODE_COVARIATES" ] && ViashError Bad arguments for option \'--encode_covariates\': \'$VIASH_PAR_ENCODE_COVARIATES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ENCODE_COVARIATES=false - shift 1 - ;; - --deeply_inject_covariates) - [ -n "$VIASH_PAR_DEEPLY_INJECT_COVARIATES" ] && ViashError Bad arguments for option \'--deeply_inject_covariates\': \'$VIASH_PAR_DEEPLY_INJECT_COVARIATES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DEEPLY_INJECT_COVARIATES=true - shift 1 - ;; - --use_observed_lib_size) - [ -n "$VIASH_PAR_USE_OBSERVED_LIB_SIZE" ] && ViashError Bad arguments for option \'--use_observed_lib_size\': \'$VIASH_PAR_USE_OBSERVED_LIB_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_OBSERVED_LIB_SIZE=true - shift 1 - ;; - --early_stopping) - [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping=*) - [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping=*\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_monitor) - [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MONITOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_monitor. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --early_stopping_monitor=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor=*\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MONITOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_patience) - [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_PATIENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_patience. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping_patience=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience=*\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_PATIENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_min_delta) - [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MIN_DELTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_min_delta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping_min_delta=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta=*\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EARLY_STOPPING_MIN_DELTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_epochs) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_epochs=*) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reduce_lr_on_plateau) - [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REDUCE_LR_ON_PLATEAU="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reduce_lr_on_plateau. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reduce_lr_on_plateau=*) - [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau=*\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REDUCE_LR_ON_PLATEAU=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lr_factor) - [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_FACTOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_factor. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --lr_factor=*) - [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor=*\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_FACTOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lr_patience) - [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_PATIENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_patience. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --lr_patience=*) - [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience=*\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_PATIENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_obs_min_count) - [ -n "$VIASH_PAR_N_OBS_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_obs_min_count\': \'$VIASH_PAR_N_OBS_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_OBS_MIN_COUNT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_obs_min_count. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_obs_min_count=*) - [ -n "$VIASH_PAR_N_OBS_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_obs_min_count=*\': \'$VIASH_PAR_N_OBS_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_OBS_MIN_COUNT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_var_min_count) - [ -n "$VIASH_PAR_N_VAR_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_var_min_count\': \'$VIASH_PAR_N_VAR_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_VAR_MIN_COUNT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_var_min_count. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_var_min_count=*) - [ -n "$VIASH_PAR_N_VAR_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_var_min_count=*\': \'$VIASH_PAR_N_VAR_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_VAR_MIN_COUNT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) 
;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then - VIASH_PAR_OBS_BATCH="sample_id" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_scvi_integrated" -fi -if [ -z ${VIASH_PAR_N_HIDDEN_NODES+x} ]; then - VIASH_PAR_N_HIDDEN_NODES="128" -fi -if [ -z ${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE+x} ]; then - VIASH_PAR_N_DIMENSIONS_LATENT_SPACE="30" -fi -if [ -z ${VIASH_PAR_N_HIDDEN_LAYERS+x} ]; then - VIASH_PAR_N_HIDDEN_LAYERS="2" -fi -if [ -z ${VIASH_PAR_DROPOUT_RATE+x} ]; then - VIASH_PAR_DROPOUT_RATE="0.1" -fi -if [ -z ${VIASH_PAR_DISPERSION+x} ]; then - VIASH_PAR_DISPERSION="gene" -fi -if [ -z ${VIASH_PAR_GENE_LIKELIHOOD+x} ]; then - VIASH_PAR_GENE_LIKELIHOOD="nb" -fi -if [ -z ${VIASH_PAR_USE_LAYER_NORMALIZATION+x} ]; then - VIASH_PAR_USE_LAYER_NORMALIZATION="both" -fi -if [ -z ${VIASH_PAR_USE_BATCH_NORMALIZATION+x} ]; then - VIASH_PAR_USE_BATCH_NORMALIZATION="none" -fi -if [ -z ${VIASH_PAR_ENCODE_COVARIATES+x} ]; then - VIASH_PAR_ENCODE_COVARIATES="true" -fi -if [ -z ${VIASH_PAR_DEEPLY_INJECT_COVARIATES+x} ]; then - VIASH_PAR_DEEPLY_INJECT_COVARIATES="false" -fi -if [ -z ${VIASH_PAR_USE_OBSERVED_LIB_SIZE+x} ]; then - VIASH_PAR_USE_OBSERVED_LIB_SIZE="false" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then - VIASH_PAR_EARLY_STOPPING_MONITOR="elbo_validation" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then - VIASH_PAR_EARLY_STOPPING_PATIENCE="45" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then - VIASH_PAR_EARLY_STOPPING_MIN_DELTA="0.0" -fi -if [ -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then - VIASH_PAR_REDUCE_LR_ON_PLATEAU="true" -fi -if [ -z ${VIASH_PAR_LR_FACTOR+x} ]; then - VIASH_PAR_LR_FACTOR="0.6" -fi -if [ -z ${VIASH_PAR_LR_PATIENCE+x} ]; then - VIASH_PAR_LR_PATIENCE="30.0" -fi -if [ -z ${VIASH_PAR_N_OBS_MIN_COUNT+x} ]; then - 
VIASH_PAR_N_OBS_MIN_COUNT="0" -fi -if [ -z ${VIASH_PAR_N_VAR_MIN_COUNT+x} ]; then - VIASH_PAR_N_VAR_MIN_COUNT="0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_N_HIDDEN_NODES" ]]; then - if ! [[ "$VIASH_PAR_N_HIDDEN_NODES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_hidden_nodes' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" ]]; then - if ! [[ "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_dimensions_latent_space' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_HIDDEN_LAYERS" ]]; then - if ! [[ "$VIASH_PAR_N_HIDDEN_LAYERS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_hidden_layers' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DROPOUT_RATE" ]]; then - if ! [[ "$VIASH_PAR_DROPOUT_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--dropout_rate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ENCODE_COVARIATES" ]]; then - if ! [[ "$VIASH_PAR_ENCODE_COVARIATES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--encode_covariates' has to be a boolean_false. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DEEPLY_INJECT_COVARIATES" ]]; then - if ! [[ "$VIASH_PAR_DEEPLY_INJECT_COVARIATES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--deeply_inject_covariates' has to be a boolean_true. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_USE_OBSERVED_LIB_SIZE" ]]; then - if ! [[ "$VIASH_PAR_USE_OBSERVED_LIB_SIZE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--use_observed_lib_size' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EARLY_STOPPING" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--early_stopping' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING_PATIENCE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--early_stopping_patience' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_EARLY_STOPPING_PATIENCE -lt 1 ]]; then - ViashError '--early_stopping_patience' has be more than or equal to 1. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--early_stopping_min_delta' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_EARLY_STOPPING_MIN_DELTA '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_EARLY_STOPPING_MIN_DELTA -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. 
- exit 1 - fi - else - ViashWarning '--early_stopping_min_delta' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ]]; then - if ! [[ "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--reduce_lr_on_plateau' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LR_FACTOR" ]]; then - if ! [[ "$VIASH_PAR_LR_FACTOR" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--lr_factor' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_LR_FACTOR '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_LR_FACTOR -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--lr_factor' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_LR_PATIENCE" ]]; then - if ! [[ "$VIASH_PAR_LR_PATIENCE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--lr_patience' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! 
[[ `echo $VIASH_PAR_LR_PATIENCE '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_LR_PATIENCE -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--lr_patience' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_N_OBS_MIN_COUNT" ]]; then - if ! [[ "$VIASH_PAR_N_OBS_MIN_COUNT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_obs_min_count' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_VAR_MIN_COUNT" ]]; then - if ! [[ "$VIASH_PAR_N_VAR_MIN_COUNT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_var_min_count' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_DISPERSION" ]; then - VIASH_PAR_DISPERSION_CHOICES=("gene:gene-batch:gene-label:gene-cell") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_DISPERSION_CHOICES[*]}:" =~ ":$VIASH_PAR_DISPERSION:" ]]; then - ViashError '--dispersion' specified value of \'$VIASH_PAR_DISPERSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_GENE_LIKELIHOOD" ]; then - VIASH_PAR_GENE_LIKELIHOOD_CHOICES=("nb:zinb:poisson") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_GENE_LIKELIHOOD_CHOICES[*]}:" =~ ":$VIASH_PAR_GENE_LIKELIHOOD:" ]]; then - ViashError '--gene_likelihood' specified value of \'$VIASH_PAR_GENE_LIKELIHOOD\' is not in the list of allowed values. 
Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_USE_LAYER_NORMALIZATION" ]; then - VIASH_PAR_USE_LAYER_NORMALIZATION_CHOICES=("encoder:decoder:none:both") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_USE_LAYER_NORMALIZATION_CHOICES[*]}:" =~ ":$VIASH_PAR_USE_LAYER_NORMALIZATION:" ]]; then - ViashError '--use_layer_normalization' specified value of \'$VIASH_PAR_USE_LAYER_NORMALIZATION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_USE_BATCH_NORMALIZATION" ]; then - VIASH_PAR_USE_BATCH_NORMALIZATION_CHOICES=("encoder:decoder:none:both") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_USE_BATCH_NORMALIZATION_CHOICES[*]}:" =~ ":$VIASH_PAR_USE_BATCH_NORMALIZATION:" ]]; then - ViashError '--use_batch_normalization' specified value of \'$VIASH_PAR_USE_BATCH_NORMALIZATION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_EARLY_STOPPING_MONITOR" ]; then - VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES=("elbo_validation:reconstruction_loss_validation:kl_local_validation") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES[*]}:" =~ ":$VIASH_PAR_EARLY_STOPPING_MONITOR:" ]]; then - ViashError '--early_stopping_monitor' specified value of \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT_MODEL")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_MODEL")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_MODEL")" ) - VIASH_PAR_OUTPUT_MODEL=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_MODEL") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_MODEL" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_scvi:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scvi:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scvi:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scvi-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from scanpy._utils import check_nonnegative_integers -import mudata -import scvi - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_labels': $( if [ ! -z ${VIASH_PAR_OBS_LABELS+x} ]; then echo "r'${VIASH_PAR_OBS_LABELS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_size_factor': $( if [ ! -z ${VIASH_PAR_OBS_SIZE_FACTOR+x} ]; then echo "r'${VIASH_PAR_OBS_SIZE_FACTOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_categorical_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CATEGORICAL_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CATEGORICAL_COVARIATE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'obs_continuous_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CONTINUOUS_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CONTINUOUS_COVARIATE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_model': $( if [ ! -z ${VIASH_PAR_OUTPUT_MODEL+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MODEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'n_hidden_nodes': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_NODES+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_NODES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_dimensions_latent_space': $( if [ ! 
-z ${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE+x} ]; then echo "int(r'${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_hidden_layers': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_LAYERS+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_LAYERS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'dropout_rate': $( if [ ! -z ${VIASH_PAR_DROPOUT_RATE+x} ]; then echo "float(r'${VIASH_PAR_DROPOUT_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'dispersion': $( if [ ! -z ${VIASH_PAR_DISPERSION+x} ]; then echo "r'${VIASH_PAR_DISPERSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'gene_likelihood': $( if [ ! -z ${VIASH_PAR_GENE_LIKELIHOOD+x} ]; then echo "r'${VIASH_PAR_GENE_LIKELIHOOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'use_layer_normalization': $( if [ ! -z ${VIASH_PAR_USE_LAYER_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_LAYER_NORMALIZATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'use_batch_normalization': $( if [ ! -z ${VIASH_PAR_USE_BATCH_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_BATCH_NORMALIZATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'encode_covariates': $( if [ ! -z ${VIASH_PAR_ENCODE_COVARIATES+x} ]; then echo "r'${VIASH_PAR_ENCODE_COVARIATES//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'deeply_inject_covariates': $( if [ ! -z ${VIASH_PAR_DEEPLY_INJECT_COVARIATES+x} ]; then echo "r'${VIASH_PAR_DEEPLY_INJECT_COVARIATES//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'use_observed_lib_size': $( if [ ! -z ${VIASH_PAR_USE_OBSERVED_LIB_SIZE+x} ]; then echo "r'${VIASH_PAR_USE_OBSERVED_LIB_SIZE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'early_stopping_monitor': $( if [ ! 
-z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_obs_min_count': $( if [ ! -z ${VIASH_PAR_N_OBS_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_OBS_MIN_COUNT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_var_min_count': $( if [ ! -z ${VIASH_PAR_N_VAR_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_VAR_MIN_COUNT//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -import sys -sys.path.append(meta['resources_dir']) - -# START TEMPORARY WORKAROUND subset_vars -# reason: resources aren't available when using Nextflow fusion -# from subset_vars import subset_vars -def subset_vars(adata, subset_col): - return adata[:, adata.var[subset_col]].copy() - -# END TEMPORARY WORKAROUND subset_vars - -#TODO: optionally, move to qa -# https://github.com/openpipelines-bio/openpipeline/issues/435 -def check_validity_anndata(adata, layer, obs_batch, - n_obs_min_count, n_var_min_count): - assert check_nonnegative_integers( - adata.layers[layer] if layer else adata.X - ), f"Make sure input adata contains raw_counts" - - assert len(set(adata.var_names)) == len( - adata.var_names - ), f"Dataset contains multiple genes with same gene name." 
- - # Ensure every obs_batch category has sufficient observations - assert min(adata.obs[[obs_batch]].value_counts()) > n_obs_min_count, \\ - f"Anndata has fewer than {n_obs_min_count} cells." - - assert adata.n_vars > n_var_min_count, \\ - f"Anndata has fewer than {n_var_min_count} genes." - - - -def main(): - mdata = mudata.read(par["input"].strip()) - adata = mdata.mod[par['modality']] - - if par['var_input']: - # Subset to HVG - adata_subset = subset_vars(adata, subset_col=par["var_input"]).copy() - else: - adata_subset = adata.copy() - - check_validity_anndata( - adata_subset, par['input_layer'], par['obs_batch'], - par["n_obs_min_count"], par["n_var_min_count"] - ) - # Set up the data - scvi.model.SCVI.setup_anndata( - adata_subset, - batch_key=par['obs_batch'], - layer=par['input_layer'], - labels_key=par['obs_labels'], - size_factor_key=par['obs_size_factor'], - categorical_covariate_keys=par['obs_categorical_covariate'], - continuous_covariate_keys=par['obs_continuous_covariate'], - ) - - # Set up the model - vae_uns = scvi.model.SCVI( - adata_subset, - n_hidden=par["n_hidden_nodes"], - n_latent=par["n_dimensions_latent_space"], - n_layers=par["n_hidden_layers"], - dropout_rate=par["dropout_rate"], - dispersion=par["dispersion"], - gene_likelihood=par["gene_likelihood"], - use_layer_norm=par["use_layer_normalization"], - use_batch_norm=par["use_batch_normalization"], - encode_covariates=par["encode_covariates"], # Default (True) is for better scArches performance -> maybe don't use this always? - deeply_inject_covariates=par["deeply_inject_covariates"], # Default (False) for better scArches performance -> maybe don't use this always? 
- use_observed_lib_size=par["use_observed_lib_size"], # When size_factors are not passed - ) - - plan_kwargs = { - "reduce_lr_on_plateau": par['reduce_lr_on_plateau'], - "lr_patience": par['lr_patience'], - "lr_factor": par['lr_factor'], - } - - - # Train the model - vae_uns.train( - max_epochs=par['max_epochs'], - early_stopping=par['early_stopping'], - early_stopping_monitor=par['early_stopping_monitor'], - early_stopping_patience=par['early_stopping_patience'], - early_stopping_min_delta=par['early_stopping_min_delta'], - plan_kwargs=plan_kwargs, - check_val_every_n_epoch=1, - accelerator="auto", - ) - # Note: train_size=1.0 should give better results, but then can't do early_stopping on validation set - - # Get the latent output - adata.obsm[par['obsm_output']] = vae_uns.get_latent_representation() - - mdata.mod[par['modality']] = adata - mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) - if par["output_model"]: - vae_uns.save(par["output_model"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ]; then - VIASH_PAR_OUTPUT_MODEL=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_MODEL") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ] && [ ! -e "$VIASH_PAR_OUTPUT_MODEL" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_MODEL' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/integrate/scvi/subset_vars.py b/target/docker/integrate/scvi/subset_vars.py deleted file mode 100644 index 10011c8fcca..00000000000 --- a/target/docker/integrate/scvi/subset_vars.py +++ /dev/null @@ -1,16 +0,0 @@ -def subset_vars(adata, subset_col): - """Subset highly variable genes from AnnData object - - Parameters - ---------- - adata : AnnData - Annotated data object - subset_col : str - Name of the boolean column in `adata.var` that contains the information if features should be used or not - - Returns - ------- - AnnData - Copy of `adata` with subsetted features - """ - return adata[:, adata.var[subset_col]].copy() diff --git a/target/docker/integrate/totalvi/.config.vsh.yaml b/target/docker/integrate/totalvi/.config.vsh.yaml deleted file mode 100644 index deebb486875..00000000000 --- a/target/docker/integrate/totalvi/.config.vsh.yaml +++ /dev/null @@ -1,348 +0,0 @@ -functionality: - name: "totalvi" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file with query data to integrate with reference." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - description: "Input h5mu file with reference data to train the TOTALVI model." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--force_retrain" - alternatives: - - "-f" - description: "If true, retrain the model and save it to reference_model_path" - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--query_modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--query_proteins_modality" - description: "Name of the modality in the input (query) h5mu file containing\ - \ protein data" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_proteins_modality" - description: "Name of the modality containing proteins in the reference" - info: null - default: - - "prot" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. If None, X is used" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: "Column name discriminating between your batches." 
- info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_input" - description: ".var column containing highly variable genes. By default, do not\ - \ subset genes." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_integrated_totalvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_normalized_rna_output" - description: "In which .obsm slot to store the normalized RNA from TOTALVI." - info: null - default: - - "X_totalvi_normalized_rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_normalized_protein_output" - description: "In which .obsm slot to store the normalized protein data from\ - \ TOTALVI." - info: null - default: - - "X_totalvi_normalized_protein" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference_model_path" - description: "Directory with the reference model. 
If not exists, trained model\ - \ will be saved there" - info: null - default: - - "totalvi_model_reference" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--query_model_path" - description: "Directory, where the query model will be saved" - info: null - default: - - "totalvi_model_query" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset" - info: null - default: - - 400 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_query_epochs" - description: "Number of passes through the dataset, when fine-tuning model for\ - \ query" - info: null - default: - - 200 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--weight_decay" - description: "Weight decay, when fine-tuning model for query" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9" - target_organization: "openpipelines-bio" 
- target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "torchmetrics~=0.11.0" - - "scvi-tools~=1.0.3" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/totalvi" - executable: 
"/home/runner/work/openpipeline/openpipeline/target/docker/integrate/totalvi/totalvi" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/integrate/totalvi/setup_logger.py b/target/docker/integrate/totalvi/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/integrate/totalvi/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/integrate/totalvi/totalvi b/target/docker/integrate/totalvi/totalvi deleted file mode 100755 index 1fe55f29974..00000000000 --- a/target/docker/integrate/totalvi/totalvi +++ /dev/null @@ -1,1479 +0,0 @@ -#!/usr/bin/env bash - -# totalvi 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Vladimir Shitov - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no 
something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="totalvi" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "totalvi 0.12.3" - echo "" - echo "Performs mapping to the reference by totalvi model:" - echo "https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file with query data to integrate with reference." - echo "" - echo " -r, --reference" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file with reference data to train the TOTALVI model." 
- echo "" - echo " -f, --force_retrain" - echo " type: boolean_true" - echo " If true, retrain the model and save it to reference_model_path" - echo "" - echo " --query_modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --query_proteins_modality" - echo " type: string" - echo " Name of the modality in the input (query) h5mu file containing protein" - echo " data" - echo "" - echo " --reference_modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --reference_proteins_modality" - echo " type: string" - echo " default: prot" - echo " Name of the modality containing proteins in the reference" - echo "" - echo " --input_layer" - echo " type: string" - echo " Input layer to use. If None, X is used" - echo "" - echo " --obs_batch" - echo " type: string" - echo " default: sample_id" - echo " Column name discriminating between your batches." - echo "" - echo " --var_input" - echo " type: string" - echo " .var column containing highly variable genes. By default, do not subset" - echo " genes." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output h5mu file." - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_integrated_totalvi" - echo " In which .obsm slot to store the resulting integrated embedding." - echo "" - echo " --obsm_normalized_rna_output" - echo " type: string" - echo " default: X_totalvi_normalized_rna" - echo " In which .obsm slot to store the normalized RNA from TOTALVI." - echo "" - echo " --obsm_normalized_protein_output" - echo " type: string" - echo " default: X_totalvi_normalized_protein" - echo " In which .obsm slot to store the normalized protein data from TOTALVI." - echo "" - echo " --reference_model_path" - echo " type: file, output, file must exist" - echo " default: totalvi_model_reference" - echo " Directory with the reference model. 
If not exists, trained model will be" - echo " saved there" - echo "" - echo " --query_model_path" - echo " type: file, output, file must exist" - echo " default: totalvi_model_query" - echo " Directory, where the query model will be saved" - echo "" - echo "Learning parameters:" - echo " --max_epochs" - echo " type: integer" - echo " default: 400" - echo " Number of passes through the dataset" - echo "" - echo " --max_query_epochs" - echo " type: integer" - echo " default: 200" - echo " Number of passes through the dataset, when fine-tuning model for query" - echo "" - echo " --weight_decay" - echo " type: double" - echo " default: 0.0" - echo " Weight decay, when fine-tuning model for query" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? 
# returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." 
- else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9 - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y libopenblas-dev liblapack-dev gfortran && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "torchmetrics~=0.11.0" "scvi-tools~=1.0.3" - -LABEL org.opencontainers.image.authors="Vladimir Shitov" -LABEL org.opencontainers.image.description="Companion container for running component integrate totalvi" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-totalvi-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "totalvi 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -r) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --force_retrain) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - -f) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - --query_modality) - [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --query_modality=*) - [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality=*\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --query_proteins_modality) - [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_PROTEINS_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_proteins_modality. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --query_proteins_modality=*) - [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality=*\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_modality) - [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_modality=*) - [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality=*\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_proteins_modality) - [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_PROTEINS_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_proteins_modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_proteins_modality=*) - [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality=*\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_layer) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_layer=*) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_batch) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_batch=*) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_input) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --var_input=*) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_normalized_rna_output) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_rna_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_normalized_rna_output=*) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_normalized_protein_output) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_protein_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_normalized_protein_output=*) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_model_path) - [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_MODEL_PATH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_model_path. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_model_path=*) - [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path=*\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --query_model_path) - [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_MODEL_PATH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_model_path. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --query_model_path=*) - [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path=*\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_MODEL_PATH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_epochs) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --max_epochs=*) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_query_epochs) - [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_QUERY_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_query_epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_query_epochs=*) - [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs=*\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_QUERY_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --weight_decay) - [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WEIGHT_DECAY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --weight_decay. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --weight_decay=*) - [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay=*\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_WEIGHT_DECAY=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then - VIASH_PAR_FORCE_RETRAIN="false" -fi -if [ -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then - VIASH_PAR_QUERY_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then - VIASH_PAR_REFERENCE_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then - VIASH_PAR_REFERENCE_PROTEINS_MODALITY="prot" -fi -if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then - VIASH_PAR_OBS_BATCH="sample_id" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_integrated_totalvi" -fi -if [ -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then - VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="X_totalvi_normalized_rna" -fi -if [ -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then - VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="X_totalvi_normalized_protein" -fi -if [ -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then - VIASH_PAR_REFERENCE_MODEL_PATH="totalvi_model_reference" -fi -if [ -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then - VIASH_PAR_QUERY_MODEL_PATH="totalvi_model_query" -fi -if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then - VIASH_PAR_MAX_EPOCHS="400" -fi -if [ -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then - VIASH_PAR_MAX_QUERY_EPOCHS="200" -fi -if [ -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then - VIASH_PAR_WEIGHT_DECAY="0.0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then - if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--force_retrain' has to be a boolean_true. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MAX_QUERY_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_query_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WEIGHT_DECAY" ]]; then - if ! [[ "$VIASH_PAR_WEIGHT_DECAY" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--weight_decay' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && [ ! -d "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" -fi -if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ] && [ ! -d "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE_MODEL_PATH")" ) - VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE_MODEL_PATH") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_REFERENCE_MODEL_PATH" ) -fi -if [ ! 
-z "$VIASH_PAR_QUERY_MODEL_PATH" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_QUERY_MODEL_PATH")" ) - VIASH_PAR_QUERY_MODEL_PATH=$(ViashAutodetectMount "$VIASH_PAR_QUERY_MODEL_PATH") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_QUERY_MODEL_PATH" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-totalvi-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from typing import Tuple - -import os -import sys -import mudata -from anndata import AnnData # For type hints -from mudata import MuData # For type hints -import numpy as np -import scvi -from scipy.sparse import issparse - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'query_modality': $( if [ ! -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'query_proteins_modality': $( if [ ! -z ${VIASH_PAR_QUERY_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_proteins_modality': $( if [ ! 
-z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_normalized_rna_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_normalized_protein_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_model_path': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'query_model_path': $( if [ ! -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_QUERY_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_query_epochs': $( if [ ! -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_QUERY_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'weight_decay': $( if [ ! 
-z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then echo "float(r'${VIASH_PAR_WEIGHT_DECAY//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def align_proteins_names(adata_reference: AnnData, mdata_query: MuData, adata_query: AnnData, reference_proteins_key: str, query_proteins_key: str) -> AnnData: - """Make sure that proteins are located in the same .obsm slot in reference and query. Pad query proteins with zeros if they are absent""" - proteins_reference = adata_reference.obsm[reference_proteins_key] - - # If query has no protein data, put matrix of zeros - if not query_proteins_key or query_proteins_key not in mdata_query.mod: - adata_query.obsm[reference_proteins_key] = np.zeros((adata_query.n_obs, proteins_reference.shape[1])) - else: - # Make sure that proteins expression has the same key in query and reference - adata_query.obsm[reference_proteins_key] = adata_query.obsm[query_proteins_key] - - return adata_query - - -def extract_proteins_to_anndata(mdata: MuData, rna_modality_key, protein_modality_key, input_layer, hvg_var_key=None) -> AnnData: - """TOTALVI requires data to be stored in AnnData format with protein counts in .obsm slot. 
This function performs the conversion""" - adata: AnnData = mdata.mod[rna_modality_key].copy() - - if hvg_var_key: - selected_genes = adata.var_names[adata.var[hvg_var_key]] - adata = adata[:, selected_genes].copy() - - if protein_modality_key in mdata.mod: - # Put the proteins modality into .obsm slot - proteins_reference_adata = mdata.mod[protein_modality_key].copy() - - if input_layer is None: - proteins = proteins_reference_adata.X - else: - proteins = proteins_reference_adata.obsm[input_layer] - - if issparse(proteins): - proteins = proteins.toarray() - - adata.obsm[protein_modality_key] = proteins - - return adata - - -def build_reference_model(adata_reference: AnnData, max_train_epochs: int = 400) -> scvi.model.TOTALVI: - - vae_reference = scvi.model.TOTALVI(adata_reference, use_layer_norm="both", use_batch_norm="none") - vae_reference.train(max_train_epochs) - - vae_reference.save(par["reference_model_path"]) - - return vae_reference - -def is_retraining_model() -> bool: - """Decide, whether reference model should be trained. 
It happens when no model exists or force_retrain flag is on""" - - trained_model_exists = os.path.isdir(par["reference_model_path"]) and ("model.pt" in os.listdir(par["reference_model_path"])) - return not trained_model_exists or par["force_retrain"] - - -def map_query_to_reference(mdata_reference: MuData, mdata_query: MuData, adata_query: AnnData) -> Tuple[scvi.model.TOTALVI, AnnData]: - """Build model on the provided reference if necessary, and map query to the reference""" - - adata_reference: AnnData = extract_proteins_to_anndata(mdata_reference, rna_modality_key=par["reference_modality"], protein_modality_key=par["reference_proteins_modality"], - input_layer=par["input_layer"], hvg_var_key=par["var_input"]) - - scvi.model.TOTALVI.setup_anndata( - adata_reference, - batch_key=par["obs_batch"], - protein_expression_obsm_key=par["reference_proteins_modality"] - ) - - if is_retraining_model(): - vae_reference = build_reference_model(adata_reference, max_train_epochs=par["max_epochs"]) - else: - vae_reference = scvi.model.TOTALVI.load(dir_path=par["reference_model_path"], adata=adata_reference) - - adata_query: AnnData = align_proteins_names(adata_reference, mdata_query, adata_query, reference_proteins_key=par["reference_proteins_modality"], - query_proteins_key=par["query_proteins_modality"]) - - # Reorder genes and pad missing genes with 0s - scvi.model.TOTALVI.prepare_query_anndata(adata_query, vae_reference) - - # Train the model for query - vae_query = scvi.model.TOTALVI.load_query_data( - adata_query, - vae_reference - ) - vae_query.train(par["max_query_epochs"], plan_kwargs=dict(weight_decay=par["weight_decay"])) - - return vae_query, adata_query - -def main(): - mdata_query = mudata.read(par["input"].strip()) - adata_query = extract_proteins_to_anndata(mdata_query, - rna_modality_key=par["query_modality"], - protein_modality_key=par["query_proteins_modality"], - input_layer=par["input_layer"], - hvg_var_key=par["var_input"]) - - if 
par["reference"].endswith(".h5mu"): - logger.info("Reading reference") - mdata_reference = mudata.read(par["reference"].strip()) - - logger.info("Mapping query to the reference") - vae_query, adata_query = map_query_to_reference(mdata_reference, mdata_query, adata_query) - else: - raise ValueError("Incorrect format of reference, please provide a .h5mu file") - - adata_query.uns["integration_method"] = "totalvi" - - logger.info("Getting the latent representation of query") - mdata_query.mod[par["query_modality"]].obsm[par["obsm_output"]] = vae_query.get_latent_representation() - - norm_rna, norm_protein = vae_query.get_normalized_expression() - mdata_query.mod[par["query_modality"]].obsm[par["obsm_normalized_rna_output"]] = norm_rna.to_numpy() - - if par["query_proteins_modality"] in mdata_query.mod: - mdata_query.mod[par["query_proteins_modality"]].obsm[par["obsm_normalized_protein_output"]] = norm_protein.to_numpy() - - logger.info("Updating mdata") - mdata_query.update() - - logger.info("Saving updated query data") - mdata_query.write_h5mu(par["output"].strip()) - - logger.info("Saving query model") - vae_query.save(par["query_model_path"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ]; then - VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashStripAutomount "$VIASH_PAR_REFERENCE_MODEL_PATH") -fi -if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ]; then - VIASH_PAR_QUERY_MODEL_PATH=$(ViashStripAutomount "$VIASH_PAR_QUERY_MODEL_PATH") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && [ ! -e "$VIASH_PAR_REFERENCE_MODEL_PATH" ]; then - ViashError "Output file '$VIASH_PAR_REFERENCE_MODEL_PATH' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ] && [ ! -e "$VIASH_PAR_QUERY_MODEL_PATH" ]; then - ViashError "Output file '$VIASH_PAR_QUERY_MODEL_PATH' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/interactive/run_cellxgene/.config.vsh.yaml b/target/docker/interactive/run_cellxgene/.config.vsh.yaml deleted file mode 100644 index c0c1dca36fd..00000000000 --- a/target/docker/interactive/run_cellxgene/.config.vsh.yaml +++ /dev/null @@ -1,83 +0,0 @@ -functionality: - name: "run_cellxgene" - namespace: "interactive" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Directory to mount" - info: null - default: - - "." 
- must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--port" - alternatives: - - "-p" - description: "Port to use" - info: null - default: - - "5005" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:focal" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - run_args: - - "$( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' )" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - - type: "apt" - packages: - - "build-essential libxml2-dev python3-dev python3-pip zlib1g-dev python3-requests\ - \ python3-aiohttp python-is-python3" - interactive: false - - type: "python" - user: false - packages: - - "cellxgene" - upgrade: true - entrypoint: [] - cmd: null -info: - config: "/home/runner/work/openpipeline/openpipeline/src/interactive/run_cellxgene/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cellxgene" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cellxgene/run_cellxgene" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/interactive/run_cellxgene/run_cellxgene b/target/docker/interactive/run_cellxgene/run_cellxgene deleted file mode 100755 index 
47e66cbc109..00000000000 --- a/target/docker/interactive/run_cellxgene/run_cellxgene +++ /dev/null @@ -1,901 +0,0 @@ -#!/usr/bin/env bash - -# run_cellxgene 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 
2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. 
-# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="run_cellxgene" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "run_cellxgene 0.12.3" - echo "" - echo "" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, file must exist" - echo " default: ." - echo " Directory to mount" - echo "" - echo " -p, --port" - echo " type: string" - echo " default: 5005" - echo " Port to use" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
- exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:focal - -ENTRYPOINT [] - - -RUN apt update && apt upgrade -y -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential libxml2-dev python3-dev python3-pip zlib1g-dev python3-requests python3-aiohttp python-is-python3 && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "cellxgene" - -LABEL org.opencontainers.image.description="Companion container for running component interactive run_cellxgene" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" 
-LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-run_cellxgene-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "run_cellxgene 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --port) - [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PORT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --port. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --port=*) - [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port=*\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PORT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -p) - [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'-p\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PORT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -p. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0'" - docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - 
esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_INPUT+x} ]; then - VIASH_PAR_INPUT="." -fi -if [ -z ${VIASH_PAR_PORT+x} ]; then - VIASH_PAR_PORT="5005" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-run_cellxgene-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_PORT+x} ]; then echo "${VIASH_PAR_PORT}" | sed "s#'#'\"'\"'#g;s#.*#par_port='&'#" ; else echo "# par_port="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -export LC_ALL=C.UTF-8 -export LANG=C.UTF-8 - -/usr/local/bin/cellxgene launch -p $VIASH_PAR_PORT --host 0.0.0.0 -v $VIASH_PAR_INPUT -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -exit 0 diff --git a/target/docker/interactive/run_cirrocumulus/.config.vsh.yaml b/target/docker/interactive/run_cirrocumulus/.config.vsh.yaml deleted file mode 100644 index 3e76e98ee2c..00000000000 --- a/target/docker/interactive/run_cirrocumulus/.config.vsh.yaml +++ /dev/null @@ -1,85 +0,0 @@ -functionality: - name: "run_cirrocumulus" - namespace: "interactive" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Directory to mount" - info: null - default: - - "." - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--port" - alternatives: - - "-p" - description: "Port to use" - info: null - default: - - "5005" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:focal" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - run_args: - - "-p $VIASH_PAR_PORT:$VIASH_PAR_PORT" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - - type: "apt" - packages: - - "build-essential libxml2-dev python3-dev python3-pip zlib1g-dev libhdf5-dev\ - \ python-is-python3" - interactive: false - - type: "python" - user: false - packages: - - "requests" - - 
"aiohttp" - - "cirrocumulus" - upgrade: true - entrypoint: [] - cmd: null -info: - config: "/home/runner/work/openpipeline/openpipeline/src/interactive/run_cirrocumulus/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cirrocumulus" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cirrocumulus/run_cirrocumulus" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/interactive/run_cirrocumulus/run_cirrocumulus b/target/docker/interactive/run_cirrocumulus/run_cirrocumulus deleted file mode 100755 index 65a3c5ad76d..00000000000 --- a/target/docker/interactive/run_cirrocumulus/run_cirrocumulus +++ /dev/null @@ -1,901 +0,0 @@ -#!/usr/bin/env bash - -# run_cirrocumulus 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! 
-# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="run_cirrocumulus" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "run_cirrocumulus 0.12.3" - echo "" - echo "" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, file must exist" - echo " default: ." - echo " Directory to mount" - echo "" - echo " -p, --port" - echo " type: string" - echo " default: 5005" - echo " Port to use" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:focal - -ENTRYPOINT [] - - -RUN apt update && apt upgrade -y -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential libxml2-dev python3-dev python3-pip zlib1g-dev libhdf5-dev python-is-python3 && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "requests" "aiohttp" "cirrocumulus" - -LABEL org.opencontainers.image.description="Companion container for running component interactive run_cirrocumulus" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL 
org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-run_cirrocumulus-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! 
"$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - 
ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "run_cirrocumulus 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --port) - [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PORT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --port. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --port=*) - [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port=*\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_PORT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -p) - [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'-p\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PORT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -p. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0'" - docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 
1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_INPUT+x} ]; then - VIASH_PAR_INPUT="." 
-fi -if [ -z ${VIASH_PAR_PORT+x} ]; then - VIASH_PAR_PORT="5005" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-run_cirrocumulus-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_PORT+x} ]; then echo "${VIASH_PAR_PORT}" | sed "s#'#'\"'\"'#g;s#.*#par_port='&'#" ; else echo "# par_port="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -export LC_ALL=C.UTF-8 -export LANG=C.UTF-8 - -/usr/local/bin/cirro launch --port $VIASH_PAR_PORT --host 0.0.0.0 $VIASH_PAR_INPUT -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -exit 0 diff --git a/target/docker/interpret/lianapy/.config.vsh.yaml b/target/docker/interpret/lianapy/.config.vsh.yaml deleted file mode 100644 index 1d24676cd7a..00000000000 --- a/target/docker/interpret/lianapy/.config.vsh.yaml +++ /dev/null @@ -1,313 +0,0 @@ -functionality: - name: "lianapy" - namespace: "interpret" - version: "0.12.3" - authors: - - name: "Mauro Saporita" - roles: - - "author" - info: - role: "Contributor" - links: - email: "maurosaporita@gmail.com" - github: "mauro-saporita" - linkedin: "mauro-saporita-930b06a5" - organizations: - - name: "Ardigen" - href: "https://ardigen.com" - role: "Lead Nextflow Developer" - - name: "Povilas Gibas" - roles: - - "author" - info: - role: "Contributor" - links: - email: "povilasgibas@gmail.com" - github: "PoGibas" - linkedin: "povilas-gibas" - organizations: - - name: "Ardigen" - href: "https://ardigen.com" - role: "Bioinformatician" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - default: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - description: "Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--groupby" - description: "The key of the observations grouping to consider." - info: null - default: - - "bulk_labels" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--resource_name" - description: "Name of the resource to be loaded and use for ligand-receptor inference." - info: null - default: - - "consensus" - required: false - choices: - - "baccin2019" - - "cellcall" - - "cellchatdb" - - "cellinker" - - "cellphonedb" - - "celltalkdb" - - "connectomedb2020" - - "consensus" - - "embrace" - - "guide2pharma" - - "hpmr" - - "icellnet" - - "italk" - - "kirouac2010" - - "lrdb" - - "mouseconsensus" - - "ramilowski2015" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--gene_symbol" - description: "Column name in var DataFrame in which gene symbol are stored." - info: null - default: - - "gene_symbol" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--expr_prop" - description: "Minimum expression proportion for the ligands/receptors (and their\ - \ subunits) in the corresponding cell identities. 
Set to '0', to return unfiltered\ - \ results." - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells" - description: "Minimum cells per cell identity ('groupby') to be considered for\ - \ downstream analysis." - info: null - default: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--aggregate_method" - description: "Method aggregation approach, one of ['mean', 'rra'], where 'mean'\ - \ represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde et\ - \ al., 2014) of the interactions." - info: null - default: - - "rra" - required: false - choices: - - "mean" - - "rra" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--return_all_lrs" - description: "Bool whether to return all LRs, or only those that surpass the 'expr_prop'\ - \ threshold. Those interactions that do not pass the 'expr_prop' threshold will\ - \ be assigned to the *worst* score of the ones that do. 'False' by default." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_perms" - description: "Number of permutations for the permutation test. Note that this\ - \ is relevant only for permutation-based methods - e.g. 
'CellPhoneDB" - info: null - default: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Performs LIANA integration based as described in https://github.com/saezlab/liana-py" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "liana~=0.1.9" - - "numpy~=1.24.3" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: 
"memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/interpret/lianapy" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/interpret/lianapy/lianapy" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/interpret/lianapy/lianapy b/target/docker/interpret/lianapy/lianapy deleted file mode 100755 index 0cd6aaf5700..00000000000 --- a/target/docker/interpret/lianapy/lianapy +++ /dev/null @@ -1,1259 +0,0 @@ -#!/usr/bin/env bash - -# lianapy 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Mauro Saporita (author) -# * Povilas Gibas (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity 
level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="lianapy" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "lianapy 0.12.3" - echo "" - echo "Performs LIANA integration based as described in" - echo "https://github.com/saezlab/liana-py" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " default: gzip" - echo " choices: [ gzip, lzf ]" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --layer" - echo " type: string" - echo " Layer in anndata.AnnData.layers to use. If None, use" - echo " mudata.mod[modality].X." - echo "" - echo " --groupby" - echo " type: string" - echo " default: bulk_labels" - echo " The key of the observations grouping to consider." 
- echo "" - echo " --resource_name" - echo " type: string" - echo " default: consensus" - echo " choices: [ baccin2019, cellcall, cellchatdb, cellinker, cellphonedb," - echo "celltalkdb, connectomedb2020, consensus, embrace, guide2pharma, hpmr, icellnet," - echo "italk, kirouac2010, lrdb, mouseconsensus, ramilowski2015 ]" - echo " Name of the resource to be loaded and use for ligand-receptor inference." - echo "" - echo " --gene_symbol" - echo " type: string" - echo " default: gene_symbol" - echo " Column name in var DataFrame in which gene symbol are stored." - echo "" - echo " --expr_prop" - echo " type: double" - echo " default: 0.1" - echo " Minimum expression proportion for the ligands/receptors (and their" - echo " subunits) in the corresponding cell identities. Set to '0', to return" - echo " unfiltered results." - echo "" - echo " --min_cells" - echo " type: integer" - echo " default: 5" - echo " Minimum cells per cell identity ('groupby') to be considered for" - echo " downstream analysis." - echo "" - echo " --aggregate_method" - echo " type: string" - echo " default: rra" - echo " choices: [ mean, rra ]" - echo " Method aggregation approach, one of ['mean', 'rra'], where 'mean'" - echo " represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde" - echo " et al., 2014) of the interactions." - echo "" - echo " --return_all_lrs" - echo " type: boolean" - echo " default: false" - echo " Bool whether to return all LRs, or only those that surpass the" - echo " 'expr_prop' threshold. Those interactions that do not pass the" - echo " 'expr_prop' threshold will be assigned to the *worst* score of the ones" - echo " that do. 'False' by default." - echo "" - echo " --n_perms" - echo " type: integer" - echo " default: 100" - echo " Number of permutations for the permutation test. Note that this is" - echo " relevant only for permutation-based methods - e.g. 
'CellPhoneDB" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "liana~=0.1.9" "numpy~=1.24.3" - -LABEL org.opencontainers.image.authors="Mauro Saporita, Povilas Gibas" -LABEL org.opencontainers.image.description="Companion container for running component interpret lianapy" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-lianapy-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "lianapy 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --layer=*) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --groupby) - [ -n "$VIASH_PAR_GROUPBY" ] && ViashError Bad arguments for option \'--groupby\': \'$VIASH_PAR_GROUPBY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GROUPBY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --groupby. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --groupby=*) - [ -n "$VIASH_PAR_GROUPBY" ] && ViashError Bad arguments for option \'--groupby=*\': \'$VIASH_PAR_GROUPBY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GROUPBY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --resource_name) - [ -n "$VIASH_PAR_RESOURCE_NAME" ] && ViashError Bad arguments for option \'--resource_name\': \'$VIASH_PAR_RESOURCE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RESOURCE_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --resource_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --resource_name=*) - [ -n "$VIASH_PAR_RESOURCE_NAME" ] && ViashError Bad arguments for option \'--resource_name=*\': \'$VIASH_PAR_RESOURCE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RESOURCE_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gene_symbol) - [ -n "$VIASH_PAR_GENE_SYMBOL" ] && ViashError Bad arguments for option \'--gene_symbol\': \'$VIASH_PAR_GENE_SYMBOL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENE_SYMBOL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_symbol. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --gene_symbol=*) - [ -n "$VIASH_PAR_GENE_SYMBOL" ] && ViashError Bad arguments for option \'--gene_symbol=*\': \'$VIASH_PAR_GENE_SYMBOL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENE_SYMBOL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --expr_prop) - [ -n "$VIASH_PAR_EXPR_PROP" ] && ViashError Bad arguments for option \'--expr_prop\': \'$VIASH_PAR_EXPR_PROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPR_PROP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --expr_prop. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --expr_prop=*) - [ -n "$VIASH_PAR_EXPR_PROP" ] && ViashError Bad arguments for option \'--expr_prop=*\': \'$VIASH_PAR_EXPR_PROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPR_PROP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells=*) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --aggregate_method) - [ -n "$VIASH_PAR_AGGREGATE_METHOD" ] && ViashError Bad arguments for option \'--aggregate_method\': \'$VIASH_PAR_AGGREGATE_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_AGGREGATE_METHOD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --aggregate_method. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --aggregate_method=*) - [ -n "$VIASH_PAR_AGGREGATE_METHOD" ] && ViashError Bad arguments for option \'--aggregate_method=*\': \'$VIASH_PAR_AGGREGATE_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_AGGREGATE_METHOD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --return_all_lrs) - [ -n "$VIASH_PAR_RETURN_ALL_LRS" ] && ViashError Bad arguments for option \'--return_all_lrs\': \'$VIASH_PAR_RETURN_ALL_LRS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RETURN_ALL_LRS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --return_all_lrs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --return_all_lrs=*) - [ -n "$VIASH_PAR_RETURN_ALL_LRS" ] && ViashError Bad arguments for option \'--return_all_lrs=*\': \'$VIASH_PAR_RETURN_ALL_LRS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RETURN_ALL_LRS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_perms) - [ -n "$VIASH_PAR_N_PERMS" ] && ViashError Bad arguments for option \'--n_perms\': \'$VIASH_PAR_N_PERMS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_PERMS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_perms. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_perms=*) - [ -n "$VIASH_PAR_N_PERMS" ] && ViashError Bad arguments for option \'--n_perms=*\': \'$VIASH_PAR_N_PERMS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_PERMS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then - VIASH_PAR_OUTPUT_COMPRESSION="gzip" -fi -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_GROUPBY+x} ]; then - VIASH_PAR_GROUPBY="bulk_labels" -fi -if [ -z ${VIASH_PAR_RESOURCE_NAME+x} ]; then - VIASH_PAR_RESOURCE_NAME="consensus" -fi -if [ -z ${VIASH_PAR_GENE_SYMBOL+x} ]; then - VIASH_PAR_GENE_SYMBOL="gene_symbol" -fi -if [ -z ${VIASH_PAR_EXPR_PROP+x} ]; then - VIASH_PAR_EXPR_PROP="0.1" -fi -if [ -z ${VIASH_PAR_MIN_CELLS+x} ]; then - VIASH_PAR_MIN_CELLS="5" -fi -if [ -z ${VIASH_PAR_AGGREGATE_METHOD+x} ]; then - VIASH_PAR_AGGREGATE_METHOD="rra" -fi -if [ -z ${VIASH_PAR_RETURN_ALL_LRS+x} ]; then - VIASH_PAR_RETURN_ALL_LRS="false" -fi -if [ -z ${VIASH_PAR_N_PERMS+x} ]; then - VIASH_PAR_N_PERMS="100" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_EXPR_PROP" ]]; then - if ! [[ "$VIASH_PAR_EXPR_PROP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--expr_prop' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_RETURN_ALL_LRS" ]]; then - if ! [[ "$VIASH_PAR_RETURN_ALL_LRS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--return_all_lrs' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_PERMS" ]]; then - if ! [[ "$VIASH_PAR_N_PERMS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_perms' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_RESOURCE_NAME" ]; then - VIASH_PAR_RESOURCE_NAME_CHOICES=("baccin2019:cellcall:cellchatdb:cellinker:cellphonedb:celltalkdb:connectomedb2020:consensus:embrace:guide2pharma:hpmr:icellnet:italk:kirouac2010:lrdb:mouseconsensus:ramilowski2015") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_RESOURCE_NAME_CHOICES[*]}:" =~ ":$VIASH_PAR_RESOURCE_NAME:" ]]; then - ViashError '--resource_name' specified value of \'$VIASH_PAR_RESOURCE_NAME\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_AGGREGATE_METHOD" ]; then - VIASH_PAR_AGGREGATE_METHOD_CHOICES=("mean:rra") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_AGGREGATE_METHOD_CHOICES[*]}:" =~ ":$VIASH_PAR_AGGREGATE_METHOD:" ]]; then - ViashError '--aggregate_method' specified value of \'$VIASH_PAR_AGGREGATE_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-lianapy-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import liana -import mudata -# TODO: Remove when grouping labels exist -# For sign/PCA/ -import numpy as np - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'groupby': $( if [ ! -z ${VIASH_PAR_GROUPBY+x} ]; then echo "r'${VIASH_PAR_GROUPBY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resource_name': $( if [ ! -z ${VIASH_PAR_RESOURCE_NAME+x} ]; then echo "r'${VIASH_PAR_RESOURCE_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'gene_symbol': $( if [ ! 
-z ${VIASH_PAR_GENE_SYMBOL+x} ]; then echo "r'${VIASH_PAR_GENE_SYMBOL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'expr_prop': $( if [ ! -z ${VIASH_PAR_EXPR_PROP+x} ]; then echo "float(r'${VIASH_PAR_EXPR_PROP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'aggregate_method': $( if [ ! -z ${VIASH_PAR_AGGREGATE_METHOD+x} ]; then echo "r'${VIASH_PAR_AGGREGATE_METHOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'return_all_lrs': $( if [ ! -z ${VIASH_PAR_RETURN_ALL_LRS+x} ]; then echo "r'${VIASH_PAR_RETURN_ALL_LRS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'n_perms': $( if [ ! -z ${VIASH_PAR_N_PERMS+x} ]; then echo "int(r'${VIASH_PAR_N_PERMS//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - - -def main(): - - # Get input data - mdata = mudata.read(par['input'].strip()) - mod = mdata.mod[par['modality']] - - # Add dummy grouping labels when they do not exist - if par['groupby'] not in mod.obs: - foo = mod.obsm.to_df().iloc[:, 0] - mod.obs[par['groupby']] = np.sign(foo).astype('category') - - # Solve gene labels - orig_gene_label = mod.var.index - mod.var_names = mod.var[par['gene_symbol']].astype(str) - mod.var_names_make_unique() - - liana.mt.rank_aggregate( - adata = mod, - groupby = par['groupby'], - resource_name = par["resource_name"], - expr_prop = par["expr_prop"], - min_cells = par["min_cells"], - aggregate_method = par["aggregate_method"], - return_all_lrs = par["return_all_lrs"], - layer = par["layer"], - n_perms = par["n_perms"], - verbose = True, - inplace = True, - use_raw = False - ) - - # Return original gene labels - mod.var_names = orig_gene_label - - # TODO: make sure compression is needed - mdata.write_h5mu(par['output'].strip(), compression=par['output_compression']) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/labels_transfer/knn/.config.vsh.yaml b/target/docker/labels_transfer/knn/.config.vsh.yaml deleted file mode 100644 index b45adbd8b4e..00000000000 --- a/target/docker/labels_transfer/knn/.config.vsh.yaml +++ /dev/null @@ -1,379 +0,0 @@ -functionality: - name: "knn" - namespace: "labels_transfer" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - roles: - - "author" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Input dataset (query) arguments" - arguments: - - type: "file" - name: "--input" - description: "The query data to transfer the labels to. Should be a .h5mu file." - info: - label: "Query" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." 
- required: true - slots: - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's inference,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - required: false - description: "The embedding to use for the classifier's inference.\ - \ Override using the `--input_obsm_features` argument. If not\ - \ provided, the `.X` slot will be used instead.\nMake sure that\ - \ embedding was obtained in the same way as the reference embedding\ - \ (e.g. by the same model or preprocessing).\n" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to use." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's inference.\ - \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ - \ was obtained in the same way as the reference embedding (e.g. by the same\ - \ model or preprocessing).\n" - info: null - example: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference dataset arguments" - arguments: - - type: "file" - name: "--reference" - description: "The reference data to train classifiers on." 
- info: - label: "Reference" - file_format: - type: "h5ad" - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's training,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - description: "The embedding to use for the classifier's training. Override\ - \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ - \ was obtained in the same way as the query embedding (e.g. by the same\ - \ model or preprocessing).\n" - required: true - obs: - - type: "string" - name: "targets" - multiple: true - example: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - description: "The target labels to transfer. Override using the `--reference_obs_targets`\ - \ argument." - required: true - example: - - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ - Make sure that embedding was obtained in the same way as the query embedding\ - \ (e.g. by the same model or preprocessing).\n" - info: null - default: - - "X_integrated_scanvi" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_targets" - description: "The `.obs` key of the target labels to tranfer." 
- info: null - default: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The query data in .h5mu format with predicted labels transfered\ - \ from the reference." - info: - label: "Output data" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - obs: - - type: "string" - name: "predictions" - description: "The predicted labels. Override using the `--output_obs_predictions`\ - \ argument." - required: true - - type: "double" - name: "uncertainty" - description: "The uncertainty of the predicted labels. Override using\ - \ the `--output_obs_uncertainty` argument." - required: false - obsm: - - type: "double" - name: "X_integrated_scanvi" - description: "The embedding used for the classifier's inference. Could\ - \ have any name, specified by `input_obsm_features` argument.\"" - required: false - uns: - - type: "string" - name: "parameters" - example: "labels_tranfer" - description: "Additional information about the parameters used for\ - \ the label transfer." 
- required: true - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_predictions" - description: "In which `.obs` slots to store the predicted information.\nIf\ - \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_uncertainty" - description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ - If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_uns_parameters" - description: "The `.uns` key to store additional information about the parameters\ - \ used for the label transfer." 
- info: null - default: - - "labels_transfer" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--n_neighbors" - alternatives: - - "-k" - description: "Number of nearest neighbors to use for classification" - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../utils/helper.py" - - type: "file" - path: "../../utils/setup_logger.py" - description: "Performs label transfer from reference to query using KNN classifier" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: - method_id: "KNN_pynndescent" - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "pynndescent~=0.5.8" - - "numba~=0.56.4" - - "numpy~=1.23.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - 
directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/knn" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/knn/knn" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/labels_transfer/knn/helper.py b/target/docker/labels_transfer/knn/helper.py deleted file mode 100644 index a90bf59efdb..00000000000 --- a/target/docker/labels_transfer/knn/helper.py +++ /dev/null @@ -1,32 +0,0 @@ -def check_arguments(par): - # check output .obs predictions - if not par["output_obs_predictions"]: - par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] - assert len(par["output_obs_predictions"]) == 
len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" - - # check output .obs uncertainty - if not par["output_obs_uncertainty"]: - par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] - assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" - - return par - -def get_reference_features(adata_reference, par, logger): - if par["reference_obsm_features"] is None: - logger.info("Using .X of reference data") - train_data = adata_reference.X - else: - logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") - train_data = adata_reference.obsm[par["reference_obsm_features"]] - - return train_data - -def get_query_features(adata, par, logger): - if par["input_obsm_features"] is None: - logger.info("Using .X of query data") - query_data = adata.X - else: - logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") - query_data = adata.obsm[par["input_obsm_features"]] - - return query_data \ No newline at end of file diff --git a/target/docker/labels_transfer/knn/knn b/target/docker/labels_transfer/knn/knn deleted file mode 100755 index 32eb89ffe31..00000000000 --- a/target/docker/labels_transfer/knn/knn +++ /dev/null @@ -1,1258 +0,0 @@ -#!/usr/bin/env bash - -# knn 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Vladimir Shitov (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="knn" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "knn 0.12.3" - echo "" - echo "Performs label transfer from reference to query using KNN classifier" - echo "" - echo "Input dataset (query) arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " The query data to transfer the labels to. Should be a .h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " Which modality to use." - echo "" - echo " --input_obsm_features" - echo " type: string" - echo " example: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's inference." - echo " If not provided, the \`.X\` slot will be used instead." - echo " Make sure that embedding was obtained in the same way as the reference" - echo " embedding (e.g. by the same model or preprocessing)." - echo "" - echo "Reference dataset arguments:" - echo " --reference" - echo " type: file, file must exist" - echo " example:" - echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - echo " The reference data to train classifiers on." - echo "" - echo " --reference_obsm_features" - echo " type: string, required parameter" - echo " default: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's training." 
- echo " Make sure that embedding was obtained in the same way as the query" - echo " embedding (e.g. by the same model or preprocessing)." - echo "" - echo " --reference_obs_targets" - echo " type: string, multiple values allowed" - echo " default:" - echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" - echo " The \`.obs\` key of the target labels to tranfer." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " The query data in .h5mu format with predicted labels transfered from the" - echo " reference." - echo "" - echo " --output_obs_predictions" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the predicted information." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_pred\"\` suffix." - echo "" - echo " --output_obs_uncertainty" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the uncertainty of the predictions." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_uncertainty\"\` suffix." - echo "" - echo " --output_uns_parameters" - echo " type: string" - echo " default: labels_transfer" - echo " The \`.uns\` key to store additional information about the parameters used" - echo " for the label transfer." 
- echo "" - echo "Learning parameters:" - echo " -k, --n_neighbors" - echo " type: integer, required parameter" - echo " Number of nearest neighbors to use for classification" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? 
# returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y libopenblas-dev liblapack-dev gfortran && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "scanpy~=1.9.5" "pynndescent~=0.5.8" "numba~=0.56.4" "numpy~=1.23.5" - -LABEL org.opencontainers.image.authors="Vladimir Shitov" -LABEL org.opencontainers.image.description="Companion container for running component labels_transfer knn" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-knn-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "knn 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_obsm_features) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --input_obsm_features=*) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obsm_features) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obsm_features=*) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obs_targets) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="$2" - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obs_targets=*) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_obs_predictions) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_obs_predictions=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_obs_uncertainty) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_obs_uncertainty=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_uns_parameters) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_uns_parameters=*) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_neighbors) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_neighbors=*) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1") - shift 1 - ;; - -k) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'-k\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -k. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then - ViashError '--reference_obsm_features' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then - ViashError '--n_neighbors' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" -fi -if [ -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then - VIASH_PAR_OUTPUT_UNS_PARAMETERS="labels_transfer" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then - if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-knn-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import warnings - -import mudata -import numpy as np -import scanpy as sc -from scipy.sparse import issparse -import pynndescent -import numba - - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'n_neighbors': $( if [ ! 
-z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -from helper import check_arguments, get_reference_features, get_query_features -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger - -@numba.njit -def weighted_prediction(weights, ref_cats): - """Get highest weight category.""" - N = len(weights) - predictions = np.zeros((N,), dtype=ref_cats.dtype) - uncertainty = np.zeros((N,)) - for i in range(N): - obs_weights = weights[i] - obs_cats = ref_cats[i] - best_prob = 0 - for c in np.unique(obs_cats): - cand_prob = np.sum(obs_weights[obs_cats == c]) - if cand_prob > best_prob: - best_prob = cand_prob - predictions[i] = c - uncertainty[i] = max(1 - best_prob, 0) - - return predictions, uncertainty - -def distances_to_affinities(distances): - stds = np.std(distances, axis=1) - stds = (2.0 / stds) ** 2 - stds = stds.reshape(-1, 1) - distances_tilda = np.exp(-np.true_divide(distances, stds)) - - return distances_tilda / np.sum(distances_tilda, axis=1, keepdims=True) - -def main(par): - logger = setup_logger() - - logger.info("Checking arguments") - par = check_arguments(par) - - logger.info("Reading input (query) data") - mdata = mudata.read(par["input"]) - adata = mdata.mod[par["modality"]] - - logger.info("Reading reference data") - adata_reference = sc.read(par["reference"], backup_url=par["reference"]) - - # fetch feature data - train_data = 
get_reference_features(adata_reference, par, logger) - query_data = get_query_features(adata, par, logger) - - # pynndescent does not support sparse matrices - if issparse(train_data): - warnings.warn("Converting sparse matrix to dense. This may consume a lot of memory.") - train_data = train_data.toarray() - - logger.debug(f"Shape of train data: {train_data.shape}") - - logger.info("Building NN index") - ref_nn_index = pynndescent.NNDescent(train_data, n_neighbors=par["n_neighbors"]) - ref_nn_index.prepare() - - ref_neighbors, ref_distances = ref_nn_index.query(query_data, k=par["n_neighbors"]) - - weights = distances_to_affinities(ref_distances) - - output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) - - # for each annotation level, get prediction and uncertainty - - for obs_tar, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): - logger.info(f"Predicting labels for {obs_tar}") - ref_cats = adata_reference.obs[obs_tar].cat.codes.to_numpy()[ref_neighbors] - prediction, uncertainty = weighted_prediction(weights, ref_cats) - prediction = np.asarray(adata_reference.obs[obs_tar].cat.categories)[prediction] - - adata.obs[obs_pred], adata.obs[obs_unc] = prediction, uncertainty - - # Write information about labels transfer to uns - output_uns_parameters[obs_tar] = { - "method": "KNN_pynndescent", - "n_neighbors": par["n_neighbors"], - "reference": par["reference"] - } - - adata.uns[par["output_uns_parameters"]] = output_uns_parameters - - mdata.mod[par['modality']] = adata - mdata.update() - mdata.write_h5mu(par['output'].strip()) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/labels_transfer/knn/setup_logger.py b/target/docker/labels_transfer/knn/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/labels_transfer/knn/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/labels_transfer/xgboost/.config.vsh.yaml b/target/docker/labels_transfer/xgboost/.config.vsh.yaml deleted file mode 100644 index 1ff97a896f4..00000000000 --- a/target/docker/labels_transfer/xgboost/.config.vsh.yaml +++ /dev/null @@ -1,594 +0,0 @@ -functionality: - name: "xgboost" - namespace: "labels_transfer" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - roles: - - "author" - info: - role: "Contributor" - links: - email: 
"vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Input dataset (query) arguments" - arguments: - - type: "file" - name: "--input" - description: "The query data to transfer the labels to. Should be a .h5mu file." - info: - label: "Query" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - slots: - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's inference,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - required: false - description: "The embedding to use for the classifier's inference.\ - \ Override using the `--input_obsm_features` argument. If not\ - \ provided, the `.X` slot will be used instead.\nMake sure that\ - \ embedding was obtained in the same way as the reference embedding\ - \ (e.g. by the same model or preprocessing).\n" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to use." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's inference.\ - \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ - \ was obtained in the same way as the reference embedding (e.g. 
by the same\ - \ model or preprocessing).\n" - info: null - example: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference dataset arguments" - arguments: - - type: "file" - name: "--reference" - description: "The reference data to train classifiers on." - info: - label: "Reference" - file_format: - type: "h5ad" - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's training,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - description: "The embedding to use for the classifier's training. Override\ - \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ - \ was obtained in the same way as the query embedding (e.g. by the same\ - \ model or preprocessing).\n" - required: true - obs: - - type: "string" - name: "targets" - multiple: true - example: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - description: "The target labels to transfer. Override using the `--reference_obs_targets`\ - \ argument." - required: true - example: - - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ - Make sure that embedding was obtained in the same way as the query embedding\ - \ (e.g. 
by the same model or preprocessing).\n" - info: null - default: - - "X_integrated_scanvi" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_targets" - description: "The `.obs` key of the target labels to tranfer." - info: null - default: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The query data in .h5mu format with predicted labels transfered\ - \ from the reference." - info: - label: "Output data" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - obs: - - type: "string" - name: "predictions" - description: "The predicted labels. Override using the `--output_obs_predictions`\ - \ argument." - required: true - - type: "double" - name: "uncertainty" - description: "The uncertainty of the predicted labels. Override using\ - \ the `--output_obs_uncertainty` argument." - required: false - obsm: - - type: "double" - name: "X_integrated_scanvi" - description: "The embedding used for the classifier's inference. Could\ - \ have any name, specified by `input_obsm_features` argument.\"" - required: false - uns: - - type: "string" - name: "parameters" - example: "labels_tranfer" - description: "Additional information about the parameters used for\ - \ the label transfer." 
- required: true - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_predictions" - description: "In which `.obs` slots to store the predicted information.\nIf\ - \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_uncertainty" - description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ - If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_uns_parameters" - description: "The `.uns` key to store additional information about the parameters\ - \ used for the label transfer." - info: null - default: - - "labels_transfer" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Execution arguments" - arguments: - - type: "boolean_true" - name: "--force_retrain" - alternatives: - - "-f" - description: "Retrain models on the reference even if model_output directory\ - \ already has trained classifiers. WARNING! It will rewrite existing classifiers\ - \ for targets in the model_output directory!" - info: null - direction: "input" - dest: "par" - - type: "boolean" - name: "--use_gpu" - description: "Use GPU during models training and inference (recommended)." 
- info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--verbosity" - alternatives: - - "-v" - description: "The verbosity level for evaluation of the classifier from the\ - \ range [0,2]" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--model_output" - description: "Output directory for model" - info: null - default: - - "model" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "double" - name: "--learning_rate" - alternatives: - - "--eta" - description: "Step size shrinkage used in update to prevents overfitting. Range:\ - \ [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_split_loss" - alternatives: - - "--gamma" - description: "Minimum loss reduction required to make a further partition on\ - \ a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_depth" - alternatives: - - "-d" - description: "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 6 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_child_weight" - description: "Minimum sum of instance weight (hessian) needed in a child. 
See\ - \ https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_delta_step" - description: "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--subsample" - description: "Subsample ratio of the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sampling_method" - description: "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - "uniform" - required: false - choices: - - "uniform" - - "gradient_based" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bytree" - description: "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bylevel" - description: "Subsample ratio of columns for each level. Range (0, 1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bynode" - description: "Subsample ratio of columns for each node (split). Range (0, 1].\ - \ See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--reg_lambda" - alternatives: - - "--lambda" - description: "L2 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--reg_alpha" - alternatives: - - "--alpha" - description: "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--scale_pos_weight" - description: "Control the balance of positive and negative weights, useful for\ - \ unbalanced classes. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../utils/helper.py" - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs label transfer from reference to query using XGBoost classifier" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: - method_id: "XGBClassifier" - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "xgboost~=1.7.1" - - "scikit-learn~=1.1.1" - - "numpy~=1.23.5" - - "pandas~=1.4.4" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - 
labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/xgboost" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/xgboost/xgboost" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/labels_transfer/xgboost/helper.py b/target/docker/labels_transfer/xgboost/helper.py deleted file mode 100644 index a90bf59efdb..00000000000 --- a/target/docker/labels_transfer/xgboost/helper.py +++ /dev/null @@ -1,32 +0,0 @@ -def check_arguments(par): - # check output .obs predictions - if not par["output_obs_predictions"]: - par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] - assert len(par["output_obs_predictions"]) == len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" - - # check output .obs 
uncertainty - if not par["output_obs_uncertainty"]: - par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] - assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" - - return par - -def get_reference_features(adata_reference, par, logger): - if par["reference_obsm_features"] is None: - logger.info("Using .X of reference data") - train_data = adata_reference.X - else: - logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") - train_data = adata_reference.obsm[par["reference_obsm_features"]] - - return train_data - -def get_query_features(adata, par, logger): - if par["input_obsm_features"] is None: - logger.info("Using .X of query data") - query_data = adata.X - else: - logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") - query_data = adata.obsm[par["input_obsm_features"]] - - return query_data \ No newline at end of file diff --git a/target/docker/labels_transfer/xgboost/setup_logger.py b/target/docker/labels_transfer/xgboost/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/labels_transfer/xgboost/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/labels_transfer/xgboost/xgboost b/target/docker/labels_transfer/xgboost/xgboost deleted file mode 100755 index acbe5e5cfc8..00000000000 --- a/target/docker/labels_transfer/xgboost/xgboost +++ /dev/null @@ -1,2013 +0,0 @@ -#!/usr/bin/env bash - -# xgboost 0.12.3 -# -# This wrapper 
script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Vladimir Shitov (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname 
"$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="xgboost" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "xgboost 0.12.3" - echo "" - echo "Performs label transfer from reference to query using XGBoost classifier" - echo "" - echo "Input dataset (query) arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " The query data to transfer the labels to. Should be a .h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " Which modality to use." - echo "" - echo " --input_obsm_features" - echo " type: string" - echo " example: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's inference." - echo " If not provided, the \`.X\` slot will be used instead." - echo " Make sure that embedding was obtained in the same way as the reference" - echo " embedding (e.g. by the same model or preprocessing)." 
- echo "" - echo "Reference dataset arguments:" - echo " --reference" - echo " type: file, file must exist" - echo " example:" - echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - echo " The reference data to train classifiers on." - echo "" - echo " --reference_obsm_features" - echo " type: string, required parameter" - echo " default: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's training." - echo " Make sure that embedding was obtained in the same way as the query" - echo " embedding (e.g. by the same model or preprocessing)." - echo "" - echo " --reference_obs_targets" - echo " type: string, multiple values allowed" - echo " default:" - echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" - echo " The \`.obs\` key of the target labels to tranfer." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " The query data in .h5mu format with predicted labels transfered from the" - echo " reference." - echo "" - echo " --output_obs_predictions" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the predicted information." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_pred\"\` suffix." - echo "" - echo " --output_obs_uncertainty" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the uncertainty of the predictions." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_uncertainty\"\` suffix." 
- echo "" - echo " --output_uns_parameters" - echo " type: string" - echo " default: labels_transfer" - echo " The \`.uns\` key to store additional information about the parameters used" - echo " for the label transfer." - echo "" - echo "Execution arguments:" - echo " -f, --force_retrain" - echo " type: boolean_true" - echo " Retrain models on the reference even if model_output directory already" - echo " has trained classifiers. WARNING! It will rewrite existing classifiers" - echo " for targets in the model_output directory!" - echo "" - echo " --use_gpu" - echo " type: boolean" - echo " default: false" - echo " Use GPU during models training and inference (recommended)." - echo "" - echo " -v, --verbosity" - echo " type: integer" - echo " default: 1" - echo " The verbosity level for evaluation of the classifier from the range" - echo " [0,2]" - echo "" - echo " --model_output" - echo " type: file, output, file must exist" - echo " default: model" - echo " Output directory for model" - echo "" - echo "Learning parameters:" - echo " --eta, --learning_rate" - echo " type: double" - echo " default: 0.3" - echo " Step size shrinkage used in update to prevents overfitting. Range:" - echo " [0,1]. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --gamma, --min_split_loss" - echo " type: double" - echo " default: 0.0" - echo " Minimum loss reduction required to make a further partition on a leaf" - echo " node of the tree. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " -d, --max_depth" - echo " type: integer" - echo " default: 6" - echo " Maximum depth of a tree. 
See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --min_child_weight" - echo " type: integer" - echo " default: 1" - echo " Minimum sum of instance weight (hessian) needed in a child. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --max_delta_step" - echo " type: double" - echo " default: 0.0" - echo " Maximum delta step we allow each leaf output to be. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --subsample" - echo " type: double" - echo " default: 1.0" - echo " Subsample ratio of the training instances. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --sampling_method" - echo " type: string" - echo " default: uniform" - echo " choices: [ uniform, gradient_based ]" - echo " The method to use to sample the training instances. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --colsample_bytree" - echo " type: double" - echo " default: 1.0" - echo " Fraction of columns to be subsampled. Range (0, 1]. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --colsample_bylevel" - echo " type: double" - echo " default: 1.0" - echo " Subsample ratio of columns for each level. Range (0, 1]. 
See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --colsample_bynode" - echo " type: double" - echo " default: 1.0" - echo " Subsample ratio of columns for each node (split). Range (0, 1]. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --lambda, --reg_lambda" - echo " type: double" - echo " default: 1.0" - echo " L2 regularization term on weights. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --alpha, --reg_alpha" - echo " type: double" - echo " default: 0.0" - echo " L1 regularization term on weights. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --scale_pos_weight" - echo " type: double" - echo " default: 1.0" - echo " Control the balance of positive and negative weights, useful for" - echo " unbalanced classes. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y libopenblas-dev liblapack-dev gfortran && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "scanpy~=1.9.5" "xgboost~=1.7.1" "scikit-learn~=1.1.1" "numpy~=1.23.5" "pandas~=1.4.4" - -LABEL org.opencontainers.image.authors="Vladimir Shitov" -LABEL org.opencontainers.image.description="Companion container for running component 
labels_transfer xgboost" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-xgboost-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! 
"$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - 
ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "xgboost 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_obsm_features) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --input_obsm_features=*) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obsm_features) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obsm_features=*) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obs_targets) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="$2" - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obs_targets=*) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_obs_predictions) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_obs_predictions=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_obs_uncertainty) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_obs_uncertainty=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_uns_parameters) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_uns_parameters=*) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --force_retrain) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - -f) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - --use_gpu) - [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_GPU="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_gpu. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --use_gpu=*) - [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu=*\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_GPU=$(ViashRemoveFlags "$1") - shift 1 - ;; - --verbosity) - [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VERBOSITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --verbosity. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --verbosity=*) - [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity=*\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VERBOSITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - -v) - [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'-v\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VERBOSITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -v. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --model_output) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --model_output=*) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --learning_rate) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --learning_rate=*) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --eta) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--eta\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --eta. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --min_split_loss) - [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SPLIT_LOSS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_split_loss. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_split_loss=*) - [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss=*\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SPLIT_LOSS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gamma) - [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--gamma\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SPLIT_LOSS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gamma. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_depth) - [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DEPTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_depth. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_depth=*) - [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth=*\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DEPTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - -d) - [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'-d\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MAX_DEPTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -d. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_child_weight) - [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CHILD_WEIGHT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_child_weight. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_child_weight=*) - [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight=*\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CHILD_WEIGHT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_delta_step) - [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DELTA_STEP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_delta_step. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_delta_step=*) - [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step=*\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DELTA_STEP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --subsample) - [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --subsample=*) - [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample=*\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sampling_method) - [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLING_METHOD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_method. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sampling_method=*) - [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method=*\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLING_METHOD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --colsample_bytree) - [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYTREE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bytree. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --colsample_bytree=*) - [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree=*\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYTREE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --colsample_bylevel) - [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_COLSAMPLE_BYLEVEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bylevel. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --colsample_bylevel=*) - [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel=*\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYLEVEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --colsample_bynode) - [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYNODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bynode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --colsample_bynode=*) - [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode=*\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYNODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reg_lambda) - [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_LAMBDA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_lambda. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reg_lambda=*) - [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda=*\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REG_LAMBDA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lambda) - [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_LAMBDA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lambda. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reg_alpha) - [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_ALPHA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_alpha. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reg_alpha=*) - [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha=*\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_ALPHA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alpha) - [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_ALPHA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scale_pos_weight) - [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCALE_POS_WEIGHT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scale_pos_weight. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --scale_pos_weight=*) - [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight=*\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCALE_POS_WEIGHT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then - ViashError '--reference_obsm_features' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" -fi -if [ -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then - VIASH_PAR_OUTPUT_UNS_PARAMETERS="labels_transfer" -fi -if [ -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then - VIASH_PAR_FORCE_RETRAIN="false" -fi -if [ -z ${VIASH_PAR_USE_GPU+x} ]; then - VIASH_PAR_USE_GPU="false" -fi -if [ -z ${VIASH_PAR_VERBOSITY+x} ]; then - VIASH_PAR_VERBOSITY="1" -fi -if [ -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then - VIASH_PAR_MODEL_OUTPUT="model" -fi -if [ -z ${VIASH_PAR_LEARNING_RATE+x} ]; then - VIASH_PAR_LEARNING_RATE="0.3" -fi -if [ -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then - VIASH_PAR_MIN_SPLIT_LOSS="0.0" -fi -if [ -z ${VIASH_PAR_MAX_DEPTH+x} ]; then - VIASH_PAR_MAX_DEPTH="6" -fi -if [ -z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then - VIASH_PAR_MIN_CHILD_WEIGHT="1" -fi -if [ -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then - VIASH_PAR_MAX_DELTA_STEP="0.0" -fi -if [ -z ${VIASH_PAR_SUBSAMPLE+x} ]; then - VIASH_PAR_SUBSAMPLE="1.0" -fi -if [ -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then - VIASH_PAR_SAMPLING_METHOD="uniform" -fi -if [ -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then - VIASH_PAR_COLSAMPLE_BYTREE="1.0" -fi -if [ -z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then - VIASH_PAR_COLSAMPLE_BYLEVEL="1.0" -fi -if [ -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then - VIASH_PAR_COLSAMPLE_BYNODE="1.0" -fi -if [ -z ${VIASH_PAR_REG_LAMBDA+x} ]; then - VIASH_PAR_REG_LAMBDA="1.0" -fi -if [ -z ${VIASH_PAR_REG_ALPHA+x} ]; then - VIASH_PAR_REG_ALPHA="0.0" -fi -if [ -z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then - VIASH_PAR_SCALE_POS_WEIGHT="1.0" -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then - if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--force_retrain' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_USE_GPU" ]]; then - if ! [[ "$VIASH_PAR_USE_GPU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--use_gpu' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_VERBOSITY" ]]; then - if ! [[ "$VIASH_PAR_VERBOSITY" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--verbosity' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then - if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ]]; then - if ! [[ "$VIASH_PAR_MIN_SPLIT_LOSS" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_split_loss' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_DEPTH" ]]; then - if ! [[ "$VIASH_PAR_MAX_DEPTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_depth' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ]]; then - if ! 
[[ "$VIASH_PAR_MIN_CHILD_WEIGHT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_child_weight' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_DELTA_STEP" ]]; then - if ! [[ "$VIASH_PAR_MAX_DELTA_STEP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--max_delta_step' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SUBSAMPLE" ]]; then - if ! [[ "$VIASH_PAR_SUBSAMPLE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--subsample' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ]]; then - if ! [[ "$VIASH_PAR_COLSAMPLE_BYTREE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--colsample_bytree' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ]]; then - if ! [[ "$VIASH_PAR_COLSAMPLE_BYLEVEL" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--colsample_bylevel' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ]]; then - if ! [[ "$VIASH_PAR_COLSAMPLE_BYNODE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--colsample_bynode' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_REG_LAMBDA" ]]; then - if ! [[ "$VIASH_PAR_REG_LAMBDA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--reg_lambda' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_REG_ALPHA" ]]; then - if ! 
[[ "$VIASH_PAR_REG_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--reg_alpha' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ]]; then - if ! [[ "$VIASH_PAR_SCALE_POS_WEIGHT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--scale_pos_weight' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_SAMPLING_METHOD" ]; then - VIASH_PAR_SAMPLING_METHOD_CHOICES=("uniform:gradient_based") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SAMPLING_METHOD_CHOICES[*]}:" =~ ":$VIASH_PAR_SAMPLING_METHOD:" ]]; then - ViashError '--sampling_method' specified value of \'$VIASH_PAR_SAMPLING_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_MODEL_OUTPUT")" ) - VIASH_PAR_MODEL_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_MODEL_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_MODEL_OUTPUT" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-xgboost-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import json -import os -from typing import Optional -import yaml -from pathlib import Path - -import mudata -import numpy as np -import scanpy as sc -import pandas as pd -import xgboost as xgb -from sklearn.model_selection import train_test_split -from sklearn.metrics import classification_report -from sklearn.preprocessing import LabelEncoder - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obs_targets': $( if [ ! 
-z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'use_gpu': $( if [ ! -z ${VIASH_PAR_USE_GPU+x} ]; then echo "r'${VIASH_PAR_USE_GPU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'verbosity': $( if [ ! -z ${VIASH_PAR_VERBOSITY+x} ]; then echo "int(r'${VIASH_PAR_VERBOSITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_split_loss': $( if [ ! -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then echo "float(r'${VIASH_PAR_MIN_SPLIT_LOSS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_depth': $( if [ ! -z ${VIASH_PAR_MAX_DEPTH+x} ]; then echo "int(r'${VIASH_PAR_MAX_DEPTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_child_weight': $( if [ ! 
-z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then echo "int(r'${VIASH_PAR_MIN_CHILD_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_delta_step': $( if [ ! -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DELTA_STEP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sampling_method': $( if [ ! -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then echo "r'${VIASH_PAR_SAMPLING_METHOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'colsample_bytree': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYTREE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'colsample_bylevel': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYLEVEL//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'colsample_bynode': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYNODE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'reg_lambda': $( if [ ! -z ${VIASH_PAR_REG_LAMBDA+x} ]; then echo "float(r'${VIASH_PAR_REG_LAMBDA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'reg_alpha': $( if [ ! -z ${VIASH_PAR_REG_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_REG_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scale_pos_weight': $( if [ ! -z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then echo "float(r'${VIASH_PAR_SCALE_POS_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -from helper import check_arguments, get_reference_features, get_query_features -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -# read config arguments -config = yaml.safe_load(Path(meta["config"]).read_text()) - -# look for training params for method -argument_groups = { grp["name"]: grp["arguments"] for grp in config["functionality"]["argument_groups"] } -training_arg_names = [ arg["name"].replace("--", "") for arg in argument_groups["Learning parameters"] ] -training_params = { arg_name: par[arg_name] for arg_name in training_arg_names } - -def encode_labels(y): - labels_encoder = LabelEncoder() - labels_encoder.fit(y) - - return labels_encoder.transform(y), labels_encoder - - -def get_model_eval(xgb_model, X_test, y_test, labels_encoder): - preds = xgb_model.predict(X_test) - - cr = classification_report(labels_encoder.inverse_transform(y_test), - labels_encoder.inverse_transform(preds), - output_dict=True) - cr_df = pd.DataFrame(cr).transpose() - - return cr_df - - -def train_test_split_adata(adata, labels): - train_data = pd.DataFrame(data=adata.X, index=adata.obs_names) - - X_train, X_test, y_train, y_test = train_test_split( - train_data, labels, test_size=0.2, random_state=42, stratify=labels) - - return X_train, X_test, y_train, y_test - - -def 
train_xgb_model(X_train, y_train, gpu=True) -> xgb.XGBClassifier: - n_classes = len(np.unique(y_train)) - objective = "binary:logistic" if n_classes == 2 else "multi:softprob" - - tree_method = "gpu_hist" if gpu else "hist" - xgbc = xgb.XGBClassifier(tree_method=tree_method, objective=objective, **training_params) - xgbc.fit(X_train, y_train) - - return xgbc - - -def build_classifier(X, y, labels_encoder, label_key, eval_verbosity: Optional[int] = 1, gpu=True) -> xgb.XGBClassifier: - # Adata prep - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) - #Note: Do we need a new train-test split for each classifier? - - # Model training - xgb_model = train_xgb_model(X_train, y_train, gpu=gpu) - - # Model eval - if eval_verbosity != 0: - cr_df = get_model_eval(xgb_model, X_test, y_test, labels_encoder) - - if eval_verbosity == 2: - print(cr_df) - - else: - overall_accuracy = cr_df["support"]["accuracy"] - low_prec_key = cr_df.precision.idxmin() - low_prec_val = cr_df.precision.min() - low_rec_key = cr_df.recall.idxmin() - low_rec_val = cr_df.recall.min() - low_f1_key = cr_df["f1-score"].idxmin() - low_f1_val = cr_df["f1-score"].min() - - print("") - print(f"Summary stats for {label_key} model:") - print(f"Overall accuracy: {overall_accuracy}") - print(f"Min. precision: {low_prec_key}: {low_prec_val}") - print(f"Min. Recall: {low_rec_key}: {low_rec_val}") - print(f"Min. F1-score: {low_f1_key}: {low_f1_val}") - print("") - - return xgb_model - - -def build_ref_classifiers(adata_reference, targets, model_path, - eval_verbosity: Optional[int] = 1, gpu: Optional[bool] = True) -> None: - """ - This function builds xgboost classifiers on a reference embedding for a designated number of - adata_reference.obs columns. Classifier .xgb files and a model_info.json file is written to the \`model_path\` - directory. Model evaluation is printed to stdout. 
- - Inputs: - * \`adata_reference\`: The AnnData object that was used to train the reference model - * \`model_path\`: The reference model directory where the classifiers will also be stored - * \`eval_verbosity\`: The verbosity level for evaluation of the classifier from the range [0;2]. - * \`gpu\`: Boolean indicating whether a gpu is available for classifier training - - - Example: - \`\`\` - >>> adata - AnnData object with n_obs x n_vars = 700 x 765 - obs: "ann_finest_level", "ann_level_1" - - >>> os.listdir("/path/to/model") - model_params.pt* - - >>> build_ref_classifiers(adata, "path/to/model", eval_verbosity=1, gpu=True) - >>> os.listdir("/path/to/model") - classifier_ann_finest_level.xgb* model_info.json* - classifier_ann_level_1.xgb* model_params.pt* - \`\`\` - """ - - # Check inputs - if not isinstance(eval_verbosity, int): - raise TypeError("\`eval_verbosity\` should be an integer between 0 and 2.") - - if eval_verbosity < 0 or eval_verbosity > 2: - raise ValueError("\`eval_verbosity\` should be an integer between 0 and 2.") - - train_data = get_reference_features(adata_reference, par, logger) - - if not os.path.exists(model_path): - os.makedirs(model_path, exist_ok=True) - - # Map from name of classifier to file names - classifiers = dict() - - for label, obs_pred in zip(targets, par["output_obs_predictions"]): - if label not in adata_reference.obs: - raise ValueError(f"{label} is not in the \`adata\` object passed!") - - filename = "classifier_" + label + ".xgb" - - labels, labels_encoder = encode_labels(adata_reference.obs[label]) - logger.info(f"Classes: {labels_encoder.classes_}") - - logger.info(f"Building classifier for {label}...") - xgb_model = build_classifier( - X=train_data, - y=labels, - labels_encoder=labels_encoder, - label_key=label, - eval_verbosity=eval_verbosity, - gpu=gpu - ) - - # Save classifier - logger.info("Saving model") - xgb_model.save_model(os.path.join(model_path, filename)) - - # Store classifier info - classifiers[label] 
= { - "filename": filename, - "labels": labels_encoder.classes_.tolist(), - "obs_column": obs_pred, - "model_params": training_params, - } - - # Store model_info.json file - model_info = { - "classifier_info": classifiers - } - - logger.info("Writing model_info to the file") - # Read previous file if it exists - if os.path.exists(model_path + "/model_info.json"): - logger.info("Old model_info file found, updating") - with open(model_path + "/model_info.json", "r") as f: - old_model_info = json.loads(f.read()) - - for key in old_model_info: - if key in model_info: - old_model_info[key].update(model_info[key]) - json_string = json.dumps(old_model_info, indent=4) - - else: - logger.info("Creating a new file") - json_string = json.dumps(model_info, indent=4) - - with open(model_path + "/model_info.json", "w") as f: - f.write(json_string) - - -def project_labels( - query_dataset, - cell_type_classifier_model: xgb.XGBClassifier, - annotation_column_name='label_pred', - uncertainty_column_name='label_uncertainty', - uncertainty_thresh=None # Note: currently not passed to predict function -): - """ - A function that projects predicted labels onto the query dataset, along with uncertainty scores. - Performs in-place update of the adata object, adding columns to the \`obs\` DataFrame. 
- - Input: - * \`query_dataset\`: The query \`AnnData\` object - * \`model_file\`: Path to the classification model file - * \`prediction_key\`: Column name in \`adata.obs\` where to store the predicted labels - * \`uncertainty_key\`: Column name in \`adata.obs\` where to store the uncertainty scores - * \`uncertainty_thresh\`: The uncertainty threshold above which we call a cell 'Unknown' - - Output: - Nothing is output, the passed anndata is modified inplace - - """ - - if (uncertainty_thresh is not None) and (uncertainty_thresh < 0 or uncertainty_thresh > 1): - raise ValueError(f'\`uncertainty_thresh\` must be \`None\` or between 0 and 1.') - - query_data = get_query_features(query_dataset, par, logger) - - # Predict labels and probabilities - query_dataset.obs[annotation_column_name] = cell_type_classifier_model.predict(query_data) - - logger.info("Predicting probabilities") - probs = cell_type_classifier_model.predict_proba(query_data) - - # Format probabilities - df_probs = pd.DataFrame(probs, columns=cell_type_classifier_model.classes_, index=query_dataset.obs_names) - query_dataset.obs[uncertainty_column_name] = 1 - df_probs.max(1) - - # Note: this is here in case we want to propose a set of values for the user to accept to seed the - # manual curation of predicted labels - if uncertainty_thresh is not None: - logger.info("Marking uncertain predictions") - query_dataset.obs[annotation_column_name + "_filtered"] = [ - val if query_dataset.obs[uncertainty_column_name][i] < uncertainty_thresh - else "Unknown" for i, val in enumerate(query_dataset.obs[annotation_column_name])] - - return query_dataset - - -def predict( - query_dataset, - cell_type_classifier_model_path, - annotation_column_name: str, - prediction_column_name: str, - uncertainty_column_name: str, - models_info, - use_gpu: bool = False -) -> pd.DataFrame: - """ - Returns \`obs\` DataFrame with prediction columns appended - """ - - tree_method = "gpu_hist" if use_gpu else "hist" - - labels = 
models_info["classifier_info"][annotation_column_name]["labels"] - - objective = "binary:logistic" if len(labels) == 2 else "multi:softprob" - cell_type_classifier_model = xgb.XGBClassifier(tree_method=tree_method, objective=objective) - - logger.info("Loading model") - cell_type_classifier_model.load_model(fname=cell_type_classifier_model_path) - - logger.info("Predicting labels") - project_labels(query_dataset, - cell_type_classifier_model, - annotation_column_name=prediction_column_name, - uncertainty_column_name=uncertainty_column_name) - - logger.info("Converting labels from numbers to classes") - labels_encoder = LabelEncoder() - labels_encoder.classes_ = np.array(labels) - query_dataset.obs[prediction_column_name] = labels_encoder.inverse_transform(query_dataset.obs[prediction_column_name]) - - return query_dataset - - -def main(par): - logger.info("Checking arguments") - par = check_arguments(par) - - mdata = mudata.read(par["input"].strip()) - adata = mdata.mod[par["modality"]] - - adata_reference = sc.read(par["reference"], backup_url=par["reference"]) - - # If classifiers for targets are in the model_output directory, simply open them and run (unless \`retrain\` != True) - # If some classifiers are missing, train and save them first - # Predict and save the query data - - targets_to_train = [] - - for obs_target in par["reference_obs_targets"]: - if not os.path.exists(par["model_output"]) or f"classifier_{obs_target}.xgb" not in os.listdir(par["model_output"]) or par["force_retrain"]: - logger.info(f"Classifier for {obs_target} added to a training schedule") - targets_to_train.append(obs_target) - else: - logger.info(f"Found classifier for {obs_target}, no retraining required") - - build_ref_classifiers(adata_reference, targets_to_train, model_path=par["model_output"], - gpu=par["use_gpu"], eval_verbosity=par["verbosity"]) - - output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) - - with open(par["model_output"] + "/model_info.json", 
"r") as f: - models_info = json.loads(f.read()) - - for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): - logger.info(f"Predicting {obs_target}") - - adata = predict(query_dataset=adata, - cell_type_classifier_model_path=os.path.join(par["model_output"], "classifier_" + obs_target + ".xgb"), - annotation_column_name=obs_target, - prediction_column_name=obs_pred, - uncertainty_column_name=obs_unc, - models_info=models_info, - use_gpu=par["use_gpu"]) - - if obs_target in targets_to_train: - # Save information about the transfer to .uns - output_uns_parameters[obs_target] = { - "method": "XGBClassifier", - **training_params - } - - adata.uns[par["output_uns_parameters"]] = output_uns_parameters - - logger.info("Updating mdata") - mdata.mod[par['modality']] = adata - mdata.update() - - logger.info("Writing output") - mdata.write_h5mu(par['output'].strip()) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then - VIASH_PAR_MODEL_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_MODEL_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/bd_rhapsody/.config.vsh.yaml b/target/docker/mapping/bd_rhapsody/.config.vsh.yaml deleted file mode 100644 index 84f54badcb1..00000000000 --- a/target/docker/mapping/bd_rhapsody/.config.vsh.yaml +++ /dev/null @@ -1,417 +0,0 @@ -functionality: - name: "bd_rhapsody" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "string" - name: "--mode" - description: "Whether to run a whole transcriptome analysis (WTA) or a targeted\ - \ analysis." - info: null - example: - - "wta" - required: true - choices: - - "wta" - - "targeted" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to your read files in the FASTQ.GZ format. You may specify\ - \ as many R1/R2 read pairs as you want." 
- info: null - example: - - "input.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - - "--reference_genome" - description: "Refence to map to. For `--mode wta`, this is the path to STAR\ - \ index as a tar.gz file. For `--mode targeted`, this is the path to mRNA\ - \ reference file for pre-designed, supplemental, or custom panel, in FASTA\ - \ format" - info: null - example: - - "reference_genome.tar.gz|reference.fasta" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--transcriptome_annotation" - alternatives: - - "-t" - description: "Path to GTF annotation file (only for `--mode wta`)." - info: null - example: - - "transcriptome.gtf" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--abseq_reference" - alternatives: - - "-a" - description: "Path to the AbSeq reference file in FASTA format. Only needed\ - \ if BD AbSeq Ab-Oligos are used." - info: null - example: - - "abseq_reference.fasta" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--supplemental_reference" - alternatives: - - "-s" - description: "Path to the supplemental reference file in FASTA format. Only\ - \ needed if there are additional transgene sequences used in the experiment\ - \ (only for `--mode wta`)." - info: null - example: - - "supplemental_reference.fasta" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sample_prefix" - description: "Specify a run name to use as the output file base name. 
Use only\ - \ letters, numbers, or hyphens. Do not use special characters or spaces." - info: null - default: - - "sample" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output folder. Output still needs to be processed further." - info: null - example: - - "output_dir" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Putative cell calling settings" - arguments: - - type: "string" - name: "--putative_cell_call" - description: "Specify the dataset to be used for putative cell calling. For\ - \ putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference\ - \ fasta file above." - info: null - example: - - "mRNA" - required: false - choices: - - "mRNA" - - "AbSeq_Experimental" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--exact_cell_count" - description: "Exact cell count - Set a specific number (>=1) of cells as putative,\ - \ based on those with the highest error-corrected read count" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--disable_putative_calling" - description: "Disable Refined Putative Cell Calling - Determine putative cells\ - \ using only the basic algorithm (minimum second derivative along the cumulative\ - \ reads curve). The refined algorithm attempts to remove false positives and\ - \ recover false negatives, but may not be ideal for certain complex mixtures\ - \ of cell types. Does not apply if Exact Cell Count is set." 
- info: null - direction: "input" - dest: "par" - - name: "Subsample arguments" - arguments: - - type: "double" - name: "--subsample" - description: "A number >1 or fraction (0 < n < 1) to indicate the number or\ - \ percentage of reads to subsample." - info: null - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--subsample_seed" - description: "A seed for replicating a previous subsampled run." - info: null - example: - - 3445 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Multiplex arguments" - arguments: - - type: "string" - name: "--sample_tags_version" - description: "Specify if multiplexed run." - info: null - example: - - "human" - required: false - choices: - - "human" - - "hs" - - "mouse" - - "mm" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--tag_names" - description: "Tag_Names (optional) - Specify the tag number followed by '-'\ - \ and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not\ - \ use the special characters: &, (), [], {}, <>, ?, |\n" - info: null - example: - - "4-mySample" - - "9-myOtherSample" - - "6-alsoThisSample" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - name: "VDJ arguments" - arguments: - - type: "string" - name: "--vdj_version" - description: "Specify if VDJ run." - info: null - example: - - "human" - required: false - choices: - - "human" - - "mouse" - - "humanBCR" - - "humanBCR" - - "humanTCR" - - "mouseBCR" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "CWL-runner arguments" - arguments: - - type: "boolean" - name: "--parallel" - description: "Run jobs in parallel." 
- info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--timestamps" - description: "Add timestamps to the errors, warnings, and notifications." - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--dryrun" - description: "If true, the output directory will only contain the CWL input\ - \ files, but the pipeline itself will not be executed." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "rhapsody_wta_1.10.1_nodocker.cwl" - - type: "file" - path: "rhapsody_targeted_1.10.1_nodocker.cwl" - - type: "file" - path: "src/utils/setup_logger.py" - description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe\ - \ CWL pipeline file is obtained by cloning 'https://bitbucket.org/CRSwDev/cwl/src/master/'\ - \ and removing all objects with class 'DockerRequirement' from the YML.\n\nThis\ - \ pipeline can be used for a targeted analysis (with `--mode targeted`) or for\ - \ a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\"targeted\"\ - `, then either the `--reference` or `--abseq_reference` parameters must be defined.\n\ - * If mode is `\"wta\"`, then `--reference` and `--transcriptome_annotation` must\ - \ be defined, `--abseq_reference` and `--supplemental_reference` is optional.\n\ - \nThe reference_genome and transcriptome_annotation files can be generated with\ - \ the make_reference pipeline.\nAlternatively, BD also provides standard references\ - \ which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n\ - \ - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n" - test_resources: - - type: "bash_script" - path: "test_memory.sh" - 
is_executable: true - - type: "bash_script" - path: "test_wta.sh" - is_executable: true - - type: "bash_script" - path: "test_targeted.sh" - is_executable: true - - type: "file" - path: "resources_test/bdrhap_vdj" - - type: "file" - path: "resources_test/bdrhap_5kjrt" - - type: "file" - path: "resources_test/reference_gencodev41_chr1/" - info: - name: "BD Rhapsody" - short_description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline" - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/bd_rhapsody:1.10.1" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "pandas<2" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" 
- cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/bd_rhapsody" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/bd_rhapsody/bd_rhapsody" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/bd_rhapsody/bd_rhapsody b/target/docker/mapping/bd_rhapsody/bd_rhapsody deleted file mode 100755 index 6bb7983fb3e..00000000000 --- a/target/docker/mapping/bd_rhapsody/bd_rhapsody +++ /dev/null @@ -1,1975 +0,0 @@ -#!/usr/bin/env bash - -# bd_rhapsody 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 
1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="bd_rhapsody" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bd_rhapsody 0.12.3" - echo "" - echo "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline." - echo "" - echo "The CWL pipeline file is obtained by cloning" - echo "'https://bitbucket.org/CRSwDev/cwl/src/master/' and removing all objects with" - echo "class 'DockerRequirement' from the YML." - echo "" - echo "This pipeline can be used for a targeted analysis (with \`--mode targeted\`) or" - echo "for a whole transcriptome analysis (with \`--mode wta\`)." - echo "" - echo "* If mode is \`\"targeted\"\`, then either the \`--reference\` or \`--abseq_reference\`" - echo "parameters must be defined." - echo "* If mode is \`\"wta\"\`, then \`--reference\` and \`--transcriptome_annotation\` must" - echo "be defined, \`--abseq_reference\` and \`--supplemental_reference\` is optional." - echo "" - echo "The reference_genome and transcriptome_annotation files can be generated with" - echo "the make_reference pipeline." 
- echo "Alternatively, BD also provides standard references which can be downloaded from" - echo "these locations:" - echo "" - echo " - Human:" - echo "http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/" - echo " - Mouse:" - echo "http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/" - echo "" - echo "Inputs:" - echo " --mode" - echo " type: string, required parameter" - echo " example: wta" - echo " choices: [ wta, targeted ]" - echo " Whether to run a whole transcriptome analysis (WTA) or a targeted" - echo " analysis." - echo "" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: input.fastq.gz" - echo " Path to your read files in the FASTQ.GZ format. You may specify as many" - echo " R1/R2 read pairs as you want." - echo "" - echo " -r, --reference_genome, --reference" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: reference_genome.tar.gz|reference.fasta" - echo " Refence to map to. For \`--mode wta\`, this is the path to STAR index as a" - echo " tar.gz file. For \`--mode targeted\`, this is the path to mRNA reference" - echo " file for pre-designed, supplemental, or custom panel, in FASTA format" - echo "" - echo " -t, --transcriptome_annotation" - echo " type: file, file must exist" - echo " example: transcriptome.gtf" - echo " Path to GTF annotation file (only for \`--mode wta\`)." - echo "" - echo " -a, --abseq_reference" - echo " type: file, multiple values allowed, file must exist" - echo " example: abseq_reference.fasta" - echo " Path to the AbSeq reference file in FASTA format. Only needed if BD" - echo " AbSeq Ab-Oligos are used." 
- echo "" - echo " -s, --supplemental_reference" - echo " type: file, multiple values allowed, file must exist" - echo " example: supplemental_reference.fasta" - echo " Path to the supplemental reference file in FASTA format. Only needed if" - echo " there are additional transgene sequences used in the experiment (only" - echo " for \`--mode wta\`)." - echo "" - echo " --sample_prefix" - echo " type: string" - echo " default: sample" - echo " Specify a run name to use as the output file base name. Use only" - echo " letters, numbers, or hyphens. Do not use special characters or spaces." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output_dir" - echo " Output folder. Output still needs to be processed further." - echo "" - echo "Putative cell calling settings:" - echo " --putative_cell_call" - echo " type: string" - echo " example: mRNA" - echo " choices: [ mRNA, AbSeq_Experimental ]" - echo " Specify the dataset to be used for putative cell calling. For putative" - echo " cell calling using an AbSeq dataset, please provide an AbSeq_Reference" - echo " fasta file above." - echo "" - echo " --exact_cell_count" - echo " type: integer" - echo " example: 10000" - echo " Exact cell count - Set a specific number (>=1) of cells as putative," - echo " based on those with the highest error-corrected read count" - echo "" - echo " --disable_putative_calling" - echo " type: boolean_true" - echo " Disable Refined Putative Cell Calling - Determine putative cells using" - echo " only the basic algorithm (minimum second derivative along the cumulative" - echo " reads curve). The refined algorithm attempts to remove false positives" - echo " and recover false negatives, but may not be ideal for certain complex" - echo " mixtures of cell types. Does not apply if Exact Cell Count is set." 
- echo "" - echo "Subsample arguments:" - echo " --subsample" - echo " type: double" - echo " example: 0.01" - echo " A number >1 or fraction (0 < n < 1) to indicate the number or percentage" - echo " of reads to subsample." - echo "" - echo " --subsample_seed" - echo " type: integer" - echo " example: 3445" - echo " A seed for replicating a previous subsampled run." - echo "" - echo "Multiplex arguments:" - echo " --sample_tags_version" - echo " type: string" - echo " example: human" - echo " choices: [ human, hs, mouse, mm ]" - echo " Specify if multiplexed run." - echo "" - echo " --tag_names" - echo " type: string, multiple values allowed" - echo " example: 4-mySample:9-myOtherSample:6-alsoThisSample" - echo " Tag_Names (optional) - Specify the tag number followed by '-' and the" - echo " desired sample name to appear in Sample_Tag_Metrics.csv." - echo " Do not use the special characters: &, (), [], {}, <>, ?, |" - echo "" - echo "VDJ arguments:" - echo " --vdj_version" - echo " type: string" - echo " example: human" - echo " choices: [ human, mouse, humanBCR, humanBCR, humanTCR, mouseBCR ]" - echo " Specify if VDJ run." - echo "" - echo "CWL-runner arguments:" - echo " --parallel" - echo " type: boolean" - echo " default: true" - echo " Run jobs in parallel." - echo "" - echo " --timestamps" - echo " type: boolean_true" - echo " Add timestamps to the errors, warnings, and notifications." - echo "" - echo " --dryrun" - echo " type: boolean_true" - echo " If true, the output directory will only contain the CWL input files, but" - echo " the pipeline itself will not be executed." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! 
command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/bd_rhapsody:1.10.1 - -ENTRYPOINT [] - - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "pandas<2" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component mapping bd_rhapsody" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit 
code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bd_rhapsody-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "bd_rhapsody 0.12.3" - exit - ;; - --mode) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --mode=*) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference) - if [ -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE="$2" - else - VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --reference=*) - if [ -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -r) - if [ -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE="$2" - else - VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_genome) - if [ -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE="$2" - else - VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_genome. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome_annotation) - [ -n "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && ViashError Bad arguments for option \'--transcriptome_annotation\': \'$VIASH_PAR_TRANSCRIPTOME_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_ANNOTATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_annotation. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome_annotation=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && ViashError Bad arguments for option \'--transcriptome_annotation=*\': \'$VIASH_PAR_TRANSCRIPTOME_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_ANNOTATION=$(ViashRemoveFlags "$1") - shift 1 - ;; - -t) - [ -n "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TRANSCRIPTOME_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_ANNOTATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --abseq_reference) - if [ -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then - VIASH_PAR_ABSEQ_REFERENCE="$2" - else - VIASH_PAR_ABSEQ_REFERENCE="$VIASH_PAR_ABSEQ_REFERENCE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --abseq_reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --abseq_reference=*) - if [ -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then - VIASH_PAR_ABSEQ_REFERENCE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ABSEQ_REFERENCE="$VIASH_PAR_ABSEQ_REFERENCE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -a) - if [ -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then - VIASH_PAR_ABSEQ_REFERENCE="$2" - else - VIASH_PAR_ABSEQ_REFERENCE="$VIASH_PAR_ABSEQ_REFERENCE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -a. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --supplemental_reference) - if [ -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then - VIASH_PAR_SUPPLEMENTAL_REFERENCE="$2" - else - VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_PAR_SUPPLEMENTAL_REFERENCE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --supplemental_reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --supplemental_reference=*) - if [ -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then - VIASH_PAR_SUPPLEMENTAL_REFERENCE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_PAR_SUPPLEMENTAL_REFERENCE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -s) - if [ -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then - VIASH_PAR_SUPPLEMENTAL_REFERENCE="$2" - else - VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_PAR_SUPPLEMENTAL_REFERENCE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --sample_prefix) - [ -n "$VIASH_PAR_SAMPLE_PREFIX" ] && ViashError Bad arguments for option \'--sample_prefix\': \'$VIASH_PAR_SAMPLE_PREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_PREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_prefix. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_prefix=*) - [ -n "$VIASH_PAR_SAMPLE_PREFIX" ] && ViashError Bad arguments for option \'--sample_prefix=*\': \'$VIASH_PAR_SAMPLE_PREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_PREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --putative_cell_call) - [ -n "$VIASH_PAR_PUTATIVE_CELL_CALL" ] && ViashError Bad arguments for option \'--putative_cell_call\': \'$VIASH_PAR_PUTATIVE_CELL_CALL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_PUTATIVE_CELL_CALL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --putative_cell_call. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --putative_cell_call=*) - [ -n "$VIASH_PAR_PUTATIVE_CELL_CALL" ] && ViashError Bad arguments for option \'--putative_cell_call=*\': \'$VIASH_PAR_PUTATIVE_CELL_CALL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PUTATIVE_CELL_CALL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --exact_cell_count) - [ -n "$VIASH_PAR_EXACT_CELL_COUNT" ] && ViashError Bad arguments for option \'--exact_cell_count\': \'$VIASH_PAR_EXACT_CELL_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXACT_CELL_COUNT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --exact_cell_count. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --exact_cell_count=*) - [ -n "$VIASH_PAR_EXACT_CELL_COUNT" ] && ViashError Bad arguments for option \'--exact_cell_count=*\': \'$VIASH_PAR_EXACT_CELL_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXACT_CELL_COUNT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --disable_putative_calling) - [ -n "$VIASH_PAR_DISABLE_PUTATIVE_CALLING" ] && ViashError Bad arguments for option \'--disable_putative_calling\': \'$VIASH_PAR_DISABLE_PUTATIVE_CALLING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DISABLE_PUTATIVE_CALLING=true - shift 1 - ;; - --subsample) - [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --subsample=*) - [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample=*\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --subsample_seed) - [ -n "$VIASH_PAR_SUBSAMPLE_SEED" ] && ViashError Bad arguments for option \'--subsample_seed\': \'$VIASH_PAR_SUBSAMPLE_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE_SEED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample_seed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --subsample_seed=*) - [ -n "$VIASH_PAR_SUBSAMPLE_SEED" ] && ViashError Bad arguments for option \'--subsample_seed=*\': \'$VIASH_PAR_SUBSAMPLE_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE_SEED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sample_tags_version) - [ -n "$VIASH_PAR_SAMPLE_TAGS_VERSION" ] && ViashError Bad arguments for option \'--sample_tags_version\': \'$VIASH_PAR_SAMPLE_TAGS_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_TAGS_VERSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_tags_version. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_tags_version=*) - [ -n "$VIASH_PAR_SAMPLE_TAGS_VERSION" ] && ViashError Bad arguments for option \'--sample_tags_version=*\': \'$VIASH_PAR_SAMPLE_TAGS_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SAMPLE_TAGS_VERSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --tag_names) - if [ -z "$VIASH_PAR_TAG_NAMES" ]; then - VIASH_PAR_TAG_NAMES="$2" - else - VIASH_PAR_TAG_NAMES="$VIASH_PAR_TAG_NAMES:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --tag_names. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --tag_names=*) - if [ -z "$VIASH_PAR_TAG_NAMES" ]; then - VIASH_PAR_TAG_NAMES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_TAG_NAMES="$VIASH_PAR_TAG_NAMES:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --vdj_version) - [ -n "$VIASH_PAR_VDJ_VERSION" ] && ViashError Bad arguments for option \'--vdj_version\': \'$VIASH_PAR_VDJ_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VDJ_VERSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --vdj_version. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --vdj_version=*) - [ -n "$VIASH_PAR_VDJ_VERSION" ] && ViashError Bad arguments for option \'--vdj_version=*\': \'$VIASH_PAR_VDJ_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VDJ_VERSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --parallel) - [ -n "$VIASH_PAR_PARALLEL" ] && ViashError Bad arguments for option \'--parallel\': \'$VIASH_PAR_PARALLEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PARALLEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --parallel. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --parallel=*) - [ -n "$VIASH_PAR_PARALLEL" ] && ViashError Bad arguments for option \'--parallel=*\': \'$VIASH_PAR_PARALLEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_PARALLEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --timestamps) - [ -n "$VIASH_PAR_TIMESTAMPS" ] && ViashError Bad arguments for option \'--timestamps\': \'$VIASH_PAR_TIMESTAMPS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TIMESTAMPS=true - shift 1 - ;; - --dryrun) - [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DRYRUN=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 
1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_MODE+x} ]; then - ViashError '--mode' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_SAMPLE_PREFIX+x} ]; then - VIASH_PAR_SAMPLE_PREFIX="sample" -fi -if [ -z ${VIASH_PAR_DISABLE_PUTATIVE_CALLING+x} ]; then - VIASH_PAR_DISABLE_PUTATIVE_CALLING="false" -fi -if [ -z ${VIASH_PAR_PARALLEL+x} ]; then - VIASH_PAR_PARALLEL="true" -fi -if [ -z ${VIASH_PAR_TIMESTAMPS+x} ]; then - VIASH_PAR_TIMESTAMPS="false" -fi -if [ -z ${VIASH_PAR_DRYRUN+x} ]; then - VIASH_PAR_DRYRUN="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_REFERENCE; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_ANNOTATION' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_ABSEQ_REFERENCE; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_SUPPLEMENTAL_REFERENCE; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." 
- exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_EXACT_CELL_COUNT" ]]; then - if ! [[ "$VIASH_PAR_EXACT_CELL_COUNT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--exact_cell_count' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DISABLE_PUTATIVE_CALLING" ]]; then - if ! [[ "$VIASH_PAR_DISABLE_PUTATIVE_CALLING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--disable_putative_calling' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SUBSAMPLE" ]]; then - if ! [[ "$VIASH_PAR_SUBSAMPLE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--subsample' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SUBSAMPLE_SEED" ]]; then - if ! [[ "$VIASH_PAR_SUBSAMPLE_SEED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--subsample_seed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PARALLEL" ]]; then - if ! [[ "$VIASH_PAR_PARALLEL" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--parallel' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TIMESTAMPS" ]]; then - if ! [[ "$VIASH_PAR_TIMESTAMPS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--timestamps' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DRYRUN" ]]; then - if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_MODE" ]; then - VIASH_PAR_MODE_CHOICES=("wta:targeted") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then - ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! 
-z "$VIASH_PAR_PUTATIVE_CELL_CALL" ]; then - VIASH_PAR_PUTATIVE_CELL_CALL_CHOICES=("mRNA:AbSeq_Experimental") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_PUTATIVE_CELL_CALL_CHOICES[*]}:" =~ ":$VIASH_PAR_PUTATIVE_CELL_CALL:" ]]; then - ViashError '--putative_cell_call' specified value of \'$VIASH_PAR_PUTATIVE_CELL_CALL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_SAMPLE_TAGS_VERSION" ]; then - VIASH_PAR_SAMPLE_TAGS_VERSION_CHOICES=("human:hs:mouse:mm") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SAMPLE_TAGS_VERSION_CHOICES[*]}:" =~ ":$VIASH_PAR_SAMPLE_TAGS_VERSION:" ]]; then - ViashError '--sample_tags_version' specified value of \'$VIASH_PAR_SAMPLE_TAGS_VERSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_VDJ_VERSION" ]; then - VIASH_PAR_VDJ_VERSION_CHOICES=("human:mouse:humanBCR:humanBCR:humanTCR:mouseBCR") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_VDJ_VERSION_CHOICES[*]}:" =~ ":$VIASH_PAR_VDJ_VERSION:" ]]; then - ViashError '--vdj_version' specified value of \'$VIASH_PAR_VDJ_VERSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! 
-z "$VIASH_PAR_REFERENCE" ]; then - VIASH_TEST_REFERENCE=() - IFS=';' - for var in $VIASH_PAR_REFERENCE; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_REFERENCE+=( "$var" ) - done - VIASH_PAR_REFERENCE=$(IFS=';' ; echo "${VIASH_TEST_REFERENCE[*]}") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION")" ) - VIASH_PAR_TRANSCRIPTOME_ANNOTATION=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION") -fi -if [ ! -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then - VIASH_TEST_ABSEQ_REFERENCE=() - IFS=';' - for var in $VIASH_PAR_ABSEQ_REFERENCE; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_ABSEQ_REFERENCE+=( "$var" ) - done - VIASH_PAR_ABSEQ_REFERENCE=$(IFS=';' ; echo "${VIASH_TEST_ABSEQ_REFERENCE[*]}") -fi -if [ ! -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then - VIASH_TEST_SUPPLEMENTAL_REFERENCE=() - IFS=';' - for var in $VIASH_PAR_SUPPLEMENTAL_REFERENCE; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_SUPPLEMENTAL_REFERENCE+=( "$var" ) - done - VIASH_PAR_SUPPLEMENTAL_REFERENCE=$(IFS=';' ; echo "${VIASH_TEST_SUPPLEMENTAL_REFERENCE[*]}") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bd_rhapsody-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import os -import re -import subprocess -import tempfile -import sys -from typing import Any -import pandas as pd -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'transcriptome_annotation': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_ANNOTATION+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_ANNOTATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'abseq_reference': $( if [ ! -z ${VIASH_PAR_ABSEQ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_ABSEQ_REFERENCE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'supplemental_reference': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTAL_REFERENCE+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTAL_REFERENCE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sample_prefix': $( if [ ! -z ${VIASH_PAR_SAMPLE_PREFIX+x} ]; then echo "r'${VIASH_PAR_SAMPLE_PREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'putative_cell_call': $( if [ ! -z ${VIASH_PAR_PUTATIVE_CELL_CALL+x} ]; then echo "r'${VIASH_PAR_PUTATIVE_CELL_CALL//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'exact_cell_count': $( if [ ! -z ${VIASH_PAR_EXACT_CELL_COUNT+x} ]; then echo "int(r'${VIASH_PAR_EXACT_CELL_COUNT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'disable_putative_calling': $( if [ ! 
-z ${VIASH_PAR_DISABLE_PUTATIVE_CALLING+x} ]; then echo "r'${VIASH_PAR_DISABLE_PUTATIVE_CALLING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'subsample_seed': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE_SEED+x} ]; then echo "int(r'${VIASH_PAR_SUBSAMPLE_SEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sample_tags_version': $( if [ ! -z ${VIASH_PAR_SAMPLE_TAGS_VERSION+x} ]; then echo "r'${VIASH_PAR_SAMPLE_TAGS_VERSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'tag_names': $( if [ ! -z ${VIASH_PAR_TAG_NAMES+x} ]; then echo "r'${VIASH_PAR_TAG_NAMES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'vdj_version': $( if [ ! -z ${VIASH_PAR_VDJ_VERSION+x} ]; then echo "r'${VIASH_PAR_VDJ_VERSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'parallel': $( if [ ! -z ${VIASH_PAR_PARALLEL+x} ]; then echo "r'${VIASH_PAR_PARALLEL//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'timestamps': $( if [ ! -z ${VIASH_PAR_TIMESTAMPS+x} ]; then echo "r'${VIASH_PAR_TIMESTAMPS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def is_gz_file(filepath): - with open(filepath, 'rb') as test_f: - return test_f.read(2) == b'\\x1f\\x8b' - -def strip_margin(text: str) -> str: - return re.sub('(\\n?)[ \\t]*\\|', '\\\\1', text) - -def process_params(par: dict[str, Any]) -> str: - # check input parameters - assert par["input"] 
is not None, "Pass at least one set of inputs to --input." - if par["mode"] == "wta": - assert len(par["reference"]) == 1, "When mode is \\"wta\\", --reference should be length 1" - assert par["transcriptome_annotation"] is not None, "When mode is \\"wta\\", --transcriptome_annotation should be defined" - elif par["mode"] == "targeted": - assert par["transcriptome_annotation"] is None, "When mode is \\"targeted\\", --transcriptome_annotation should be undefined" - assert par["supplemental_reference"] is None, "When mode is \\"targeted\\", --supplemental_reference should be undefined" - - # checking sample prefix - if re.match("[^A-Za-z0-9]", par["sample_prefix"]): - logger.warning("--sample_prefix should only consist of letters, numbers or hyphens. Replacing all '[^A-Za-z0-9]' with '-'.") - par["sample_prefix"] = re.sub("[^A-Za-z0-9\\\\-]", "-", par["sample_prefix"]) - - # if par_input is a directory, look for fastq files - if len(par["input"]) == 1 and os.path.isdir(par["input"][0]): - par["input"] = [ os.path.join(dp, f) for dp, dn, filenames in os.walk(par["input"]) for f in filenames if re.match(r'.*\\.fastq.gz', f) ] - - # use absolute paths - par["input"] = [ os.path.abspath(f) for f in par["input"] ] - if par["reference"]: - par["reference"] = [ os.path.abspath(f) for f in par["reference"] ] - if par["transcriptome_annotation"]: - par["transcriptome_annotation"] = os.path.abspath(par["transcriptome_annotation"]) - if par["abseq_reference"]: - par["abseq_reference"] = [ os.path.abspath(f) for f in par["abseq_reference"] ] - if par["supplemental_reference"]: - par["supplemental_reference"] = [ os.path.abspath(f) for f in par["supplemental_reference"] ] - par["output"] = os.path.abspath(par["output"]) - - return par - -def generate_config(par: dict[str, Any]) -> str: - content_list = [strip_margin(f"""\\ - |#!/usr/bin/env cwl-runner - | - |cwl:tool: rhapsody - | - |# This is a YML file used to specify the inputs for a BD Genomics {"WTA" if par["mode"] == "wta" 
else "Targeted" } Rhapsody Analysis pipeline run. See the - |# BD Genomics Analysis Setup User Guide (Doc ID: 47383) for more details. - | - |## Reads (required) - Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want. - |Reads: - |""")] - - for file in par["input"]: - content_list.append(strip_margin(f"""\\ - | - class: File - | location: "{file}" - |""")) - - if par["reference"] and par["mode"] == "wta": - content_list.append(strip_margin(f"""\\ - | - |## Reference_Genome (required) - Path to STAR index for tar.gz format. See Doc ID: 47383 for instructions to obtain pre-built STAR index file. - |Reference_Genome: - | class: File - | location: "{par["reference"][0]}" - |""")) - - if par["reference"] and par["mode"] == "targeted": - content_list.append(strip_margin(f"""\\ - | - |## Reference (optional) - Path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format. - |Reference: - |""")) - for file in par["reference"]: - content_list.append(strip_margin(f"""\\ - | - class: File - | location: {file} - |""")) - - if par["transcriptome_annotation"]: - content_list.append(strip_margin(f"""\\ - | - |## Transcriptome_Annotation (required) - Path to GTF annotation file - |Transcriptome_Annotation: - | class: File - | location: "{par["transcriptome_annotation"]}" - |""")) - - if par["abseq_reference"]: - content_list.append(strip_margin(f"""\\ - | - |## AbSeq_Reference (optional) - Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used. - |AbSeq_Reference: - |""")) - for file in par["abseq_reference"]: - content_list.append(strip_margin(f"""\\ - | - class: File - | location: {file} - |""")) - - if par["supplemental_reference"]: - content_list.append(strip_margin(f"""\\ - | - |## Supplemental_Reference (optional) - Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment. 
- |Supplemental_Reference: - |""")) - for file in par["supplemental_reference"]: - content_list.append(strip_margin(f"""\\ - | - class: File - | location: {file} - |""")) - - ## Putative Cell Calling Settings - content_list.append(strip_margin(f"""\\ - | - |#################################### - |## Putative Cell Calling Settings ## - |#################################### - |""")) - - if par["putative_cell_call"]: - content_list.append(strip_margin(f"""\\ - |## Putative cell calling dataset (optional) - Specify the dataset to be used for putative cell calling: mRNA or AbSeq_Experimental. - |## For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above. - |## By default, the mRNA data will be used for putative cell calling. - |Putative_Cell_Call: {par["putative_cell_call"]} - |""")) - - if par["exact_cell_count"]: - content_list.append(strip_margin(f"""\\ - |## Exact cell count (optional) - Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count - |Exact_Cell_Count: {par["exact_cell_count"]} - |""")) - - if par["disable_putative_calling"]: - content_list.append(strip_margin(f"""\\ - |## Disable Refined Putative Cell Calling (optional) - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set. - |## The values can be true or false. By default, the refined algorithm is used. 
- |Basic_Algo_Only: {str(par["disable_putative_calling"]).lower()} - |""")) - - ## Subsample Settings - content_list.append(strip_margin(f"""\\ - | - |######################## - |## Subsample Settings ## - |######################## - |""" - )) - - if par["subsample"]: - content_list.append(strip_margin(f"""\\ - |## Subsample (optional) - A number >1 or fraction (0 < n < 1) to indicate the number or percentage of reads to subsample. - |Subsample: {par["subsample"]} - |""")) - - if par["subsample_seed"]: - content_list.append(strip_margin(f"""\\ - |## Subsample seed (optional) - A seed for replicating a previous subsampled run. - |Subsample_seed: {par["subsample_seed"]} - |""")) - - - ## Multiplex options - content_list.append(strip_margin(f"""\\ - | - |####################### - |## Multiplex options ## - |####################### - |""" - )) - - if par["sample_tags_version"]: - content_list.append(strip_margin(f"""\\ - |## Sample Tags Version (optional) - Specify if multiplexed run: human, hs, mouse or mm - |Sample_Tags_Version: {par["sample_tags_version"]} - |""")) - - if par["tag_names"]: - content_list.append(strip_margin(f"""\\ - |## Tag_Names (optional) - Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv - |# Do not use the special characters: &, (), [], {{}}, <>, ?, | - |Tag_Names: [{', '.join(par["tag_names"])}] - |""")) - - ## VDJ options - content_list.append(strip_margin(f"""\\ - | - |################# - |## VDJ options ## - |################# - |""" - )) - - if par["vdj_version"]: - content_list.append(strip_margin(f"""\\ - |## VDJ Version (optional) - Specify if VDJ run: human, mouse, humanBCR, humanTCR, mouseBCR, mouseTCR - |VDJ_Version: {par["vdj_version"]} - |""")) - - ## VDJ options - content_list.append(strip_margin(f"""\\ - | - |######################## - |## Additional Options ## - |######################## - |""" - )) - - if par["sample_prefix"]: - content_list.append(strip_margin(f"""\\ - |## 
Run Name (optional) - Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces. - |Run_Name: {par["sample_prefix"]} - |""")) - - ## Write config to file - return ''.join(content_list) - -def generate_cwl_file(par: dict[str, Any], meta: dict[str, Any]) -> str: - # create cwl file (if need be) - if par["mode"] == "wta": - orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_wta_1.10.1_nodocker.cwl") - elif par["mode"] == "targeted": - orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_targeted_1.10.1_nodocker.cwl") - - # Inject computational requirements into pipeline - if meta["memory_mb"] or meta["cpus"]: - cwl_file = os.path.join(par["output"], "pipeline.cwl") - - # Read in the file - with open(orig_cwl_file, 'r') as file : - cwl_data = file.read() - - # Inject computational requirements into pipeline - if meta["memory_mb"]: - memory = int(meta["memory_mb"]) - 2000 # keep 2gb for OS - cwl_data = re.sub('"ramMin": [^\\n]*,\\n', f'"ramMin": {memory},\\n', cwl_data) - if meta["cpus"]: - cwl_data = re.sub('"coresMin": [^\\n]*,\\n', f'"coresMin": {meta["cpus"]},\\n', cwl_data) - - # Write the file out again - with open(cwl_file, 'w') as file: - file.write(cwl_data) - else: - cwl_file = orig_cwl_file - - return cwl_file - -def process_fasta(feature_type: str, path: str) -> pd.DataFrame: - with open(path) as f: - df = pd.DataFrame(data={ - 'feature_type': feature_type, - 'feature_id': [line[1:].strip() for line in f if line[0] == ">"], - 'reference_file': os.path.basename(path), - }) - return df - -def process_gtf(feature_type: str, path: str) -> pd.DataFrame: - with open(path) as f: - data = [] - for line in f: - if not line.startswith("#"): - attr = dict(item.strip().split(' ') for item in line.split('\\t')[8].strip('\\n').split(';') if item) - row = { - 'feature_types': feature_type, - 'feature_ids': attr["gene_name"].strip("\\""), - 'reference_file': os.path.basename(path), 
- } - data.append(row) - df = pd.DataFrame(data) - df = df.drop_duplicates() - return df - -def extract_feature_types(par: dict[str, Any]): - feature_types = [] - - if par["mode"] == "targeted": - for file in par["reference"]: - logger.info(f"Processing reference fasta {file}") - feature_types.append(process_fasta("Gene Expression", file)) - - if par["mode"] == "wta": - file = par["transcriptome_annotation"] - logger.info(f"Processing reference gtf {file}") - feature_types.append(process_gtf("Gene Expression", file)) - - if par["abseq_reference"]: - for file in par["abseq_reference"]: - logger.info(f"Processing abseq fasta {file}") - feature_types.append(process_fasta("Antibody Capture", file)) - - if par["supplemental_reference"]: - for file in par["supplemental_reference"]: - logger.info(f"Processing supp fasta {file}") - feature_types.append(process_fasta("Other", file)) - - return pd.concat(feature_types) - -def main(par: dict[str, Any], meta: dict[str, Any]): - # Preprocess params - par = process_params(par) - - # Create output dir if not exists - if not os.path.exists(par["output"]): - os.makedirs(par["output"]) - - ## Process parameters - proc_pars = ["--no-container", "--outdir", par["output"]] - - if par["parallel"]: - proc_pars.append("--parallel") - - if par["timestamps"]: - proc_pars.append("--timestamps") - - with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_wta-", dir=meta["temp_dir"]) as temp_dir: - # extract transcriptome gtf if need be - if par["transcriptome_annotation"] and is_gz_file(par["transcriptome_annotation"]): - with open(os.path.join(temp_dir, "transcriptome.gtf"), 'wb') as genes_uncompressed: - with gzip.open(par["transcriptome_annotation"], 'rb') as genes_compressed: - shutil.copyfileobj(genes_compressed, genes_uncompressed) - par["transcriptome_annotation"] = genes_uncompressed.name - - # Create params file - config_file = os.path.join(par["output"], "config.yml") - config_content = generate_config(par) - with open(config_file, 
"w") as f: - f.write(config_content) - - # Create cwl file (if need be) - cwl_file = generate_cwl_file(par, meta) - - ## Run pipeline - if not par["dryrun"]: - cmd = ["cwl-runner"] + proc_pars + [cwl_file, os.path.basename(config_file)] - - env = dict(os.environ) - env["TMPDIR"] = temp_dir - - logger.info("> " + ' '.join(cmd)) - _ = subprocess.check_call( - cmd, - cwd=os.path.dirname(config_file), - env=env - ) - - # extracting feature ids from references - # extract info from reference files (while they still exist) - feature_df = extract_feature_types(par) - feature_types_file = os.path.join(par["output"], "feature_types.tsv") - feature_df.to_csv(feature_types_file, sep="\\t", index=False) - - - if not par["dryrun"]: - # look for counts file - if not par["sample_prefix"]: - par["sample_prefix"] = "sample" - counts_filename = par["sample_prefix"] + "_RSEC_MolsPerCell.csv" - - if par["sample_tags_version"]: - counts_filename = "Combined_" + counts_filename - counts_file = os.path.join(par["output"], counts_filename) - - if not os.path.exists(counts_file): - raise ValueError(f"Could not find output counts file '{counts_filename}'") - - # look for metrics file - metrics_filename = par["sample_prefix"] + "_Metrics_Summary.csv" - metrics_file = os.path.join(par["output"], metrics_filename) - if not os.path.exists(metrics_file): - raise ValueError(f"Could not find output metrics file '{metrics_filename}'") - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! 
-z "$VIASH_PAR_REFERENCE" ]; then - unset VIASH_TEST_REFERENCE - IFS=';' - for var in $VIASH_PAR_REFERENCE; do - unset IFS - if [ -z "$VIASH_TEST_REFERENCE" ]; then - VIASH_TEST_REFERENCE="$(ViashStripAutomount "$var")" - else - VIASH_TEST_REFERENCE="$VIASH_TEST_REFERENCE;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_REFERENCE="$VIASH_TEST_REFERENCE" -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ]; then - VIASH_PAR_TRANSCRIPTOME_ANNOTATION=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION") -fi -if [ ! -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then - unset VIASH_TEST_ABSEQ_REFERENCE - IFS=';' - for var in $VIASH_PAR_ABSEQ_REFERENCE; do - unset IFS - if [ -z "$VIASH_TEST_ABSEQ_REFERENCE" ]; then - VIASH_TEST_ABSEQ_REFERENCE="$(ViashStripAutomount "$var")" - else - VIASH_TEST_ABSEQ_REFERENCE="$VIASH_TEST_ABSEQ_REFERENCE;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_ABSEQ_REFERENCE="$VIASH_TEST_ABSEQ_REFERENCE" -fi -if [ ! -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then - unset VIASH_TEST_SUPPLEMENTAL_REFERENCE - IFS=';' - for var in $VIASH_PAR_SUPPLEMENTAL_REFERENCE; do - unset IFS - if [ -z "$VIASH_TEST_SUPPLEMENTAL_REFERENCE" ]; then - VIASH_TEST_SUPPLEMENTAL_REFERENCE="$(ViashStripAutomount "$var")" - else - VIASH_TEST_SUPPLEMENTAL_REFERENCE="$VIASH_TEST_SUPPLEMENTAL_REFERENCE;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_TEST_SUPPLEMENTAL_REFERENCE" -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl b/target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl deleted file mode 100755 index 56a6310bc07..00000000000 --- a/target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl +++ /dev/null @@ -1,5159 +0,0 @@ -#!/usr/bin/env cwl-runner -{ - "cwlVersion": "v1.0", - "$graph": [ - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-r1", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AddtoBam.cwl/Annotation_R1" - }, - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#AddtoBam.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--annot-mol-file" - }, - "type": "File", - "id": "#AddtoBam.cwl/Molecular_Annotation" - }, - { - "inputBinding": { - "prefix": "--r2-bam" - }, - "type": "File", - "id": "#AddtoBam.cwl/R2_Bam" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AddtoBam.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--tag-calls" - }, - "type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Tag_Calls" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "Annotated_mapping_R2.BAM" - }, - "type": "File", - "id": "#AddtoBam.cwl/Annotated_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AddtoBam.cwl/output" - } - ], - "baseCommand": [ - "mist_add_to_bam.py" - ], - "class": "CommandLineTool", - 
"id": "#AddtoBam.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - ], - "id": "#AlignR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--index" - }, - "type": "File", - "id": "#AlignR2.cwl/Index" - }, - { - "inputBinding": { - "prefix": "--r2-fastqs", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/R2" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AlignR2.cwl/Run_Metadata" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "$(String(runtime.cores))" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*zip" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/Alignments" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AlignR2.cwl/output" - } - ], - "baseCommand": [ - "mist_align_R2.py" - ], - "class": "CommandLineTool", - "id": "#AlignR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateMolecules.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--use-dbec" - }, - "type": [ - "null", - "boolean" - ], - "id": "#AnnotateMolecules.cwl/Use_DBEC" - }, - { - "inputBinding": { - "prefix": "--valid-annot" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Valids" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_GeneStatus.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Gene_Status_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": 
"$(JSON.parse(self[0].contents).max_count)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Max_Count" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Mol_Annot_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Total_Molecules" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_molecules.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateMolecules.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--filter-metrics", - "itemSeparator": "," - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#AnnotateR1.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--R1" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateR1.cwl/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 2000, - "class": "ResourceRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_R1.csv.gz" - }, - "type": "File", - "id": "#AnnotateR1.cwl/Annotation_R1" - }, - { - "outputBinding": { - "glob": "*_R1_error_count_table.npy" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_error_count_table" - }, - { - "outputBinding": { - "glob": "*_R1_read_count_breakdown.json" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_read_count_breakdown" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR1.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R1.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR1.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - 
], - "id": "#AnnotateR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF_Annotation" - }, - { - "inputBinding": { - "prefix": "--R2-zip" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_zip" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--transcript-length" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/Transcript_Length" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*Annotation_R2.csv.gz" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Annot_R2" - }, - { - "outputBinding": { - "glob": "*-annot.gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*mapping_R2.BAM" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_Bam" - }, - { - "outputBinding": { - "glob": "*_picard_quality_metrics.csv.gz" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_Quality_Metrics" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR2.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R2.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--extra-seqs", - "itemSeparator": "," - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Extra_Seqs" - }, - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#AnnotateReads.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/Putative_Cell_Call" - }, - { - "type": { - "items": "File", - 
"type": "array" - }, - "id": "#AnnotateReads.cwl/R1_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_error_count_table" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_read_count_breakdown" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Quality_Metrics" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - { - "class": "InitialWorkDirRequirement", - "listing": [ - { - "writable": false, - "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", - "entryname": "manifest.json" - } - ] - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "4" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_Read.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": 
"#AnnotateReads.cwl/Annotation_Read" - }, - { - "outputBinding": { - "glob": "*read1_error_rate_archive*" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Read1_error_rate" - }, - { - "outputBinding": { - "glob": "*_SeqMetrics.csv.gz" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Seq_Metrics" - }, - { - "outputBinding": { - "glob": "*Sorted_Valid_Reads.csv.*" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/Valid_Reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_ig_reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_tcr_reads" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateReads.cwl/output" - }, - { - "outputBinding": { - "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validIgReads" - }, - { - "outputBinding": { - "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validTcrReads" - } - ], - "baseCommand": [ - "mist_annotate_reads.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateReads.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#BundleLogs.cwl/log_files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - } - ], - "outputs": [ - { - "type": "Directory", - "id": "#BundleLogs.cwl/logs_dir" - } - ], - "class": "ExpressionTool", - "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function 
uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? (random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", - "id": "#BundleLogs.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 0 - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/molsPerCellMatrix" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*cell_type_experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/cellTypePredictions" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Cell_Classifier.cwl/log" - } - ], - "baseCommand": [ - "mist_cell_classifier.py" - ], - "class": "CommandLineTool", - "id": "#Cell_Classifier.cwl" - }, - { - "inputs": [ - { - "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", - "inputBinding": { - "prefix": "--min-split-size" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/MinChunkSize" - }, - { - "inputBinding": { - "prefix": "--reads", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#CheckFastqs.cwl/Reads" - }, - { - "inputBinding": { - "prefix": "--subsample" - }, - "type": [ - "null", - "float" - ], - "id": "#CheckFastqs.cwl/Subsample" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/Subsample_Seed" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": 
[ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "CheckFastqs does several quality control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", - "baseCommand": [ - "mist_check_fastqs.py" - ], - "id": "#CheckFastqs.cwl", - "outputs": [ - { - "outputBinding": { - "glob": "bead_version.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/Bead_Version" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/FastqReadPairs" - }, - { - "outputBinding": { - "glob": "files_to_skip_split_and_subsample.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" - }, - "type": { - "items": "string", - "type": "array" - }, - "id": 
"#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" - }, - "type": [ - "null", - "string" - ], - "id": "#CheckFastqs.cwl/Libraries" - }, - { - "outputBinding": { - "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#CheckFastqs.cwl/ReadsList" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" - }, - "type": "int", - "id": "#CheckFastqs.cwl/SubsampleSeed" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" - }, - "type": "float", - "id": "#CheckFastqs.cwl/SubsamplingRatio" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#CheckFastqs.cwl/log" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--abseq-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/AbSeq_Reference" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckReference.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - 
} - ], - "id": "#CheckReference.cwl/Reference" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#CheckReference.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--supplemental-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/Supplemental_Reference" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "combined_extra_seq.fasta" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Extra_Seqs" - }, - { - "outputBinding": { - "glob": "full-gene-list.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Full_Genes" - }, - { - "outputBinding": { - "glob": "*gtf", - "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. 
AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*-annot.*", - "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" - }, - "type": "File", - "id": "#CheckReference.cwl/Index" - }, - { - "outputBinding": { - "glob": "target-gene.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Target_Gene_Mapping" - }, - { - "outputBinding": { - "glob": "transcript_length.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Transcript_Length" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#CheckReference.cwl/output" - } - ], - "baseCommand": [ - "mist_check_references.py" - ], - "class": "CommandLineTool", - "id": "#CheckReference.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--dense-data-table" - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparse.cwl/Dense_Data_Table" - }, - { - "inputBinding": { - "prefix": "--gene-list" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Gene_List" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Run_Metadata" - } - ], - "requirements": [ - ], - 
"outputs": [ - { - "outputBinding": { - "glob": "*.csv.gz" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Data_Tables" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#DensetoSparse.cwl/output" - } - ], - "baseCommand": [ - "mist_dense_to_sparse.py" - ], - "class": "CommandLineTool", - "id": "#DensetoSparse.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparseFile.cwl/GDT_cell_order" - } - ], - "requirements": [ - ], - "stdout": "cell_order.json", - "outputs": [ - { - "type": "stdout", - "id": "#DensetoSparseFile.cwl/Cell_Order" - } - ], - "baseCommand": "cat", - "id": "#DensetoSparseFile.cwl", - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--full-gene-list" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Full_Genes" - }, - { - "inputBinding": { - "prefix": "--gene-status", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Gene_Status_List" - }, - { - "inputBinding": { - "prefix": "--max-count", - "itemSeparator": "," - }, - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Max_Count" - }, - { - "inputBinding": { - "prefix": "--mol-annot", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Molecule_Annotation_List" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#GetDataTable.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#GetDataTable.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#GetDataTable.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "prefix": "--tag-names", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - 
} - ], - "id": "#GetDataTable.cwl/Tag_Names" - }, - { - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Total_Molecules" - } - ], - "requirements": [ - { - "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", - "class": "ResourceRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "metrics-files.tar.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Annot_Files" - }, - { - "outputBinding": { - "glob": "Annotations/*_Bioproduct_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Bioproduct_Stats" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*.png" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Cell_Label_Filter" - }, - { - "outputBinding": { - "glob": "cell_order.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Cell_Order" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule_corrected.csv.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Corrected_Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "*PerCell_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables" - }, - { - "outputBinding": { - "glob": "*PerCell_Unfiltered_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" - }, - { - "outputBinding": { - "glob": "*_Expression_Data.st.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Expression_Data" - }, - { - "outputBinding": { - "glob": "*_Expression_Data_Unfiltered.st.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" - }, - { - "outputBinding": { - "glob": "gene_list.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Gene_List" - }, - { - 
"outputBinding": { - "glob": "Annotations/*_Annotation_Molecule.csv.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Putative_Cells_Origin" - }, - { - "outputBinding": { - "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Annotation" - }, - { - "outputBinding": { - "glob": "Trueno/*_Calls.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Calls" - }, - { - "outputBinding": { - "glob": "Trueno/*csv" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Trueno_out" - }, - { - "outputBinding": { - "glob": "Trueno/*zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Trueno_zip" - }, - { - "outputBinding": { - "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#GetDataTable.cwl/output" - } - ], - "baseCommand": [ - "mist_get_datatables.py" - ], - "class": "CommandLineTool", - "id": "#GetDataTable.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#IndexBAM.cwl/BamFile" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_index.log", - "outputs": [ - { - "outputBinding": { - "glob": "*.bai" - }, - "type": "File", - "id": "#IndexBAM.cwl/Index" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": 
"File", - "id": "#IndexBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "index" - ], - "id": "#IndexBAM.cwl", - "arguments": [ - { - "position": 2, - "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/AbSeq_UMI" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Barcode_Num" - }, - { - "type": [ - "null", - "File" - ], - "id": "#InternalSettings.cwl/Extra_Seqs" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Label_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/MinChunkSize" - }, - { - "type": [ - "null", - "long" - ], - "id": "#InternalSettings.cwl/NumRecordsPerSplit" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - "string" - ], - "id": "#InternalSettings.cwl/Seq_Run" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Target_analysis" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Use_DBEC" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" - } - ], - "class": "ExpressionTool", - "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput 
= internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", - "id": "#InternalSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/AbSeq_Reference", - "label": "AbSeq Reference" - }, - { - "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set.", - "type": [ - "null", - "boolean" - ], - "id": "#main/Basic_Algo_Only", - "label": "Disable Refined Putative Cell Calling" - }, - { - "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", - "type": [ - "null", - "int" - ], - "id": "#main/Exact_Cell_Count", - "label": "Exact Cell Count" - }, - { - "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. 
AbSeq (Experimental) is for troubleshooting only.", - "type": [ - "null", - { - "symbols": [ - "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", - "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" - ], - "type": "enum", - "name": "#main/Putative_Cell_Call/Putative_Cell_Call" - } - ], - "id": "#main/Putative_Cell_Call", - "label": "Putative Cell Calling" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/Reads", - "label": "Reads" - }, - { - "doc": "A fasta file containing the mRNA panel amplicon targets used in the experiment", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Reference", - "label": "Reference" - }, - { - "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", - "type": [ - "null", - "string" - ], - "id": "#main/Run_Name", - "label": "Run Name" - }, - { - "doc": "The sample multiplexing kit version. This option should only be set for a multiplexed experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/Sample_Tags_Version/Sample_Tags_Version/human", - "#main/Sample_Tags_Version/Sample_Tags_Version/hs", - "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", - "#main/Sample_Tags_Version/Sample_Tags_Version/mm", - "#main/Sample_Tags_Version/Sample_Tags_Version/custom" - ], - "type": "enum", - "name": "#main/Sample_Tags_Version/Sample_Tags_Version" - } - ], - "id": "#main/Sample_Tags_Version", - "label": "Sample Tags Version" - }, - { - "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", - "type": [ - "null", - "float" - ], - "id": "#main/Subsample", - "label": "Subsample Reads" - }, - { - "doc": "For use when replicating a previous subsampling run only. 
Obtain the seed generated from the log file for the SplitFastQ node.\n", - "type": [ - "null", - "int" - ], - "id": "#main/Subsample_seed", - "label": "Subsample Seed" - }, - { - "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alpha numeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#main/Tag_Names", - "label": "Tag Names" - }, - { - "doc": "The VDJ species and chain types. This option should only be set for VDJ experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/VDJ_Version/VDJ_Version/human", - "#main/VDJ_Version/VDJ_Version/hs", - "#main/VDJ_Version/VDJ_Version/mouse", - "#main/VDJ_Version/VDJ_Version/mm", - "#main/VDJ_Version/VDJ_Version/humanBCR", - "#main/VDJ_Version/VDJ_Version/humanTCR", - "#main/VDJ_Version/VDJ_Version/mouseBCR", - "#main/VDJ_Version/VDJ_Version/mouseTCR" - ], - "type": "enum", - "name": "#main/VDJ_Version/VDJ_Version" - } - ], - "id": "#main/VDJ_Version", - "label": "VDJ Species Version" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "The BD Rhapsody\u2122 assays are used to create sequencing libraries from single cell transcriptomes.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files and a reference file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", - "label": "BD Rhapsody\u2122 Targeted Analysis Pipeline", - "steps": [ - { - "run": "#AddtoBam.cwl", - "scatter": [ - "#main/AddtoBam/R2_Bam" - ], - "in": [ - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AddtoBam/Annotation_R1" - }, - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/AddtoBam/Cell_Order" - }, - { - "source": "#main/GetDataTable/Corrected_Molecular_Annotation", - "id": "#main/AddtoBam/Molecular_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Bam", - "id": "#main/AddtoBam/R2_Bam" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AddtoBam/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Tag_Calls", - "id": "#main/AddtoBam/Tag_Calls" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AddtoBam/Target_Gene_Mapping" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AddtoBam", - "out": [ - "#main/AddtoBam/Annotated_Bam", - "#main/AddtoBam/output" - ] - }, - { - "run": "#AlignR2.cwl", - "out": [ - "#main/AlignR2/Alignments", - "#main/AlignR2/output" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AlignR2", - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AlignR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/Index", - "id": "#main/AlignR2/Index" - }, - { - "source": "#main/QualityFilterOuter/R2", - "id": "#main/AlignR2/R2" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AlignR2/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateMolecules.cwl", - "scatter": [ - "#main/AnnotateMolecules/Valids" - ], - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateMolecules/AbSeq_UMI" - }, - { - "source": 
"#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateMolecules/Run_Metadata" - }, - { - "source": "#main/Internal_Settings/Use_DBEC", - "id": "#main/AnnotateMolecules/Use_DBEC" - }, - { - "source": "#main/AnnotateReads/Valid_Reads", - "id": "#main/AnnotateMolecules/Valids" - } - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateMolecules", - "out": [ - "#main/AnnotateMolecules/Mol_Annot_List", - "#main/AnnotateMolecules/Gene_Status_List", - "#main/AnnotateMolecules/Max_Count", - "#main/AnnotateMolecules/Total_Molecules", - "#main/AnnotateMolecules/output" - ] - }, - { - "id": "#main/AnnotateR1", - "out": [ - "#main/AnnotateR1/Annotation_R1", - "#main/AnnotateR1/R1_error_count_table", - "#main/AnnotateR1/R1_read_count_breakdown", - "#main/AnnotateR1/output" - ], - "run": "#AnnotateR1.cwl", - "scatter": [ - "#main/AnnotateR1/R1" - ], - "in": [ - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateR1/Filter_Metrics" - }, - { - "source": "#main/QualityFilterOuter/R1", - "id": "#main/AnnotateR1/R1" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR1/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateR2.cwl", - "scatter": [ - "#main/AnnotateR2/R2_zip" - ], - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/GTF", - "id": "#main/AnnotateR2/GTF_Annotation" - }, - { - "source": "#main/AlignR2/Alignments", - "id": "#main/AnnotateR2/R2_zip" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR2/Run_Metadata" - }, - { - "source": "#main/CheckReference/Transcript_Length", - "id": "#main/AnnotateR2/Transcript_Length" - } - ], - "requirements": [ - { - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateR2", - "out": [ - "#main/AnnotateR2/Annot_R2", - "#main/AnnotateR2/R2_Bam", - 
"#main/AnnotateR2/GTF", - "#main/AnnotateR2/output", - "#main/AnnotateR2/R2_Quality_Metrics" - ] - }, - { - "run": "#AnnotateReads.cwl", - "out": [ - "#main/AnnotateReads/Seq_Metrics", - "#main/AnnotateReads/Valid_Reads", - "#main/AnnotateReads/Read1_error_rate", - "#main/AnnotateReads/Annotation_Read", - "#main/AnnotateReads/output", - "#main/AnnotateReads/validTcrReads", - "#main/AnnotateReads/validIgReads", - "#main/AnnotateReads/num_valid_tcr_reads", - "#main/AnnotateReads/num_valid_ig_reads" - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateReads", - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateReads/AbSeq_UMI" - }, - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateReads/Extra_Seqs" - }, - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateReads/Filter_Metrics" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/AnnotateReads/Putative_Cell_Call" - }, - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AnnotateReads/R1_Annotation" - }, - { - "source": "#main/AnnotateR1/R1_error_count_table", - "id": "#main/AnnotateReads/R1_error_count_table" - }, - { - "source": "#main/AnnotateR1/R1_read_count_breakdown", - "id": "#main/AnnotateReads/R1_read_count_breakdown" - }, - { - "source": "#main/AnnotateR2/Annot_R2", - "id": "#main/AnnotateReads/R2_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Quality_Metrics", - "id": "#main/AnnotateReads/R2_Quality_Metrics" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateReads/Run_Metadata" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AnnotateReads/Target_Gene_Mapping" - } - ] - }, - { - "out": [ - "#main/BundleLogs/logs_dir" - ], - "run": "#BundleLogs.cwl", - "id": "#main/BundleLogs", - "in": [ - { - "source": [ - "#main/AnnotateReads/output", - 
"#main/AnnotateR1/output", - "#main/AnnotateR2/output", - "#main/CheckReference/output", - "#main/GetDataTable/output", - "#main/Metrics/output", - "#main/AddtoBam/output", - "#main/AnnotateMolecules/output", - "#main/QualityFilterOuter/output", - "#main/CheckFastqs/log", - "#main/SplitAndSubsample/log", - "#main/MergeBAM/log", - "#main/Dense_to_Sparse_Datatable/output", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output", - "#main/IndexBAM/log", - "#main/CellClassifier/log" - ], - "linkMerge": "merge_flattened", - "id": "#main/BundleLogs/log_files" - } - ] - }, - { - "run": "#Cell_Classifier.cwl", - "out": [ - "#main/CellClassifier/cellTypePredictions", - "#main/CellClassifier/log" - ], - "requirements": [ - { - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CellClassifier", - "in": [ - { - "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", - "id": "#main/CellClassifier/molsPerCellMatrix" - } - ] - }, - { - "out": [ - "#main/CheckFastqs/SubsampleSeed", - "#main/CheckFastqs/SubsamplingRatio", - "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "#main/CheckFastqs/FastqReadPairs", - "#main/CheckFastqs/Bead_Version", - "#main/CheckFastqs/Libraries", - "#main/CheckFastqs/ReadsList", - "#main/CheckFastqs/log" - ], - "run": "#CheckFastqs.cwl", - "id": "#main/CheckFastqs", - "in": [ - { - "source": "#main/Internal_Settings/MinChunkSize", - "id": "#main/CheckFastqs/MinChunkSize" - }, - { - "source": "#main/Reads", - "id": "#main/CheckFastqs/Reads" - }, - { - "source": "#main/Subsample_Settings/Subsample_Reads", - "id": "#main/CheckFastqs/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": "#main/CheckFastqs/Subsample_Seed" - } - ] - }, - { - "run": "#CheckReference.cwl", - "out": [ - "#main/CheckReference/Index", - "#main/CheckReference/Extra_Seqs", - "#main/CheckReference/Full_Genes", - "#main/CheckReference/output", - "#main/CheckReference/Transcript_Length", - 
"#main/CheckReference/GTF", - "#main/CheckReference/Target_Gene_Mapping" - ], - "requirements": [ - { - "ramMin": 1000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CheckReference", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/CheckReference/AbSeq_Reference" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/CheckReference/Putative_Cell_Call" - }, - { - "source": "#main/Reference", - "id": "#main/CheckReference/Reference" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/CheckReference/Run_Metadata" - } - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables", - "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": "#main/Dense_to_Sparse_Datatable/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable", - "out": [ - "#main/Dense_to_Sparse_Datatable/Data_Tables", - "#main/Dense_to_Sparse_Datatable/output" - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", 
- "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", - "out": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output" - ] - }, - { - "out": [ - "#main/Dense_to_Sparse_File/Cell_Order" - ], - "run": "#DensetoSparseFile.cwl", - "id": "#main/Dense_to_Sparse_File", - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_File/GDT_cell_order" - } - ] - }, - { - "out": [ - "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/dataTables" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "File", - "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/molsPerCellMatrixForCellClassifier" - } - ], - "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7", - "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", - "class": "ExpressionTool" - }, - "id": "#main/FindDataTableForCellClassifier", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/FindDataTableForCellClassifier/dataTables" - } - ] - }, - { - "out": [ - "#main/GetDataTable/Tag_Calls", - "#main/GetDataTable/Molecular_Annotation", - "#main/GetDataTable/Corrected_Molecular_Annotation", - "#main/GetDataTable/Tag_Annotation", - "#main/GetDataTable/Annot_Files", - 
"#main/GetDataTable/Cell_Label_Filter", - "#main/GetDataTable/Dense_Data_Tables", - "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "#main/GetDataTable/Expression_Data", - "#main/GetDataTable/Expression_Data_Unfiltered", - "#main/GetDataTable/Bioproduct_Stats", - "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "#main/GetDataTable/Putative_Cells_Origin", - "#main/GetDataTable/Protein_Aggregates_Experimental", - "#main/GetDataTable/Trueno_out", - "#main/GetDataTable/Trueno_zip", - "#main/GetDataTable/output", - "#main/GetDataTable/Cell_Order", - "#main/GetDataTable/Gene_List" - ], - "run": "#GetDataTable.cwl", - "id": "#main/GetDataTable", - "in": [ - { - "source": "#main/CheckReference/Full_Genes", - "id": "#main/GetDataTable/Full_Genes" - }, - { - "source": "#main/AnnotateMolecules/Gene_Status_List", - "id": "#main/GetDataTable/Gene_Status_List" - }, - { - "source": "#main/AnnotateMolecules/Max_Count", - "id": "#main/GetDataTable/Max_Count" - }, - { - "source": "#main/AnnotateMolecules/Mol_Annot_List", - "id": "#main/GetDataTable/Molecule_Annotation_List" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/GetDataTable/Putative_Cell_Call" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/GetDataTable/Run_Metadata" - }, - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/GetDataTable/Seq_Metrics" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/GetDataTable/Tag_Names" - }, - { - "source": "#main/AnnotateMolecules/Total_Molecules", - "id": "#main/GetDataTable/Total_Molecules" - } - ] - }, - { - "out": [ - "#main/IndexBAM/Index", - "#main/IndexBAM/log" - ], - "run": "#IndexBAM.cwl", - "id": "#main/IndexBAM", - "in": [ - { - "source": "#main/MergeBAM/Final_Bam", - "id": "#main/IndexBAM/BamFile" - } - ] - }, - { - "out": [ - "#main/Internal_Settings/Read_Filter_Off", - "#main/Internal_Settings/Barcode_Num", - "#main/Internal_Settings/Seq_Run", 
- "#main/Internal_Settings/AbSeq_UMI", - "#main/Internal_Settings/Use_DBEC", - "#main/Internal_Settings/Extra_Seqs", - "#main/Internal_Settings/MinChunkSize", - "#main/Internal_Settings/NumRecordsPerSplit", - "#main/Internal_Settings/Target_analysis", - "#main/Internal_Settings/Subsample_Tags", - "#main/Internal_Settings/VDJ_VGene_Evalue", - "#main/Internal_Settings/VDJ_JGene_Evalue" - ], - "in": [], - "run": "#InternalSettings.cwl", - "id": "#main/Internal_Settings", - "label": "Internal Settings" - }, - { - "out": [ - "#main/MergeBAM/Final_Bam", - "#main/MergeBAM/log" - ], - "run": "#MergeBAM.cwl", - "id": "#main/MergeBAM", - "in": [ - { - "source": "#main/AddtoBam/Annotated_Bam", - "id": "#main/MergeBAM/BamFiles" - }, - { - "source": "#main/Metadata_Settings/Run_Base_Name", - "id": "#main/MergeBAM/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/MergeBAM/Sample_Tags_Version" - } - ] - }, - { - "out": [ - "#main/MergeMultiplex/Multiplex_out" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/SampleTag_Files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/Multiplex_out" - } - ], - "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679", - "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", - "class": "ExpressionTool" - }, - "id": "#main/MergeMultiplex", - "in": [ - { - "source": [ - "#main/GetDataTable/Trueno_out", - "#main/Metrics/Sample_Tag_Out" - ], - "linkMerge": "merge_flattened", - "id": 
"#main/MergeMultiplex/SampleTag_Files" - } - ] - }, - { - "out": [ - "#main/Metadata_Settings/Run_Metadata", - "#main/Metadata_Settings/Run_Base_Name" - ], - "run": "#Metadata.cwl", - "id": "#main/Metadata_Settings", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/Metadata_Settings/AbSeq_Reference" - }, - { - "valueFrom": "Targeted", - "id": "#main/Metadata_Settings/Assay" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", - "id": "#main/Metadata_Settings/Basic_Algo_Only" - }, - { - "source": "#main/CheckFastqs/Bead_Version", - "id": "#main/Metadata_Settings/Bead_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "id": "#main/Metadata_Settings/Exact_Cell_Count" - }, - { - "source": "#main/CheckFastqs/Libraries", - "id": "#main/Metadata_Settings/Libraries" - }, - { - "valueFrom": "BD Rhapsody Targeted Analysis Pipeline", - "id": "#main/Metadata_Settings/Pipeline_Name" - }, - { - "source": "#main/Version/version", - "id": "#main/Metadata_Settings/Pipeline_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/Metadata_Settings/Putative_Cell_Call" - }, - { - "source": "#main/CheckFastqs/ReadsList", - "id": "#main/Metadata_Settings/Reads" - }, - { - "source": "#main/Reference", - "id": "#main/Metadata_Settings/Reference" - }, - { - "source": "#main/Name_Settings/Run_Name", - "id": "#main/Metadata_Settings/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/Metadata_Settings/Sample_Tag_Names" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/Metadata_Settings/Sample_Tags_Version" - }, - { - "source": "#main/Start_Time/Start_Time", - "id": "#main/Metadata_Settings/Start_Time" - }, - { - "source": "#main/Subsample_Settings/Subsample_Reads", - "id": "#main/Metadata_Settings/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": 
"#main/Metadata_Settings/Subsample_Seed" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/Metadata_Settings/VDJ_Version" - } - ] - }, - { - "out": [ - "#main/Metrics/Metrics_Summary", - "#main/Metrics/Metrics_Archive", - "#main/Metrics/output", - "#main/Metrics/Sample_Tag_Out" - ], - "run": "#Metrics.cwl", - "id": "#main/Metrics", - "in": [ - { - "source": "#main/GetDataTable/Annot_Files", - "id": "#main/Metrics/Annot_Files" - }, - { - "source": "#main/AnnotateReads/Read1_error_rate", - "id": "#main/Metrics/Read1_error_rate" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Metrics/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Trueno_zip", - "id": "#main/Metrics/Sample_Tag_Archives" - }, - { - "source": "#main/Internal_Settings/Seq_Run", - "id": "#main/Metrics/Seq_Run" - }, - { - "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "id": "#main/Metrics/UMI_Adjusted_Stats" - }, - { - "source": "#main/VDJ_Compile_Results/vdjMetricsJson", - "id": "#main/Metrics/vdjMetricsJson" - } - ] - }, - { - "out": [ - "#main/Multiplexing_Settings/Tag_Sample_Names", - "#main/Multiplexing_Settings/Sample_Tags_Version" - ], - "in": [ - { - "source": "#main/Sample_Tags_Version", - "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" - }, - { - "source": "#main/Tag_Names", - "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" - } - ], - "run": "#MultiplexingSettings.cwl", - "id": "#main/Multiplexing_Settings", - "label": "Multiplexing Settings" - }, - { - "out": [ - "#main/Name_Settings/Run_Name" - ], - "in": [ - { - "source": "#main/Run_Name", - "id": "#main/Name_Settings/_Run_Name" - } - ], - "run": "#NameSettings.cwl", - "id": "#main/Name_Settings", - "label": "Name Settings" - }, - { - "out": [ - "#main/PairReadFiles/ReadPairs" - ], - "run": "#PairReadFiles.cwl", - "id": "#main/PairReadFiles", - "in": [ - { - "source": "#main/CheckFastqs/FastqReadPairs", - "id": "#main/PairReadFiles/FastqReadPairs" - }, - { - 
"source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#main/PairReadFiles/Reads" - } - ] - }, - { - "out": [ - "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" - ], - "in": [ - { - "source": "#main/Basic_Algo_Only", - "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" - }, - { - "source": "#main/Exact_Cell_Count", - "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" - }, - { - "source": "#main/Putative_Cell_Call", - "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" - } - ], - "run": "#PutativeCellSettings.cwl", - "id": "#main/Putative_Cell_Calling_Settings", - "label": "Putative Cell Calling Settings" - }, - { - "out": [ - "#main/QualityFilterOuter/Filter_Metrics", - "#main/QualityFilterOuter/R1", - "#main/QualityFilterOuter/R2", - "#main/QualityFilterOuter/output" - ], - "run": "#QualityFilterOuter.cwl", - "id": "#main/QualityFilterOuter", - "in": [ - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/QualityFilterOuter/Run_Metadata" - }, - { - "source": "#main/PairReadFiles/ReadPairs", - "id": "#main/QualityFilterOuter/Split_Read_Pairs" - } - ] - }, - { - "out": [ - "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "#main/SplitAndSubsample/log" - ], - "run": "#SplitAndSubsample.cwl", - "id": "#main/SplitAndSubsample", - "in": [ - { - "source": "#main/Reads", - "id": "#main/SplitAndSubsample/Fastqs" - }, - { - "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": "#main/Internal_Settings/NumRecordsPerSplit", - "id": "#main/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#main/CheckFastqs/SubsamplingRatio", - "id": "#main/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#main/CheckFastqs/SubsampleSeed", - "id": "#main/SplitAndSubsample/SubsampleSeed" - } - ] 
- }, - { - "out": [ - "#main/Start_Time/Start_Time" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "string", - "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad/Start_Time" - } - ], - "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad", - "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", - "class": "ExpressionTool" - }, - "id": "#main/Start_Time", - "in": [] - }, - { - "out": [ - "#main/Subsample_Settings/Subsample_Reads", - "#main/Subsample_Settings/Subsample_Seed" - ], - "in": [ - { - "source": "#main/Subsample", - "id": "#main/Subsample_Settings/_Subsample_Reads" - }, - { - "source": "#main/Subsample_seed", - "id": "#main/Subsample_Settings/_Subsample_Seed" - } - ], - "run": "#SubsampleSettings.cwl", - "id": "#main/Subsample_Settings", - "label": "Subsample Settings" - }, - { - "out": [ - "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" - ], - "run": "#UncompressDatatables.cwl", - "id": "#main/Uncompress_Datatables", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/Uncompress_Datatables/Compressed_Data_Table" - }, - { - "source": "#main/GetDataTable/Expression_Data", - "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", - "id": 
"#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Compile_Results/vdjCellsDatatable", - "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "#main/VDJ_Compile_Results/vdjDominantContigs", - "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "#main/VDJ_Compile_Results/vdjMetricsJson", - "#main/VDJ_Compile_Results/vdjMetricsCsv", - "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" - ], - "run": "#VDJ_Compile_Results.cwl", - "id": "#main/VDJ_Compile_Results", - "in": [ - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/VDJ_Compile_Results/Seq_Metrics" - }, - { - "source": "#main/CellClassifier/cellTypePredictions", - "id": "#main/VDJ_Compile_Results/cellTypeMapping" - }, - { - "valueFrom": "$([])", - "id": "#main/VDJ_Compile_Results/chainsToIgnore" - }, - { - "source": "#main/Internal_Settings/VDJ_JGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueJgene" - }, - { - "source": "#main/Internal_Settings/VDJ_VGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueVgene" - }, - { - "source": "#main/VDJ_GatherIGCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/igCalls" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/VDJ_Compile_Results/metadata" - }, - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/VDJ_Compile_Results/putativeCells" - }, - { - "source": 
"#main/VDJ_GatherTCRCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/tcrCalls" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Compile_Results/vdjVersion" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherIGCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherIGCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", - "id": "#main/VDJ_GatherIGCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherTCRCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherTCRCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", - "id": "#main/VDJ_GatherTCRCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_IG/num_splits", - "#main/VDJ_Preprocess_Reads_IG/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_IG", - "in": [ - { - "source": "#main/AnnotateReads/validIgReads", - "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" - }, - { - "source": "#main/AnnotateReads/num_valid_ig_reads", - "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" - }, - { - "valueFrom": "BCR", - "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_TCR/num_splits", - "#main/VDJ_Preprocess_Reads_TCR/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_TCR", - "in": [ - { - "source": "#main/AnnotateReads/validTcrReads", - "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" - }, - { - "source": "#main/AnnotateReads/num_valid_tcr_reads", - "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" - }, - { - "valueFrom": "TCR", - "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Settings/VDJ_Version" - ], - "in": [ - { - "source": "#main/VDJ_Version", - 
"id": "#main/VDJ_Settings/_VDJ_Version" - } - ], - "run": "#VDJ_Settings.cwl", - "id": "#main/VDJ_Settings", - "label": "VDJ Settings" - }, - { - "out": [ - "#main/Version/version" - ], - "run": "#Version.cwl", - "id": "#main/Version", - "in": [] - } - ], - "outputs": [ - { - "outputSource": "#main/GetDataTable/Bioproduct_Stats", - "type": [ - "null", - "File" - ], - "id": "#main/Bioproduct_Stats", - "label": "Bioproduct Statistics" - }, - { - "outputSource": "#main/GetDataTable/Cell_Label_Filter", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Cell_Label_Filter", - "label": "Cell Label Filter" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables", - "label": "Data Tables" - }, - { - "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables_Unfiltered", - "label": "Unfiltered Data Tables" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data", - "label": "Expression Matrix" - }, - { - "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data_Unfiltered", - "label": "Unfiltered Expression Matrix" - }, - { - "outputSource": "#main/MergeBAM/Final_Bam", - "type": "File", - "id": "#main/Final_Bam", - "label": "Final BAM File" - }, - { - "outputSource": "#main/IndexBAM/Index", - "type": "File", - "id": "#main/Final_Bam_Index", - "label": "Final BAM Index" - }, - { - "outputSource": "#main/CellClassifier/cellTypePredictions", - "type": [ - "null", - "File" - ], - "id": "#main/ImmuneCellClassification(Experimental)", - "label": "Immune Cell Classification (Experimental)" - }, - { - "outputSource": 
"#main/BundleLogs/logs_dir", - "type": "Directory", - "id": "#main/Logs", - "label": "Pipeline Logs" - }, - { - "outputSource": "#main/Metrics/Metrics_Summary", - "type": "File", - "id": "#main/Metrics_Summary", - "label": "Metrics Summary" - }, - { - "outputSource": "#main/MergeMultiplex/Multiplex_out", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Multiplex" - }, - { - "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", - "type": [ - "null", - "File" - ], - "id": "#main/Protein_Aggregates_Experimental", - "label": "Protein Aggregates (Experimental)" - }, - { - "outputSource": "#main/GetDataTable/Putative_Cells_Origin", - "type": [ - "null", - "File" - ], - "id": "#main/Putative_Cells_Origin", - "label": "Putative Cells Origin" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatable", - "label": "vdjCellsDatatable" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatableUncorrected", - "label": "vdjCellsDatatableUncorrected" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjDominantContigs", - "label": "vdjDominantContigs" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", - "type": [ - "null", - "File" - ], - "id": "#main/vdjMetricsCsv", - "label": "vdjMetricsCsv" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjUnfilteredContigs", - "label": "vdjUnfilteredContigs" - } - ], - "id": "#main", - "class": "Workflow" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#MergeBAM.cwl/BamFiles" - }, - { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Run_Name" - }, 
- { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Sample_Tags_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_merge.log", - "outputs": [ - { - "outputBinding": { - "glob": "*_final.BAM" - }, - "type": "File", - "id": "#MergeBAM.cwl/Final_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#MergeBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "merge" - ], - "id": "#MergeBAM.cwl", - "arguments": [ - { - "prefix": "-@", - "valueFrom": "$(runtime.cores)" - }, - { - "position": 0, - "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" - } - ], - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 4, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/AbSeq_Reference" - }, - { - "type": "string", - "id": "#Metadata.cwl/Assay" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#Metadata.cwl/Basic_Algo_Only" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#Metadata.cwl/Bead_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Label_Version" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Libraries" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Name" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Putative_Cell_Call" - }, - { - "type": [ - "null", - "boolean" - ], - "id": 
"#Metadata.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reads" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Name" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Sample_Tag_Names" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Start_Time" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Subsample_Seed" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Supplemental_Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "run_metadata.json", - "outputs": [ - { - "outputBinding": { - "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" - }, - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Base_Name" - }, - { - "type": "stdout", - "id": "#Metadata.cwl/Run_Metadata" - } - ], - "baseCommand": "echo", - "id": "#Metadata.cwl", - "arguments": [ - { - "prefix": "" - }, - { - "shellQuote": true, - "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = 
bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = \"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell 
Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-files" - }, - "type": "File", - "id": "#Metrics.cwl/Annot_Files" - }, - { - "inputBinding": { - "prefix": "--read1-error-rate" - }, - "type": "File", - "id": "#Metrics.cwl/Read1_error_rate" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#Metrics.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--sample-tag-archives", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Archives" - }, - { - "inputBinding": { - "prefix": "--seq-run" - }, - "type": [ - "null", - "string" - ], - "id": "#Metrics.cwl/Seq_Run" - }, - { - "inputBinding": { - "prefix": "--umi-adjusted-stats" - }, - "type": [ - "null", - "File" - ], - "id": "#Metrics.cwl/UMI_Adjusted_Stats" - }, - { - "inputBinding": { - "prefix": "--vdj-metrics-fp" - }, - "type": [ 
- "null", - "File" - ], - "id": "#Metrics.cwl/vdjMetricsJson" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "internal-metrics-archive.tar.gz" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Archive" - }, - { - "outputBinding": { - "glob": "*_Metrics_Summary.csv" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Summary" - }, - { - "outputBinding": { - "glob": "*.zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Out" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Metrics.cwl/output" - } - ], - "baseCommand": [ - "mist_metrics.py" - ], - "class": "CommandLineTool", - "id": "#Metrics.cwl" - }, - { - "inputs": [ - { - "default": "Targeted", - "type": "string", - "id": "#MultiplexingSettings.cwl/Assay" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" - } - ], - "class": "ExpressionTool", - "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || _Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n 
{\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", - "id": "#MultiplexingSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/_Run_Name" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/Run_Name" - } - ], - "class": "ExpressionTool", - "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", - "id": "#NameSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/FastqReadPairs" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#PairReadFiles.cwl/Reads" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "PairReadFiles takes an array of split files and pairs them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. 
The pairing information is taken from CheckFastqs.\n", - "id": "#PairReadFiles.cwl", - "outputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R1" - }, - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R2" - }, - { - "type": "int", - "name": "#PairReadFiles.cwl/ReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/ReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/ReadPairs" - } - ], - "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, \"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if 
(fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the 
fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", - "class": "ExpressionTool" - 
}, - { - "inputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" - } - ], - "class": "ExpressionTool", - "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", - "id": "#PutativeCellSettings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--run-metadata" - }, - 
"type": "File", - "id": "#QualityFilter.cwl/Run_Metadata" - }, - { - "type": { - "fields": [ - { - "inputBinding": { - "prefix": "--r1" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" - }, - { - "inputBinding": { - "prefix": "--r2" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" - }, - { - "inputBinding": { - "prefix": "--read-pair-id" - }, - "type": "int", - "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" - }, - { - "inputBinding": { - "prefix": "--library" - }, - "type": "string", - "name": "#QualityFilter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "id": "#QualityFilter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*read_quality.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#QualityFilter.cwl/Filter_Metrics" - }, - { - "outputBinding": { - "glob": "*_R1*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R1" - }, - { - "outputBinding": { - "glob": "*_R2*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R2" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#QualityFilter.cwl/output" - } - ], - "baseCommand": [ - "mist_quality_filter.py" - ], - "class": "CommandLineTool", - "id": "#QualityFilter.cwl" - }, - { - "inputs": [ - { - "type": "File", - "id": "#QualityFilterOuter.cwl/Run_Metadata" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" - }, - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" - }, - { - "type": "int", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" - }, - { - "type": "string", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - 
"class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/Filter_Metrics" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R1" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R2" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/output" - } - ], - "class": "Workflow", - "steps": [ - { - "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", - "out": [ - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" - ], - "run": "#QualityFilter.cwl", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", - "in": [ - { - "source": "#QualityFilterOuter.cwl/Run_Metadata", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" - }, - { - "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" - } - ] - } - ], - "id": "#QualityFilterOuter.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/Fastqs" - }, - { - "type": { - "items": "string", - "type": "array" - }, - "id": 
"#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" - }, - { - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" - }, - { - "type": "float", - "id": "#SplitAndSubsample.cwl/SubsampleRatio" - }, - { - "type": "int", - "id": "#SplitAndSubsample.cwl/SubsampleSeed" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", - "id": "#SplitAndSubsample.cwl", - "steps": [ - { - "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", - "out": [ - "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": { - "items": "File", - "type": "array" - }, - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" - } - ], - "outputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" - } - ], - "class": "ExpressionTool", - "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput", - "in": [ - { - "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" - } - ] - }, - { - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "prefix": "--fastq-file-path" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" - }, - { - "inputBinding": { - "prefix": "--files-to-skip-split-and-subsample", 
- "itemSeparator": "," - }, - "type": { - "items": "string", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" - }, - { - "inputBinding": { - "prefix": "--num-records" - }, - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" - }, - { - "inputBinding": { - "prefix": "--subsample-ratio" - }, - "type": "float", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": "int", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*.fastq.gz", - "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" - } - ], - "baseCommand": [ - "mist_split_fastq.py" - ], - "class": "CommandLineTool", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" - }, - "doc": "Allocate one docker/python process per file to do the actual file splitting.", - "scatter": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - ], - "in": [ - { - "source": "#SplitAndSubsample.cwl/Fastqs", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - }, - { - "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": "#SplitAndSubsample.cwl/NumRecordsPerSplit", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleRatio", - "id": 
"#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleSeed", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" - } - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample", - "out": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "#SplitAndSubsample.cwl/SplitAndSubsample/log" - ] - } - ], - "outputs": [ - { - "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" - }, - { - "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/log" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Seed" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/Subsample_Seed" - } - ], - "class": "ExpressionTool", - "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", - "id": "#SubsampleSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#UncompressDatatables.cwl/Compressed_Data_Table" - }, - { - "type": "File", - "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", - "type": { - "items": "File", - "type": "array" - }, - "id": 
"#UncompressDatatables.cwl/Uncompressed_Data_Tables" - }, - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" - } - ], - "class": "Workflow", - "steps": [ - { - "id": "#UncompressDatatables.cwl/Uncompress_Datatable", - "out": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": "$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - ] - }, - "scatter": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - ], - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Data_Table", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - } - ] - }, - { - "out": [ - "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": 
"$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - ] - }, - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" - } - ] - } - ], - "id": "#UncompressDatatables.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" - }, - { - "inputBinding": { - "position": 2 - }, - "type": "string", - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" - }, - { - "inputBinding": { - "position": 3 - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_pruned.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" - } - ], - "baseCommand": [ - "AssembleAndAnnotate.sh" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 1, - "ramMin": 3200, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" - }, - { 
- "type": [ - "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" - }, - { - "type": [ 
- "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 10, - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "position": 0, - "prefix": "--cell-type-mapping-fp" - }, - "type": [ - "null", - "File" - ], - "id": 
"#VDJ_Compile_Results.cwl/cellTypeMapping" - }, - { - "inputBinding": { - "position": 4, - "prefix": "--ignore", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" - }, - { - "inputBinding": { - "position": 8, - "prefix": "--e-value-for-j" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueJgene" - }, - { - "inputBinding": { - "position": 7, - "prefix": "--e-value-for-v" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueVgene" - }, - { - "inputBinding": { - "position": 5 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/igCalls" - }, - { - "inputBinding": { - "position": 9, - "prefix": "--metadata-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/metadata" - }, - { - "inputBinding": { - "position": 3, - "prefix": "--putative-cells-json-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/putativeCells" - }, - { - "inputBinding": { - "position": 6 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/tcrCalls" - }, - { - "inputBinding": { - "position": 2, - "prefix": "--vdj-version" - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Compile_Results.cwl/vdjVersion" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "doc": "VDJ data per cell, with distribution based error correction", - "outputBinding": { - "glob": "*_VDJ_perCell.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" - }, - { - "doc": "VDJ data per cell, including non-putative cells, no error correction applied", - "outputBinding": { - "glob": "*_VDJ_perCell_uncorrected.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" - }, - { - "outputBinding": { - "glob": "*_VDJ_Dominant_Contigs.csv.gz" - }, - "type": [ - 
"null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.json" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" - }, - { - "outputBinding": { - "glob": "*_DBEC_cutoff.png" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" - }, - { - "outputBinding": { - "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" - } - ], - "baseCommand": [ - "mist_vdj_compile_results.py" - ], - "id": "#VDJ_Compile_Results.cwl", - "class": "CommandLineTool", - "hints": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", - "id": "#VDJ_GatherCalls.cwl", - "steps": [ - { - "out": [ - "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_constant_region_called_pruned.csv.gz", - "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" - }, - "type": [ - "null", - "File" - ], 
- "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" - } - ], - "class": "CommandLineTool", - "arguments": [ - { - "shellQuote": false, - "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" - }, - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", - "in": [ - { - "source": "#VDJ_GatherCalls.cwl/theCalls", - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" - } - ] - } - ], - "outputs": [ - { - "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", - "type": [ - "null", - "File" - ], - "id": "#VDJ_GatherCalls.cwl/gatheredCalls" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "File" - ], - "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" - } - ], - "requirements": [ - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": 
"CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "8" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", - "id": "#VDJ_Preprocess_Reads.cwl/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/num_splits" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_RSEC_Reads.cwl", - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", - "class": "ResourceRequirement" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" - ], - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" - } - ], - "run": "#VDJ_Trim_Reads.cwl", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", - "hints": [ - { - 
"coresMin": 8, - "class": "ResourceRequirement" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" - } - ], - "class": "ExpressionTool", - "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" - }, - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/vdj_type", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" - } - ] - } - ], - "id": "#VDJ_Preprocess_Reads.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--vdj-valid-reads", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": 
"#VDJ_RSEC_Reads.cwl/Valid_Reads" - }, - { - "inputBinding": { - "prefix": "--num-splits" - }, - "type": [ - "null", - "int" - ], - "id": "#VDJ_RSEC_Reads.cwl/num_splits" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*RSEC_Reads_Fastq_*.tar.gz" - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" - } - ], - "baseCommand": "mist_vdj_rsec_reads.py", - "class": "CommandLineTool", - "id": "#VDJ_RSEC_Reads.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "Any" - ], - "id": "#VDJ_Settings.cwl/_VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Settings.cwl/VDJ_Version" - } - ], - "class": "ExpressionTool", - "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === \"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = 
inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", - "id": "#VDJ_Settings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "cutadapt.log" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Trim_Report" - }, - { - "outputBinding": { - "glob": "*vdjtxt.gz" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" - } - ], - "baseCommand": "VDJ_Trim_Reads.sh", - "class": "CommandLineTool", - "id": "#VDJ_Trim_Reads.cwl" - }, - { - "inputs": [], - "requirements": [ - ], - "stdout": "output.txt", - "outputs": [ - { - "outputBinding": { - "glob": "output.txt", - "loadContents": true, - "outputEval": "$(self[0].contents)" - }, - "type": "string", - "id": "#Version.cwl/version" - } - ], - "baseCommand": [ - "mist_version.py" - ], - "id": "#Version.cwl", - "class": "CommandLineTool" - } - ], - "$namespaces": { - "sbg": "https://sevenbridges.com#", - "arv": "http://arvados.org/cwl#" - } -} \ No newline at end of file diff --git a/target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl b/target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl deleted file mode 100755 index 5fa9ea85e48..00000000000 --- a/target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl +++ /dev/null @@ -1,5204 +0,0 @@ -#!/usr/bin/env cwl-runner -{ - "cwlVersion": "v1.0", - "$graph": [ - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-r1", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AddtoBam.cwl/Annotation_R1" - }, - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#AddtoBam.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--annot-mol-file" - }, - "type": "File", - "id": 
"#AddtoBam.cwl/Molecular_Annotation" - }, - { - "inputBinding": { - "prefix": "--r2-bam" - }, - "type": "File", - "id": "#AddtoBam.cwl/R2_Bam" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AddtoBam.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--tag-calls" - }, - "type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Tag_Calls" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "Annotated_mapping_R2.BAM" - }, - "type": "File", - "id": "#AddtoBam.cwl/Annotated_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AddtoBam.cwl/output" - } - ], - "baseCommand": [ - "mist_add_to_bam.py" - ], - "class": "CommandLineTool", - "id": "#AddtoBam.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - ], - "id": "#AlignR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--index" - }, - "type": "File", - "id": "#AlignR2.cwl/Index" - }, - { - "inputBinding": { - "prefix": "--r2-fastqs", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/R2" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AlignR2.cwl/Run_Metadata" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "$(String(runtime.cores))" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*zip" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/Alignments" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AlignR2.cwl/output" - } - ], - "baseCommand": [ - 
"mist_align_R2.py" - ], - "class": "CommandLineTool", - "id": "#AlignR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateMolecules.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--use-dbec" - }, - "type": [ - "null", - "boolean" - ], - "id": "#AnnotateMolecules.cwl/Use_DBEC" - }, - { - "inputBinding": { - "prefix": "--valid-annot" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Valids" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_GeneStatus.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Gene_Status_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).max_count)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Max_Count" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Mol_Annot_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Total_Molecules" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_molecules.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateMolecules.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--filter-metrics", - "itemSeparator": "," - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#AnnotateR1.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--R1" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - 
"type": "File", - "id": "#AnnotateR1.cwl/Run_Metadata" - } - ], - "requirements": [ - - { - "ramMin": 2000, - "class": "ResourceRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_R1.csv.gz" - }, - "type": "File", - "id": "#AnnotateR1.cwl/Annotation_R1" - }, - { - "outputBinding": { - "glob": "*_R1_error_count_table.npy" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_error_count_table" - }, - { - "outputBinding": { - "glob": "*_R1_read_count_breakdown.json" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_read_count_breakdown" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR1.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R1.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR1.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF_Annotation" - }, - { - "inputBinding": { - "prefix": "--R2-zip" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_zip" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--transcript-length" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/Transcript_Length" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*Annotation_R2.csv.gz" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Annot_R2" - }, - { - "outputBinding": { - "glob": "*-annot.gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*mapping_R2.BAM" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_Bam" - }, - { - "outputBinding": { - "glob": "*_picard_quality_metrics.csv.gz" - }, - "type": 
"File", - "id": "#AnnotateR2.cwl/R2_Quality_Metrics" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR2.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R2.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--extra-seqs", - "itemSeparator": "," - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Extra_Seqs" - }, - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#AnnotateReads.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/Putative_Cell_Call" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_error_count_table" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_read_count_breakdown" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Quality_Metrics" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - - { - "class": "InitialWorkDirRequirement", - "listing": [ - { - "writable": false, - "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return 
fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", - "entryname": "manifest.json" - } - ] - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "4" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_Read.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Annotation_Read" - }, - { - "outputBinding": { - "glob": "*read1_error_rate_archive*" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Read1_error_rate" - }, - { - "outputBinding": { - "glob": "*_SeqMetrics.csv.gz" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Seq_Metrics" - }, - { - "outputBinding": { - "glob": "*Sorted_Valid_Reads.csv.*" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/Valid_Reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_ig_reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_tcr_reads" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": 
"#AnnotateReads.cwl/output" - }, - { - "outputBinding": { - "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validIgReads" - }, - { - "outputBinding": { - "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validTcrReads" - } - ], - "baseCommand": [ - "mist_annotate_reads.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateReads.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#BundleLogs.cwl/log_files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - } - ], - "outputs": [ - { - "type": "Directory", - "id": "#BundleLogs.cwl/logs_dir" - } - ], - "class": "ExpressionTool", - "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? 
(random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", - "id": "#BundleLogs.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 0 - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/molsPerCellMatrix" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*cell_type_experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/cellTypePredictions" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Cell_Classifier.cwl/log" - } - ], - "baseCommand": [ - "mist_cell_classifier.py" - ], - "class": "CommandLineTool", - "id": "#Cell_Classifier.cwl" - }, - { - "inputs": [ - { - "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", - "inputBinding": { - "prefix": "--min-split-size" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/MinChunkSize" - }, - { - "inputBinding": { - "prefix": "--reads", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#CheckFastqs.cwl/Reads" - }, - { - "inputBinding": { - "prefix": "--subsample" - }, - "type": [ - "null", - "float" - ], - "id": "#CheckFastqs.cwl/Subsample" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/Subsample_Seed" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "CheckFastqs does several quality 
control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", - "baseCommand": [ - "mist_check_fastqs.py" - ], - "id": "#CheckFastqs.cwl", - "outputs": [ - { - "outputBinding": { - "glob": "bead_version.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/Bead_Version" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/FastqReadPairs" - }, - { - "outputBinding": { - "glob": "files_to_skip_split_and_subsample.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" - }, - "type": { - "items": "string", - "type": "array" - }, - "id": "#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var 
pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" - }, - "type": [ - "null", - "string" - ], - "id": "#CheckFastqs.cwl/Libraries" - }, - { - "outputBinding": { - "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#CheckFastqs.cwl/ReadsList" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" - }, - "type": "int", - "id": "#CheckFastqs.cwl/SubsampleSeed" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" - }, - "type": "float", - "id": "#CheckFastqs.cwl/SubsamplingRatio" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#CheckFastqs.cwl/log" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--abseq-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/AbSeq_Reference" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckReference.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/Reference" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#CheckReference.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": 
"--supplemental-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/Supplemental_Reference" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "combined_extra_seq.fasta" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Extra_Seqs" - }, - { - "outputBinding": { - "glob": "full-gene-list.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Full_Genes" - }, - { - "outputBinding": { - "glob": "*gtf", - "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*-annot.*", - "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" - }, - "type": "File", - "id": "#CheckReference.cwl/Index" - }, - { - "outputBinding": { - "glob": "target-gene.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Target_Gene_Mapping" - }, - { - "outputBinding": { - "glob": "transcript_length.json" - }, - "type": [ - "null", - "File" - ], - "id": 
"#CheckReference.cwl/Transcript_Length" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#CheckReference.cwl/output" - } - ], - "baseCommand": [ - "mist_check_references.py" - ], - "class": "CommandLineTool", - "id": "#CheckReference.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--dense-data-table" - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparse.cwl/Dense_Data_Table" - }, - { - "inputBinding": { - "prefix": "--gene-list" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Gene_List" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Run_Metadata" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*.csv.gz" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Data_Tables" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#DensetoSparse.cwl/output" - } - ], - "baseCommand": [ - "mist_dense_to_sparse.py" - ], - "class": "CommandLineTool", - "id": "#DensetoSparse.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparseFile.cwl/GDT_cell_order" - } - ], - "requirements": [ - - ], - "stdout": "cell_order.json", - "outputs": [ - { - "type": "stdout", - "id": "#DensetoSparseFile.cwl/Cell_Order" - } - ], - "baseCommand": "cat", - "id": "#DensetoSparseFile.cwl", - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--full-gene-list" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Full_Genes" - }, - { - "inputBinding": { - "prefix": "--gene-status", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Gene_Status_List" - }, - { - "inputBinding": { - "prefix": "--max-count", - "itemSeparator": 
"," - }, - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Max_Count" - }, - { - "inputBinding": { - "prefix": "--mol-annot", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Molecule_Annotation_List" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#GetDataTable.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#GetDataTable.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#GetDataTable.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "prefix": "--tag-names", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Tag_Names" - }, - { - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Total_Molecules" - } - ], - "requirements": [ - { - "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", - "class": "ResourceRequirement" - }, - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "metrics-files.tar.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Annot_Files" - }, - { - "outputBinding": { - "glob": "Annotations/*_Bioproduct_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Bioproduct_Stats" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*.png" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Cell_Label_Filter" - }, - { - "outputBinding": { - "glob": "cell_order.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Cell_Order" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule_corrected.csv.gz" - }, - "type": "File", - "id": 
"#GetDataTable.cwl/Corrected_Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "*PerCell_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables" - }, - { - "outputBinding": { - "glob": "*PerCell_Unfiltered_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" - }, - { - "outputBinding": { - "glob": "*_Expression_Data.st.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Expression_Data" - }, - { - "outputBinding": { - "glob": "*_Expression_Data_Unfiltered.st.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" - }, - { - "outputBinding": { - "glob": "gene_list.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Gene_List" - }, - { - "outputBinding": { - "glob": "Annotations/*_Annotation_Molecule.csv.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Putative_Cells_Origin" - }, - { - "outputBinding": { - "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Annotation" - }, - { - "outputBinding": { - "glob": "Trueno/*_Calls.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Calls" - }, - { - "outputBinding": { - "glob": "Trueno/*csv" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Trueno_out" - }, - { - "outputBinding": { - "glob": "Trueno/*zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": 
"#GetDataTable.cwl/Trueno_zip" - }, - { - "outputBinding": { - "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#GetDataTable.cwl/output" - } - ], - "baseCommand": [ - "mist_get_datatables.py" - ], - "class": "CommandLineTool", - "id": "#GetDataTable.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#IndexBAM.cwl/BamFile" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_index.log", - "outputs": [ - { - "outputBinding": { - "glob": "*.bai" - }, - "type": "File", - "id": "#IndexBAM.cwl/Index" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#IndexBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "index" - ], - "id": "#IndexBAM.cwl", - "arguments": [ - { - "position": 2, - "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/AbSeq_UMI" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Barcode_Num" - }, - { - "type": [ - "null", - "File" - ], - "id": "#InternalSettings.cwl/Extra_Seqs" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Label_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/MinChunkSize" - }, - { - "type": [ - "null", - "long" - ], - "id": "#InternalSettings.cwl/NumRecordsPerSplit" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - "string" - ], - "id": "#InternalSettings.cwl/Seq_Run" - }, - { - "type": [ - "null", - "float" - ], - "id": 
"#InternalSettings.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Target_analysis" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Use_DBEC" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" - } - ], - "class": "ExpressionTool", - "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput = internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", - "id": "#InternalSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/AbSeq_Reference", - "label": "AbSeq Reference" - }, - { - "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set.", - "type": [ - "null", - "boolean" - ], - "id": "#main/Basic_Algo_Only", - "label": "Disable Refined Putative Cell Calling" - }, - { - "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", - "type": [ - "null", - "int" - ], - "id": "#main/Exact_Cell_Count", - "label": "Exact Cell Count" - }, - { - "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. AbSeq (Experimental) is for troubleshooting only.", - "type": [ - "null", - { - "symbols": [ - "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", - "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" - ], - "type": "enum", - "name": "#main/Putative_Cell_Call/Putative_Cell_Call" - } - ], - "id": "#main/Putative_Cell_Call", - "label": "Putative Cell Calling" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/Reads", - "label": "Reads" - }, - { - "type": "File", - "id": "#main/Reference_Genome", - "label": "Reference Genome" - }, - { - "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", - "type": [ - "null", - "string" - ], - "id": "#main/Run_Name", - "label": "Run Name" - }, - { - "doc": "The sample multiplexing kit version. 
This option should only be set for a multiplexed experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/Sample_Tags_Version/Sample_Tags_Version/human", - "#main/Sample_Tags_Version/Sample_Tags_Version/hs", - "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", - "#main/Sample_Tags_Version/Sample_Tags_Version/mm", - "#main/Sample_Tags_Version/Sample_Tags_Version/custom" - ], - "type": "enum", - "name": "#main/Sample_Tags_Version/Sample_Tags_Version" - } - ], - "id": "#main/Sample_Tags_Version", - "label": "Sample Tags Version" - }, - { - "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", - "type": [ - "null", - "float" - ], - "id": "#main/Subsample", - "label": "Subsample Reads" - }, - { - "doc": "For use when replicating a previous subsampling run only. Obtain the seed generated from the log file for the SplitFastQ node.\n", - "type": [ - "null", - "int" - ], - "id": "#main/Subsample_seed", - "label": "Subsample Seed" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Supplemental_Reference", - "label": "Supplemental Reference" - }, - { - "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alpha numeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#main/Tag_Names", - "label": "Tag Names" - }, - { - "type": "File", - "id": "#main/Transcriptome_Annotation", - "label": "Transcriptome Annotation" - }, - { - "doc": "The VDJ species and chain types. 
This option should only be set for VDJ experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/VDJ_Version/VDJ_Version/human", - "#main/VDJ_Version/VDJ_Version/hs", - "#main/VDJ_Version/VDJ_Version/mouse", - "#main/VDJ_Version/VDJ_Version/mm", - "#main/VDJ_Version/VDJ_Version/humanBCR", - "#main/VDJ_Version/VDJ_Version/humanTCR", - "#main/VDJ_Version/VDJ_Version/mouseBCR", - "#main/VDJ_Version/VDJ_Version/mouseTCR" - ], - "type": "enum", - "name": "#main/VDJ_Version/VDJ_Version" - } - ], - "id": "#main/VDJ_Version", - "label": "VDJ Species Version" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "The BD Rhapsody\u2122 WTA Analysis Pipeline is used to create sequencing libraries from single cell transcriptomes without having to specify a targeted panel.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files, a reference genome file and a transcriptome annotation file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", - "label": "BD Rhapsody\u2122 WTA Analysis Pipeline", - "steps": [ - { - "run": "#AddtoBam.cwl", - "scatter": [ - "#main/AddtoBam/R2_Bam" - ], - "in": [ - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AddtoBam/Annotation_R1" - }, - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/AddtoBam/Cell_Order" - }, - { - "source": "#main/GetDataTable/Corrected_Molecular_Annotation", - "id": "#main/AddtoBam/Molecular_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Bam", - "id": "#main/AddtoBam/R2_Bam" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AddtoBam/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Tag_Calls", - "id": "#main/AddtoBam/Tag_Calls" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AddtoBam/Target_Gene_Mapping" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AddtoBam", - "out": [ - "#main/AddtoBam/Annotated_Bam", - "#main/AddtoBam/output" - ] - }, - { - "run": "#AlignR2.cwl", - "out": [ - "#main/AlignR2/Alignments", - "#main/AlignR2/output" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": 48000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AlignR2", - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AlignR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/Index", - "id": "#main/AlignR2/Index" - }, - { - "source": "#main/QualityFilterOuter/R2", - "id": "#main/AlignR2/R2" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AlignR2/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateMolecules.cwl", - "scatter": [ - "#main/AnnotateMolecules/Valids" - ], - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateMolecules/AbSeq_UMI" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", 
- "id": "#main/AnnotateMolecules/Run_Metadata" - }, - { - "source": "#main/Internal_Settings/Use_DBEC", - "id": "#main/AnnotateMolecules/Use_DBEC" - }, - { - "source": "#main/AnnotateReads/Valid_Reads", - "id": "#main/AnnotateMolecules/Valids" - } - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateMolecules", - "out": [ - "#main/AnnotateMolecules/Mol_Annot_List", - "#main/AnnotateMolecules/Gene_Status_List", - "#main/AnnotateMolecules/Max_Count", - "#main/AnnotateMolecules/Total_Molecules", - "#main/AnnotateMolecules/output" - ] - }, - { - "id": "#main/AnnotateR1", - "out": [ - "#main/AnnotateR1/Annotation_R1", - "#main/AnnotateR1/R1_error_count_table", - "#main/AnnotateR1/R1_read_count_breakdown", - "#main/AnnotateR1/output" - ], - "run": "#AnnotateR1.cwl", - "scatter": [ - "#main/AnnotateR1/R1" - ], - "in": [ - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateR1/Filter_Metrics" - }, - { - "source": "#main/QualityFilterOuter/R1", - "id": "#main/AnnotateR1/R1" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR1/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateR2.cwl", - "scatter": [ - "#main/AnnotateR2/R2_zip" - ], - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/GTF", - "id": "#main/AnnotateR2/GTF_Annotation" - }, - { - "source": "#main/AlignR2/Alignments", - "id": "#main/AnnotateR2/R2_zip" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR2/Run_Metadata" - }, - { - "source": "#main/CheckReference/Transcript_Length", - "id": "#main/AnnotateR2/Transcript_Length" - } - ], - "requirements": [ - { - "ramMin": 10000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateR2", - "out": [ - "#main/AnnotateR2/Annot_R2", - "#main/AnnotateR2/R2_Bam", - "#main/AnnotateR2/GTF", - "#main/AnnotateR2/output", - 
"#main/AnnotateR2/R2_Quality_Metrics" - ] - }, - { - "run": "#AnnotateReads.cwl", - "out": [ - "#main/AnnotateReads/Seq_Metrics", - "#main/AnnotateReads/Valid_Reads", - "#main/AnnotateReads/Read1_error_rate", - "#main/AnnotateReads/Annotation_Read", - "#main/AnnotateReads/output", - "#main/AnnotateReads/validTcrReads", - "#main/AnnotateReads/validIgReads", - "#main/AnnotateReads/num_valid_tcr_reads", - "#main/AnnotateReads/num_valid_ig_reads" - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateReads", - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateReads/AbSeq_UMI" - }, - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateReads/Extra_Seqs" - }, - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateReads/Filter_Metrics" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/AnnotateReads/Putative_Cell_Call" - }, - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AnnotateReads/R1_Annotation" - }, - { - "source": "#main/AnnotateR1/R1_error_count_table", - "id": "#main/AnnotateReads/R1_error_count_table" - }, - { - "source": "#main/AnnotateR1/R1_read_count_breakdown", - "id": "#main/AnnotateReads/R1_read_count_breakdown" - }, - { - "source": "#main/AnnotateR2/Annot_R2", - "id": "#main/AnnotateReads/R2_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Quality_Metrics", - "id": "#main/AnnotateReads/R2_Quality_Metrics" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateReads/Run_Metadata" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AnnotateReads/Target_Gene_Mapping" - } - ] - }, - { - "out": [ - "#main/BundleLogs/logs_dir" - ], - "run": "#BundleLogs.cwl", - "id": "#main/BundleLogs", - "in": [ - { - "source": [ - "#main/AnnotateReads/output", - "#main/AnnotateR1/output", - "#main/AnnotateR2/output", - 
"#main/CheckReference/output", - "#main/GetDataTable/output", - "#main/Metrics/output", - "#main/AddtoBam/output", - "#main/AnnotateMolecules/output", - "#main/QualityFilterOuter/output", - "#main/CheckFastqs/log", - "#main/SplitAndSubsample/log", - "#main/MergeBAM/log", - "#main/Dense_to_Sparse_Datatable/output", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output", - "#main/IndexBAM/log", - "#main/CellClassifier/log" - ], - "linkMerge": "merge_flattened", - "id": "#main/BundleLogs/log_files" - } - ] - }, - { - "run": "#Cell_Classifier.cwl", - "out": [ - "#main/CellClassifier/cellTypePredictions", - "#main/CellClassifier/log" - ], - "requirements": [ - { - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CellClassifier", - "in": [ - { - "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", - "id": "#main/CellClassifier/molsPerCellMatrix" - } - ] - }, - { - "out": [ - "#main/CheckFastqs/SubsampleSeed", - "#main/CheckFastqs/SubsamplingRatio", - "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "#main/CheckFastqs/FastqReadPairs", - "#main/CheckFastqs/Bead_Version", - "#main/CheckFastqs/Libraries", - "#main/CheckFastqs/ReadsList", - "#main/CheckFastqs/log" - ], - "run": "#CheckFastqs.cwl", - "id": "#main/CheckFastqs", - "in": [ - { - "source": "#main/Internal_Settings/MinChunkSize", - "id": "#main/CheckFastqs/MinChunkSize" - }, - { - "source": "#main/Reads", - "id": "#main/CheckFastqs/Reads" - }, - { - "source": "#main/Subsample_Settings/Subsample_Reads", - "id": "#main/CheckFastqs/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": "#main/CheckFastqs/Subsample_Seed" - } - ] - }, - { - "run": "#CheckReference.cwl", - "out": [ - "#main/CheckReference/Index", - "#main/CheckReference/Extra_Seqs", - "#main/CheckReference/Full_Genes", - "#main/CheckReference/output", - "#main/CheckReference/Transcript_Length", - "#main/CheckReference/GTF", - "#main/CheckReference/Target_Gene_Mapping" - 
], - "requirements": [ - { - "ramMin": 10000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CheckReference", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/CheckReference/AbSeq_Reference" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/CheckReference/Putative_Cell_Call" - }, - { - "source": [ - "#main/Transcriptome_Annotation", - "#main/Reference_Genome" - ], - "id": "#main/CheckReference/Reference" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/CheckReference/Run_Metadata" - }, - { - "source": "#main/Supplemental_Reference", - "id": "#main/CheckReference/Supplemental_Reference" - } - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables", - "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": "#main/Dense_to_Sparse_Datatable/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable", - "out": [ - "#main/Dense_to_Sparse_Datatable/Data_Tables", - "#main/Dense_to_Sparse_Datatable/output" - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": 
"#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", - "out": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output" - ] - }, - { - "out": [ - "#main/Dense_to_Sparse_File/Cell_Order" - ], - "run": "#DensetoSparseFile.cwl", - "id": "#main/Dense_to_Sparse_File", - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_File/GDT_cell_order" - } - ] - }, - { - "out": [ - "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/dataTables" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "File", - "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/molsPerCellMatrixForCellClassifier" - } - ], - "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0", - "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", - "class": "ExpressionTool" - }, - "id": "#main/FindDataTableForCellClassifier", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/FindDataTableForCellClassifier/dataTables" - } - ] - }, - { - "out": [ - "#main/GetDataTable/Tag_Calls", - "#main/GetDataTable/Molecular_Annotation", - 
"#main/GetDataTable/Corrected_Molecular_Annotation", - "#main/GetDataTable/Tag_Annotation", - "#main/GetDataTable/Annot_Files", - "#main/GetDataTable/Cell_Label_Filter", - "#main/GetDataTable/Dense_Data_Tables", - "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "#main/GetDataTable/Expression_Data", - "#main/GetDataTable/Expression_Data_Unfiltered", - "#main/GetDataTable/Bioproduct_Stats", - "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "#main/GetDataTable/Putative_Cells_Origin", - "#main/GetDataTable/Protein_Aggregates_Experimental", - "#main/GetDataTable/Trueno_out", - "#main/GetDataTable/Trueno_zip", - "#main/GetDataTable/output", - "#main/GetDataTable/Cell_Order", - "#main/GetDataTable/Gene_List" - ], - "run": "#GetDataTable.cwl", - "id": "#main/GetDataTable", - "in": [ - { - "source": "#main/CheckReference/Full_Genes", - "id": "#main/GetDataTable/Full_Genes" - }, - { - "source": "#main/AnnotateMolecules/Gene_Status_List", - "id": "#main/GetDataTable/Gene_Status_List" - }, - { - "source": "#main/AnnotateMolecules/Max_Count", - "id": "#main/GetDataTable/Max_Count" - }, - { - "source": "#main/AnnotateMolecules/Mol_Annot_List", - "id": "#main/GetDataTable/Molecule_Annotation_List" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/GetDataTable/Putative_Cell_Call" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/GetDataTable/Run_Metadata" - }, - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/GetDataTable/Seq_Metrics" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/GetDataTable/Tag_Names" - }, - { - "source": "#main/AnnotateMolecules/Total_Molecules", - "id": "#main/GetDataTable/Total_Molecules" - } - ] - }, - { - "out": [ - "#main/IndexBAM/Index", - "#main/IndexBAM/log" - ], - "run": "#IndexBAM.cwl", - "id": "#main/IndexBAM", - "in": [ - { - "source": "#main/MergeBAM/Final_Bam", - "id": "#main/IndexBAM/BamFile" - } - ] - }, - { - 
"out": [ - "#main/Internal_Settings/Read_Filter_Off", - "#main/Internal_Settings/Barcode_Num", - "#main/Internal_Settings/Seq_Run", - "#main/Internal_Settings/AbSeq_UMI", - "#main/Internal_Settings/Use_DBEC", - "#main/Internal_Settings/Extra_Seqs", - "#main/Internal_Settings/MinChunkSize", - "#main/Internal_Settings/NumRecordsPerSplit", - "#main/Internal_Settings/Target_analysis", - "#main/Internal_Settings/Subsample_Tags", - "#main/Internal_Settings/VDJ_VGene_Evalue", - "#main/Internal_Settings/VDJ_JGene_Evalue" - ], - "in": [], - "run": "#InternalSettings.cwl", - "id": "#main/Internal_Settings", - "label": "Internal Settings" - }, - { - "out": [ - "#main/MergeBAM/Final_Bam", - "#main/MergeBAM/log" - ], - "run": "#MergeBAM.cwl", - "id": "#main/MergeBAM", - "in": [ - { - "source": "#main/AddtoBam/Annotated_Bam", - "id": "#main/MergeBAM/BamFiles" - }, - { - "source": "#main/Metadata_Settings/Run_Base_Name", - "id": "#main/MergeBAM/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/MergeBAM/Sample_Tags_Version" - } - ] - }, - { - "out": [ - "#main/MergeMultiplex/Multiplex_out" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/SampleTag_Files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/Multiplex_out" - } - ], - "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7", - "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", - "class": "ExpressionTool" - }, - "id": "#main/MergeMultiplex", - "in": [ - { 
- "source": [ - "#main/GetDataTable/Trueno_out", - "#main/Metrics/Sample_Tag_Out" - ], - "linkMerge": "merge_flattened", - "id": "#main/MergeMultiplex/SampleTag_Files" - } - ] - }, - { - "out": [ - "#main/Metadata_Settings/Run_Metadata", - "#main/Metadata_Settings/Run_Base_Name" - ], - "run": "#Metadata.cwl", - "id": "#main/Metadata_Settings", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/Metadata_Settings/AbSeq_Reference" - }, - { - "valueFrom": "WTA", - "id": "#main/Metadata_Settings/Assay" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", - "id": "#main/Metadata_Settings/Basic_Algo_Only" - }, - { - "source": "#main/CheckFastqs/Bead_Version", - "id": "#main/Metadata_Settings/Bead_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "id": "#main/Metadata_Settings/Exact_Cell_Count" - }, - { - "source": "#main/CheckFastqs/Libraries", - "id": "#main/Metadata_Settings/Libraries" - }, - { - "valueFrom": "BD Rhapsody WTA Analysis Pipeline", - "id": "#main/Metadata_Settings/Pipeline_Name" - }, - { - "source": "#main/Version/version", - "id": "#main/Metadata_Settings/Pipeline_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/Metadata_Settings/Putative_Cell_Call" - }, - { - "source": "#main/CheckFastqs/ReadsList", - "id": "#main/Metadata_Settings/Reads" - }, - { - "source": [ - "#main/Transcriptome_Annotation", - "#main/Reference_Genome" - ], - "id": "#main/Metadata_Settings/Reference" - }, - { - "source": "#main/Name_Settings/Run_Name", - "id": "#main/Metadata_Settings/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/Metadata_Settings/Sample_Tag_Names" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/Metadata_Settings/Sample_Tags_Version" - }, - { - "source": "#main/Start_Time/Start_Time", - "id": "#main/Metadata_Settings/Start_Time" - }, - { - "source": 
"#main/Subsample_Settings/Subsample_Reads", - "id": "#main/Metadata_Settings/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": "#main/Metadata_Settings/Subsample_Seed" - }, - { - "source": "#main/Supplemental_Reference", - "id": "#main/Metadata_Settings/Supplemental_Reference" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/Metadata_Settings/VDJ_Version" - } - ] - }, - { - "out": [ - "#main/Metrics/Metrics_Summary", - "#main/Metrics/Metrics_Archive", - "#main/Metrics/output", - "#main/Metrics/Sample_Tag_Out" - ], - "run": "#Metrics.cwl", - "id": "#main/Metrics", - "in": [ - { - "source": "#main/GetDataTable/Annot_Files", - "id": "#main/Metrics/Annot_Files" - }, - { - "source": "#main/AnnotateReads/Read1_error_rate", - "id": "#main/Metrics/Read1_error_rate" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Metrics/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Trueno_zip", - "id": "#main/Metrics/Sample_Tag_Archives" - }, - { - "source": "#main/Internal_Settings/Seq_Run", - "id": "#main/Metrics/Seq_Run" - }, - { - "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "id": "#main/Metrics/UMI_Adjusted_Stats" - }, - { - "source": "#main/VDJ_Compile_Results/vdjMetricsJson", - "id": "#main/Metrics/vdjMetricsJson" - } - ] - }, - { - "out": [ - "#main/Multiplexing_Settings/Tag_Sample_Names", - "#main/Multiplexing_Settings/Sample_Tags_Version" - ], - "in": [ - { - "source": "#main/Sample_Tags_Version", - "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" - }, - { - "source": "#main/Tag_Names", - "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" - } - ], - "run": "#MultiplexingSettings.cwl", - "id": "#main/Multiplexing_Settings", - "label": "Multiplexing Settings" - }, - { - "out": [ - "#main/Name_Settings/Run_Name" - ], - "in": [ - { - "source": "#main/Run_Name", - "id": "#main/Name_Settings/_Run_Name" - } - ], - "run": "#NameSettings.cwl", - "id": 
"#main/Name_Settings", - "label": "Name Settings" - }, - { - "out": [ - "#main/PairReadFiles/ReadPairs" - ], - "run": "#PairReadFiles.cwl", - "id": "#main/PairReadFiles", - "in": [ - { - "source": "#main/CheckFastqs/FastqReadPairs", - "id": "#main/PairReadFiles/FastqReadPairs" - }, - { - "source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#main/PairReadFiles/Reads" - } - ] - }, - { - "out": [ - "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" - ], - "in": [ - { - "source": "#main/Basic_Algo_Only", - "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" - }, - { - "source": "#main/Exact_Cell_Count", - "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" - }, - { - "source": "#main/Putative_Cell_Call", - "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" - } - ], - "run": "#PutativeCellSettings.cwl", - "id": "#main/Putative_Cell_Calling_Settings", - "label": "Putative Cell Calling Settings" - }, - { - "out": [ - "#main/QualityFilterOuter/Filter_Metrics", - "#main/QualityFilterOuter/R1", - "#main/QualityFilterOuter/R2", - "#main/QualityFilterOuter/output" - ], - "run": "#QualityFilterOuter.cwl", - "id": "#main/QualityFilterOuter", - "in": [ - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/QualityFilterOuter/Run_Metadata" - }, - { - "source": "#main/PairReadFiles/ReadPairs", - "id": "#main/QualityFilterOuter/Split_Read_Pairs" - } - ] - }, - { - "out": [ - "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "#main/SplitAndSubsample/log" - ], - "run": "#SplitAndSubsample.cwl", - "id": "#main/SplitAndSubsample", - "in": [ - { - "source": "#main/Reads", - "id": "#main/SplitAndSubsample/Fastqs" - }, - { - "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": 
"#main/Internal_Settings/NumRecordsPerSplit", - "id": "#main/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#main/CheckFastqs/SubsamplingRatio", - "id": "#main/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#main/CheckFastqs/SubsampleSeed", - "id": "#main/SplitAndSubsample/SubsampleSeed" - } - ] - }, - { - "out": [ - "#main/Start_Time/Start_Time" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "string", - "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a/Start_Time" - } - ], - "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a", - "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", - "class": "ExpressionTool" - }, - "id": "#main/Start_Time", - "in": [] - }, - { - "out": [ - "#main/Subsample_Settings/Subsample_Reads", - "#main/Subsample_Settings/Subsample_Seed" - ], - "in": [ - { - "source": "#main/Subsample", - "id": "#main/Subsample_Settings/_Subsample_Reads" - }, - { - "source": "#main/Subsample_seed", - "id": "#main/Subsample_Settings/_Subsample_Seed" - } - ], - "run": "#SubsampleSettings.cwl", - "id": "#main/Subsample_Settings", - "label": "Subsample Settings" - }, - { - "out": [ - "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" - ], - "run": "#UncompressDatatables.cwl", - "id": "#main/Uncompress_Datatables", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/Uncompress_Datatables/Compressed_Data_Table" - }, - { - "source": "#main/GetDataTable/Expression_Data", - "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": 
"#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Compile_Results/vdjCellsDatatable", - "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "#main/VDJ_Compile_Results/vdjDominantContigs", - "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "#main/VDJ_Compile_Results/vdjMetricsJson", - "#main/VDJ_Compile_Results/vdjMetricsCsv", - "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" - ], - "run": "#VDJ_Compile_Results.cwl", - "id": "#main/VDJ_Compile_Results", - "in": [ - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/VDJ_Compile_Results/Seq_Metrics" - }, - { - "source": "#main/CellClassifier/cellTypePredictions", - "id": "#main/VDJ_Compile_Results/cellTypeMapping" - }, - { - "valueFrom": "$([])", - "id": "#main/VDJ_Compile_Results/chainsToIgnore" - }, - { - "source": "#main/Internal_Settings/VDJ_JGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueJgene" - }, - { - "source": "#main/Internal_Settings/VDJ_VGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueVgene" - }, - { - "source": 
"#main/VDJ_GatherIGCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/igCalls" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/VDJ_Compile_Results/metadata" - }, - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/VDJ_Compile_Results/putativeCells" - }, - { - "source": "#main/VDJ_GatherTCRCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/tcrCalls" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Compile_Results/vdjVersion" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherIGCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherIGCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", - "id": "#main/VDJ_GatherIGCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherTCRCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherTCRCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", - "id": "#main/VDJ_GatherTCRCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_IG/num_splits", - "#main/VDJ_Preprocess_Reads_IG/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_IG", - "in": [ - { - "source": "#main/AnnotateReads/validIgReads", - "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" - }, - { - "source": "#main/AnnotateReads/num_valid_ig_reads", - "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" - }, - { - "valueFrom": "BCR", - "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_TCR/num_splits", - "#main/VDJ_Preprocess_Reads_TCR/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_TCR", - "in": [ - { - "source": "#main/AnnotateReads/validTcrReads", - "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" - }, 
- { - "source": "#main/AnnotateReads/num_valid_tcr_reads", - "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" - }, - { - "valueFrom": "TCR", - "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Settings/VDJ_Version" - ], - "in": [ - { - "source": "#main/VDJ_Version", - "id": "#main/VDJ_Settings/_VDJ_Version" - } - ], - "run": "#VDJ_Settings.cwl", - "id": "#main/VDJ_Settings", - "label": "VDJ Settings" - }, - { - "out": [ - "#main/Version/version" - ], - "run": "#Version.cwl", - "id": "#main/Version", - "in": [] - } - ], - "outputs": [ - { - "outputSource": "#main/GetDataTable/Bioproduct_Stats", - "type": [ - "null", - "File" - ], - "id": "#main/Bioproduct_Stats", - "label": "Bioproduct Statistics" - }, - { - "outputSource": "#main/GetDataTable/Cell_Label_Filter", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Cell_Label_Filter", - "label": "Cell Label Filter" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables", - "label": "Data Tables" - }, - { - "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables_Unfiltered", - "label": "Unfiltered Data Tables" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data", - "label": "Expression Matrix" - }, - { - "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data_Unfiltered", - "label": "Unfiltered Expression Matrix" - }, - { - "outputSource": "#main/MergeBAM/Final_Bam", - "type": "File", - "id": "#main/Final_Bam", - "label": "Final BAM File" - }, - { - "outputSource": "#main/IndexBAM/Index", - "type": "File", - "id": 
"#main/Final_Bam_Index", - "label": "Final BAM Index" - }, - { - "outputSource": "#main/CellClassifier/cellTypePredictions", - "type": [ - "null", - "File" - ], - "id": "#main/ImmuneCellClassification(Experimental)", - "label": "Immune Cell Classification (Experimental)" - }, - { - "outputSource": "#main/BundleLogs/logs_dir", - "type": "Directory", - "id": "#main/Logs", - "label": "Pipeline Logs" - }, - { - "outputSource": "#main/Metrics/Metrics_Summary", - "type": "File", - "id": "#main/Metrics_Summary", - "label": "Metrics Summary" - }, - { - "outputSource": "#main/MergeMultiplex/Multiplex_out", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Multiplex" - }, - { - "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", - "type": [ - "null", - "File" - ], - "id": "#main/Protein_Aggregates_Experimental", - "label": "Protein Aggregates (Experimental)" - }, - { - "outputSource": "#main/GetDataTable/Putative_Cells_Origin", - "type": [ - "null", - "File" - ], - "id": "#main/Putative_Cells_Origin", - "label": "Putative Cells Origin" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatable", - "label": "vdjCellsDatatable" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatableUncorrected", - "label": "vdjCellsDatatableUncorrected" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjDominantContigs", - "label": "vdjDominantContigs" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", - "type": [ - "null", - "File" - ], - "id": "#main/vdjMetricsCsv", - "label": "vdjMetricsCsv" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjUnfilteredContigs", - "label": 
"vdjUnfilteredContigs" - } - ], - "id": "#main", - "class": "Workflow" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#MergeBAM.cwl/BamFiles" - }, - { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Run_Name" - }, - { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Sample_Tags_Version" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_merge.log", - "outputs": [ - { - "outputBinding": { - "glob": "*_final.BAM" - }, - "type": "File", - "id": "#MergeBAM.cwl/Final_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#MergeBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "merge" - ], - "id": "#MergeBAM.cwl", - "arguments": [ - { - "prefix": "-@", - "valueFrom": "$(runtime.cores)" - }, - { - "position": 0, - "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" - } - ], - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 4, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/AbSeq_Reference" - }, - { - "type": "string", - "id": "#Metadata.cwl/Assay" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#Metadata.cwl/Basic_Algo_Only" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#Metadata.cwl/Bead_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Label_Version" - }, - { - "type": [ - "null", - "string" - ], - 
"id": "#Metadata.cwl/Libraries" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Name" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Putative_Cell_Call" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#Metadata.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reads" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Name" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Sample_Tag_Names" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Start_Time" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Subsample_Seed" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Supplemental_Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "run_metadata.json", - "outputs": [ - { - "outputBinding": { - "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" - }, - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Base_Name" - }, - { - "type": "stdout", - "id": "#Metadata.cwl/Run_Metadata" - } - ], - "baseCommand": "echo", - "id": "#Metadata.cwl", - "arguments": [ - { - "prefix": "" - }, - { - "shellQuote": 
true, - "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = 
\"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-files" - }, - "type": "File", - "id": "#Metrics.cwl/Annot_Files" - }, - { - "inputBinding": { - "prefix": "--read1-error-rate" - }, - "type": "File", - "id": "#Metrics.cwl/Read1_error_rate" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#Metrics.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--sample-tag-archives", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Archives" - }, - { - "inputBinding": { - "prefix": 
"--seq-run" - }, - "type": [ - "null", - "string" - ], - "id": "#Metrics.cwl/Seq_Run" - }, - { - "inputBinding": { - "prefix": "--umi-adjusted-stats" - }, - "type": [ - "null", - "File" - ], - "id": "#Metrics.cwl/UMI_Adjusted_Stats" - }, - { - "inputBinding": { - "prefix": "--vdj-metrics-fp" - }, - "type": [ - "null", - "File" - ], - "id": "#Metrics.cwl/vdjMetricsJson" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "internal-metrics-archive.tar.gz" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Archive" - }, - { - "outputBinding": { - "glob": "*_Metrics_Summary.csv" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Summary" - }, - { - "outputBinding": { - "glob": "*.zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Out" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Metrics.cwl/output" - } - ], - "baseCommand": [ - "mist_metrics.py" - ], - "class": "CommandLineTool", - "id": "#Metrics.cwl" - }, - { - "inputs": [ - { - "default": "Targeted", - "type": "string", - "id": "#MultiplexingSettings.cwl/Assay" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" - } - ], - "class": "ExpressionTool", - "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || 
_Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n {\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", - "id": "#MultiplexingSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/_Run_Name" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/Run_Name" - } - ], - "class": "ExpressionTool", - "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", - "id": "#NameSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/FastqReadPairs" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#PairReadFiles.cwl/Reads" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "PairReadFiles takes an array of split files and pairs 
them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. The pairing information is taken from CheckFastqs.\n", - "id": "#PairReadFiles.cwl", - "outputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R1" - }, - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R2" - }, - { - "type": "int", - "name": "#PairReadFiles.cwl/ReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/ReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/ReadPairs" - } - ], - "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, 
\"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if (fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from 
CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n 
readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", - "class": "ExpressionTool" - }, - { - "inputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" - } - ], - "class": "ExpressionTool", - "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: 
inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", - "id": "#PutativeCellSettings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#QualityFilter.cwl/Run_Metadata" - }, - { - "type": { - "fields": [ - { - "inputBinding": { - "prefix": "--r1" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" - }, - { - "inputBinding": { - "prefix": "--r2" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" - }, - { - "inputBinding": { - "prefix": "--read-pair-id" - }, - "type": "int", - "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" - }, - { - "inputBinding": { - "prefix": "--library" - }, - "type": "string", - "name": "#QualityFilter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "id": "#QualityFilter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*read_quality.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#QualityFilter.cwl/Filter_Metrics" - }, - { - "outputBinding": { - "glob": "*_R1*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R1" - }, - { - "outputBinding": { - "glob": "*_R2*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R2" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#QualityFilter.cwl/output" - } - ], - "baseCommand": [ - "mist_quality_filter.py" - ], - "class": "CommandLineTool", - "id": "#QualityFilter.cwl" - }, - { - "inputs": [ - { - "type": "File", - "id": "#QualityFilterOuter.cwl/Run_Metadata" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" - }, - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" - }, - { - "type": "int", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" - }, - { - "type": "string", - "name": 
"#QualityFilterOuter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/Filter_Metrics" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R1" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R2" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/output" - } - ], - "class": "Workflow", - "steps": [ - { - "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", - "out": [ - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" - ], - "run": "#QualityFilter.cwl", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", - "in": [ - { - "source": "#QualityFilterOuter.cwl/Run_Metadata", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" - }, - { - "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" - } - ] - } - ], - "id": 
"#QualityFilterOuter.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/Fastqs" - }, - { - "type": { - "items": "string", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" - }, - { - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" - }, - { - "type": "float", - "id": "#SplitAndSubsample.cwl/SubsampleRatio" - }, - { - "type": "int", - "id": "#SplitAndSubsample.cwl/SubsampleSeed" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", - "id": "#SplitAndSubsample.cwl", - "steps": [ - { - "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", - "out": [ - "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": { - "items": "File", - "type": "array" - }, - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" - } - ], - "outputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" - } - ], - "class": "ExpressionTool", - "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput", - "in": [ - { - "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" - } - ] - }, - { - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - 
"inputBinding": { - "prefix": "--fastq-file-path" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" - }, - { - "inputBinding": { - "prefix": "--files-to-skip-split-and-subsample", - "itemSeparator": "," - }, - "type": { - "items": "string", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" - }, - { - "inputBinding": { - "prefix": "--num-records" - }, - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" - }, - { - "inputBinding": { - "prefix": "--subsample-ratio" - }, - "type": "float", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": "int", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*.fastq.gz", - "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" - } - ], - "baseCommand": [ - "mist_split_fastq.py" - ], - "class": "CommandLineTool", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" - }, - "doc": "Allocate one docker/python process per file to do the actual file splitting.", - "scatter": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - ], - "in": [ - { - "source": "#SplitAndSubsample.cwl/Fastqs", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - }, - { - "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": 
"#SplitAndSubsample.cwl/NumRecordsPerSplit", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleRatio", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleSeed", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" - } - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample", - "out": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "#SplitAndSubsample.cwl/SplitAndSubsample/log" - ] - } - ], - "outputs": [ - { - "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" - }, - { - "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/log" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Seed" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/Subsample_Seed" - } - ], - "class": "ExpressionTool", - "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", - "id": "#SubsampleSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#UncompressDatatables.cwl/Compressed_Data_Table" - }, - { - "type": "File", - "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" - } - ], - "requirements": [ - { - "class": 
"ScatterFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", - "type": { - "items": "File", - "type": "array" - }, - "id": "#UncompressDatatables.cwl/Uncompressed_Data_Tables" - }, - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" - } - ], - "class": "Workflow", - "steps": [ - { - "id": "#UncompressDatatables.cwl/Uncompress_Datatable", - "out": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": "$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - ] - }, - "scatter": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - ], - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Data_Table", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - } - ] - }, - { - "out": [ - "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": 
"#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": "$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - - ] - }, - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" - } - ] - } - ], - "id": "#UncompressDatatables.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" - }, - { - "inputBinding": { - "position": 2 - }, - "type": "string", - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" - }, - { - "inputBinding": { - "position": 3 - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_pruned.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" - } - ], - "baseCommand": [ - "AssembleAndAnnotate.sh" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 1, - "ramMin": 3200, - "class": "ResourceRequirement" - } - ] 
- }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" - }, - { - 
"inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" - 
}, - { - "inputs": [ - { - "inputBinding": { - "position": 10, - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "position": 0, - "prefix": "--cell-type-mapping-fp" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/cellTypeMapping" - }, - { - "inputBinding": { - "position": 4, - "prefix": "--ignore", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" - }, - { - "inputBinding": { - "position": 8, - "prefix": "--e-value-for-j" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueJgene" - }, - { - "inputBinding": { - "position": 7, - "prefix": "--e-value-for-v" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueVgene" - }, - { - "inputBinding": { - "position": 5 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/igCalls" - }, - { - "inputBinding": { - "position": 9, - "prefix": "--metadata-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/metadata" - }, - { - "inputBinding": { - "position": 3, - "prefix": "--putative-cells-json-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/putativeCells" - }, - { - "inputBinding": { - "position": 6 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/tcrCalls" - }, - { - "inputBinding": { - "position": 2, - "prefix": "--vdj-version" - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Compile_Results.cwl/vdjVersion" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "doc": "VDJ data per cell, with distribution based error correction", - "outputBinding": { - "glob": "*_VDJ_perCell.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" - }, - { - "doc": "VDJ data per cell, including non-putative cells, no 
error correction applied", - "outputBinding": { - "glob": "*_VDJ_perCell_uncorrected.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" - }, - { - "outputBinding": { - "glob": "*_VDJ_Dominant_Contigs.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.json" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" - }, - { - "outputBinding": { - "glob": "*_DBEC_cutoff.png" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" - }, - { - "outputBinding": { - "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" - } - ], - "baseCommand": [ - "mist_vdj_compile_results.py" - ], - "id": "#VDJ_Compile_Results.cwl", - "class": "CommandLineTool", - "hints": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", - "id": "#VDJ_GatherCalls.cwl", - "steps": [ - { - "out": [ - "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": 
"ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_constant_region_called_pruned.csv.gz", - "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" - } - ], - "class": "CommandLineTool", - "arguments": [ - { - "shellQuote": false, - "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" - }, - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", - "in": [ - { - "source": "#VDJ_GatherCalls.cwl/theCalls", - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" - } - ] - } - ], - "outputs": [ - { - "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", - "type": [ - "null", - "File" - ], - "id": "#VDJ_GatherCalls.cwl/gatheredCalls" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "File" - ], - "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "id": 
"#VDJ_Preprocess_Reads.cwl/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" - } - ], - "requirements": [ - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "8" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", - "id": "#VDJ_Preprocess_Reads.cwl/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/num_splits" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_RSEC_Reads.cwl", - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", - "class": "ResourceRequirement" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - 
"#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" - ], - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" - } - ], - "run": "#VDJ_Trim_Reads.cwl", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", - "hints": [ - { - "coresMin": 8, - "class": "ResourceRequirement" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" - } - ], - "class": "ExpressionTool", - "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" - }, - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" - }, - { - "source": 
"#VDJ_Preprocess_Reads.cwl/vdj_type", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" - } - ] - } - ], - "id": "#VDJ_Preprocess_Reads.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--vdj-valid-reads", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#VDJ_RSEC_Reads.cwl/Valid_Reads" - }, - { - "inputBinding": { - "prefix": "--num-splits" - }, - "type": [ - "null", - "int" - ], - "id": "#VDJ_RSEC_Reads.cwl/num_splits" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*RSEC_Reads_Fastq_*.tar.gz" - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" - } - ], - "baseCommand": "mist_vdj_rsec_reads.py", - "class": "CommandLineTool", - "id": "#VDJ_RSEC_Reads.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "Any" - ], - "id": "#VDJ_Settings.cwl/_VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Settings.cwl/VDJ_Version" - } - ], - "class": "ExpressionTool", - "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === 
\"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", - "id": "#VDJ_Settings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "cutadapt.log" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Trim_Report" - }, - { - "outputBinding": { - "glob": "*vdjtxt.gz" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" - } - ], - "baseCommand": "VDJ_Trim_Reads.sh", - "class": "CommandLineTool", - "id": "#VDJ_Trim_Reads.cwl" - }, - { - "inputs": [], - "requirements": [ - - ], - "stdout": "output.txt", - "outputs": [ - { - "outputBinding": { - "glob": "output.txt", - "loadContents": true, - "outputEval": "$(self[0].contents)" - }, - "type": "string", - "id": "#Version.cwl/version" - } - ], - "baseCommand": [ - "mist_version.py" - ], - "id": "#Version.cwl", - "class": "CommandLineTool" - } - ], - "$namespaces": { - "sbg": "https://sevenbridges.com#", - "arv": "http://arvados.org/cwl#" - } -} \ No newline at end of file diff --git a/target/docker/mapping/bd_rhapsody/setup_logger.py b/target/docker/mapping/bd_rhapsody/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/mapping/bd_rhapsody/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter 
= logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/mapping/cellranger_count/.config.vsh.yaml b/target/docker/mapping/cellranger_count/.config.vsh.yaml deleted file mode 100644 index ade2b61c89d..00000000000 --- a/target/docker/mapping/cellranger_count/.config.vsh.yaml +++ /dev/null @@ -1,266 +0,0 @@ -functionality: - name: "cellranger_count" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Samuel D'Souza" - roles: - - "author" - info: - role: "Contributor" - links: - github: "srdsam" - linkedin: "samuel-d-souza-887023150/" - organizations: - - name: "Chan Zuckerberg Biohub" - href: "https://www.czbiohub.org" - role: "Data Engineer" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "The fastq.gz files to align. Can also be a single directory containing\ - \ fastq.gz files." 
- info: null - example: - - "sample_S1_L001_R1_001.fastq.gz" - - "sample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - description: "The path to Cell Ranger reference tar.gz file. Can also be a directory." - info: null - example: - - "reference.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The folder to store the alignment results." - info: null - example: - - "/path/to/output" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: "--expect_cells" - description: "Expected number of recovered cells, used as input to cell calling\ - \ algorithm." 
- info: null - example: - - 3000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--chemistry" - description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ - \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ - \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ - \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ - - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ - \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ - \ for more information.\n" - info: null - default: - - "auto" - required: false - choices: - - "auto" - - "threeprime" - - "fiveprime" - - "SC3Pv1" - - "SC3Pv2" - - "SC3Pv3" - - "SC3Pv3LT" - - "SC3Pv3HT" - - "SC5P-PE" - - "SC5P-R2" - - "SC-FB" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--secondary_analysis" - description: "Whether or not to run the secondary analysis e.g. clustering." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--generate_bam" - description: "Whether to generate a BAM file." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--include_introns" - description: "Include intronic reads in count (default=true unless --target-panel\ - \ is specified in which case default=false)" - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Align fastq files using Cell Ranger count." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - - type: "file" - path: "src/utils/setup_logger.py" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/cellranger:7.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/config.vsh.yaml" - platform: "docker" - output: 
"/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count/cellranger_count" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/cellranger_count/cellranger_count b/target/docker/mapping/cellranger_count/cellranger_count deleted file mode 100755 index 45bd87d3a16..00000000000 --- a/target/docker/mapping/cellranger_count/cellranger_count +++ /dev/null @@ -1,1206 +0,0 @@ -#!/usr/bin/env bash - -# cellranger_count 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Samuel D'Souza (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# 
ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="cellranger_count" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellranger_count 0.12.3" - echo "" - echo "Align fastq files using Cell Ranger count." - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: sample_S1_L001_R1_001.fastq.gz;sample_S1_L001_R2_001.fastq.gz" - echo " The fastq.gz files to align. Can also be a single directory containing" - echo " fastq.gz files." - echo "" - echo " --reference" - echo " type: file, required parameter, file must exist" - echo " example: reference.tar.gz" - echo " The path to Cell Ranger reference tar.gz file. Can also be a directory." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/output" - echo " The folder to store the alignment results." - echo "" - echo "Arguments:" - echo " --expect_cells" - echo " type: integer" - echo " example: 3000" - echo " Expected number of recovered cells, used as input to cell calling" - echo " algorithm." - echo "" - echo " --chemistry" - echo " type: string" - echo " default: auto" - echo " choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3," - echo "SC3Pv3LT, SC3Pv3HT, SC5P-PE, SC5P-R2, SC-FB ]" - echo " Assay configuration." 
- echo " - auto: autodetect mode" - echo " - threeprime: Single Cell 3'" - echo " - fiveprime: Single Cell 5'" - echo " - SC3Pv1: Single Cell 3' v1" - echo " - SC3Pv2: Single Cell 3' v2" - echo " - SC3Pv3: Single Cell 3' v3" - echo " - SC3Pv3LT: Single Cell 3' v3 LT" - echo " - SC3Pv3HT: Single Cell 3' v3 HT" - echo " - SC5P-PE: Single Cell 5' paired-end" - echo " - SC5P-R2: Single Cell 5' R2-only" - echo " - SC-FB: Single Cell Antibody-only 3' v2 or 5'" - echo " See" - echo " " - echo "https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-" - echo " for more information." - echo "" - echo " --secondary_analysis" - echo " type: boolean" - echo " default: false" - echo " Whether or not to run the secondary analysis e.g. clustering." - echo "" - echo " --generate_bam" - echo " type: boolean" - echo " default: true" - echo " Whether to generate a BAM file." - echo "" - echo " --include_introns" - echo " type: boolean" - echo " default: true" - echo " Include intronic reads in count (default=true unless --target-panel is" - echo " specified in which case default=false)" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/cellranger:7.0 - -ENTRYPOINT [] - - -RUN apt update && apt upgrade -y -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Samuel D'Souza, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component mapping cellranger_count" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_count-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "cellranger_count 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --expect_cells) - [ -n "$VIASH_PAR_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--expect_cells\': \'$VIASH_PAR_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECT_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --expect_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --expect_cells=*) - [ -n "$VIASH_PAR_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--expect_cells=*\': \'$VIASH_PAR_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXPECT_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chemistry) - [ -n "$VIASH_PAR_CHEMISTRY" ] && ViashError Bad arguments for option \'--chemistry\': \'$VIASH_PAR_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHEMISTRY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chemistry. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chemistry=*) - [ -n "$VIASH_PAR_CHEMISTRY" ] && ViashError Bad arguments for option \'--chemistry=*\': \'$VIASH_PAR_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHEMISTRY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --secondary_analysis) - [ -n "$VIASH_PAR_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--secondary_analysis\': \'$VIASH_PAR_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SECONDARY_ANALYSIS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --secondary_analysis. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --secondary_analysis=*) - [ -n "$VIASH_PAR_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--secondary_analysis=*\': \'$VIASH_PAR_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SECONDARY_ANALYSIS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --generate_bam) - [ -n "$VIASH_PAR_GENERATE_BAM" ] && ViashError Bad arguments for option \'--generate_bam\': \'$VIASH_PAR_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENERATE_BAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --generate_bam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --generate_bam=*) - [ -n "$VIASH_PAR_GENERATE_BAM" ] && ViashError Bad arguments for option \'--generate_bam=*\': \'$VIASH_PAR_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENERATE_BAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --include_introns) - [ -n "$VIASH_PAR_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--include_introns\': \'$VIASH_PAR_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INCLUDE_INTRONS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --include_introns. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --include_introns=*) - [ -n "$VIASH_PAR_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--include_introns=*\': \'$VIASH_PAR_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INCLUDE_INTRONS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_CHEMISTRY+x} ]; then - VIASH_PAR_CHEMISTRY="auto" -fi -if [ -z ${VIASH_PAR_SECONDARY_ANALYSIS+x} ]; then - VIASH_PAR_SECONDARY_ANALYSIS="false" -fi -if [ -z ${VIASH_PAR_GENERATE_BAM+x} ]; then - VIASH_PAR_GENERATE_BAM="true" -fi -if [ -z ${VIASH_PAR_INCLUDE_INTRONS+x} ]; then - VIASH_PAR_INCLUDE_INTRONS="true" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_EXPECT_CELLS" ]]; then - if ! [[ "$VIASH_PAR_EXPECT_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--expect_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SECONDARY_ANALYSIS" ]]; then - if ! [[ "$VIASH_PAR_SECONDARY_ANALYSIS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--secondary_analysis' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_GENERATE_BAM" ]]; then - if ! [[ "$VIASH_PAR_GENERATE_BAM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--generate_bam' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_INCLUDE_INTRONS" ]]; then - if ! [[ "$VIASH_PAR_INCLUDE_INTRONS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--include_introns' has to be a boolean. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_CHEMISTRY" ]; then - VIASH_PAR_CHEMISTRY_CHOICES=("auto:threeprime:fiveprime:SC3Pv1:SC3Pv2:SC3Pv3:SC3Pv3LT:SC3Pv3HT:SC5P-PE:SC5P-R2:SC-FB") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_CHEMISTRY_CHOICES[*]}:" =~ ":$VIASH_PAR_CHEMISTRY:" ]]; then - ViashError '--chemistry' specified value of \'$VIASH_PAR_CHEMISTRY\' is not in the list of allowed values. 
Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellranger_count-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_REFERENCE+x} ]; then echo "${VIASH_PAR_REFERENCE}" | sed "s#'#'\"'\"'#g;s#.*#par_reference='&'#" ; else echo "# par_reference="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_EXPECT_CELLS+x} ]; then echo "${VIASH_PAR_EXPECT_CELLS}" | sed "s#'#'\"'\"'#g;s#.*#par_expect_cells='&'#" ; else echo "# par_expect_cells="; fi ) -$( if [ ! -z ${VIASH_PAR_CHEMISTRY+x} ]; then echo "${VIASH_PAR_CHEMISTRY}" | sed "s#'#'\"'\"'#g;s#.*#par_chemistry='&'#" ; else echo "# par_chemistry="; fi ) -$( if [ ! -z ${VIASH_PAR_SECONDARY_ANALYSIS+x} ]; then echo "${VIASH_PAR_SECONDARY_ANALYSIS}" | sed "s#'#'\"'\"'#g;s#.*#par_secondary_analysis='&'#" ; else echo "# par_secondary_analysis="; fi ) -$( if [ ! -z ${VIASH_PAR_GENERATE_BAM+x} ]; then echo "${VIASH_PAR_GENERATE_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_generate_bam='&'#" ; else echo "# par_generate_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_INCLUDE_INTRONS+x} ]; then echo "${VIASH_PAR_INCLUDE_INTRONS}" | sed "s#'#'\"'\"'#g;s#.*#par_include_introns='&'#" ; else echo "# par_include_introns="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# just to make sure paths are absolute -par_reference=\`realpath \$par_reference\` -par_output=\`realpath \$par_output\` - -# create temporary directory -tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -# process inputs -# for every fastq file found, make a symlink into the tempdir -fastq_dir="\$tmpdir/fastqs" -mkdir -p "\$fastq_dir" -IFS=";" -for var in \$par_input; do - unset IFS - abs_path=\`realpath \$var\` - if [ -d "\$abs_path" ]; then - find "\$abs_path" -name *.fastq.gz -exec ln -s {} "\$fastq_dir" \\; - else - ln -s "\$abs_path" "\$fastq_dir" - fi -done - -# process reference -if file \$par_reference | grep -q 'gzip compressed data'; then - echo "Untarring genome" - reference_dir="\$tmpdir/fastqs" - mkdir -p "\$reference_dir" - tar -xvf "\$par_reference" -C "\$reference_dir" --strip-components=1 - par_reference="\$reference_dir" -fi - -# cd into tempdir -cd "\$tmpdir" - -# add additional params -extra_params=( ) - -if [ ! -z "\$meta_cpus" ]; then - extra_params+=( "--localcores=\$meta_cpus" ) -fi -if [ ! -z "\$meta_memory_gb" ]; then - # always keep 2gb for the OS itself - memory_gb=\`python -c "print(int('\$meta_memory_gb') - 2)"\` - extra_params+=( "--localmem=\$memory_gb" ) -fi -if [ ! -z "\$par_expect_cells" ]; then - extra_params+=( "--expect-cells=\$par_expect_cells" ) -fi -if [ ! 
-z "\$par_chemistry" ]; then - extra_params+=( "--chemistry=\$par_chemistry" ) -fi -if [ "\$par_secondary_analysis" == "false" ]; then - extra_params+=( "--nosecondary" ) -fi -if [ "\$par_generate_bam" == "false" ]; then - extra_params+=( "--no-bam" ) -fi -echo "Running cellranger count" - - -id=myoutput -cellranger count \\ - --id "\$id" \\ - --fastqs "\$fastq_dir" \\ - --transcriptome "\$par_reference" \\ - --include-introns "\$par_include_introns" \\ - "\${extra_params[@]}" \\ - --disable-ui \\ - -echo "Copying output" -if [ -d "\$id/outs/" ]; then - if [ ! -d "\$par_output" ]; then - mkdir -p "\$par_output" - fi - mv "\$id/outs/"* "\$par_output" -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/cellranger_count_split/.config.vsh.yaml b/target/docker/mapping/cellranger_count_split/.config.vsh.yaml deleted file mode 100644 index 18b8445f5cf..00000000000 --- a/target/docker/mapping/cellranger_count_split/.config.vsh.yaml +++ /dev/null @@ -1,218 +0,0 @@ -functionality: - name: "cellranger_count_split" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Samuel D'Souza" - roles: - - "author" - info: - role: "Contributor" - links: - github: "srdsam" - linkedin: "samuel-d-souza-887023150/" - organizations: - - name: "Chan Zuckerberg Biohub" - href: "https://www.czbiohub.org" - role: "Data Engineer" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Output directory from a Cell Ranger count run." 
- info: null - example: - - "input_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--filtered_h5" - info: null - example: - - "filtered_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--metrics_summary" - info: null - example: - - "metrics_summary.csv" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--molecule_info" - info: null - example: - - "molecule_info.h5" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--bam" - info: null - example: - - "possorted_genome_bam.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--bai" - info: null - example: - - "possorted_genome_bam.bam.bai" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--raw_h5" - info: null - example: - - "raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Split 10x Cell Ranger output directory into separate output fields." 
- info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:jammy" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count_split" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count_split/cellranger_count_split" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - 
git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/cellranger_count_split/cellranger_count_split b/target/docker/mapping/cellranger_count_split/cellranger_count_split deleted file mode 100755 index 2efd9a25590..00000000000 --- a/target/docker/mapping/cellranger_count_split/cellranger_count_split +++ /dev/null @@ -1,1090 +0,0 @@ -#!/usr/bin/env bash - -# cellranger_count_split 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Samuel D'Souza (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags 
--foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="cellranger_count_split" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellranger_count_split 0.12.3" - echo "" - echo "Split 10x Cell Ranger output directory into separate output fields." - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input_dir" - echo " Output directory from a Cell Ranger count run." 
- echo "" - echo " --filtered_h5" - echo " type: file, output, file must exist" - echo " example: filtered_feature_bc_matrix.h5" - echo "" - echo " --metrics_summary" - echo " type: file, output, file must exist" - echo " example: metrics_summary.csv" - echo "" - echo " --molecule_info" - echo " type: file, output, file must exist" - echo " example: molecule_info.h5" - echo "" - echo " --bam" - echo " type: file, output, file must exist" - echo " example: possorted_genome_bam.bam" - echo "" - echo " --bai" - echo " type: file, output, file must exist" - echo " example: possorted_genome_bam.bam.bai" - echo "" - echo " --raw_h5" - echo " type: file, output, file must exist" - echo " example: raw_feature_bc_matrix.h5" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? 
- fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:jammy - -ENTRYPOINT [] - - -RUN apt update && apt upgrade -y -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Samuel D'Souza, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component mapping cellranger_count_split" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_count_split-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "cellranger_count_split 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --filtered_h5) - [ -n "$VIASH_PAR_FILTERED_H5" ] && ViashError Bad arguments for option \'--filtered_h5\': \'$VIASH_PAR_FILTERED_H5\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FILTERED_H5="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --filtered_h5. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --filtered_h5=*) - [ -n "$VIASH_PAR_FILTERED_H5" ] && ViashError Bad arguments for option \'--filtered_h5=*\': \'$VIASH_PAR_FILTERED_H5\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FILTERED_H5=$(ViashRemoveFlags "$1") - shift 1 - ;; - --metrics_summary) - [ -n "$VIASH_PAR_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--metrics_summary\': \'$VIASH_PAR_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_METRICS_SUMMARY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --metrics_summary. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --metrics_summary=*) - [ -n "$VIASH_PAR_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--metrics_summary=*\': \'$VIASH_PAR_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_METRICS_SUMMARY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --molecule_info) - [ -n "$VIASH_PAR_MOLECULE_INFO" ] && ViashError Bad arguments for option \'--molecule_info\': \'$VIASH_PAR_MOLECULE_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MOLECULE_INFO="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --molecule_info. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --molecule_info=*) - [ -n "$VIASH_PAR_MOLECULE_INFO" ] && ViashError Bad arguments for option \'--molecule_info=*\': \'$VIASH_PAR_MOLECULE_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MOLECULE_INFO=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bam) - [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bam=*) - [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam=*\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bai) - [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAI="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bai. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bai=*) - [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai=*\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAI=$(ViashRemoveFlags "$1") - shift 1 - ;; - --raw_h5) - [ -n "$VIASH_PAR_RAW_H5" ] && ViashError Bad arguments for option \'--raw_h5\': \'$VIASH_PAR_RAW_H5\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RAW_H5="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --raw_h5. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --raw_h5=*) - [ -n "$VIASH_PAR_RAW_H5" ] && ViashError Bad arguments for option \'--raw_h5=*\': \'$VIASH_PAR_RAW_H5\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_RAW_H5=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_FILTERED_H5" ] && [ ! -d "$(dirname "$VIASH_PAR_FILTERED_H5")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FILTERED_H5")" -fi -if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_METRICS_SUMMARY")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_METRICS_SUMMARY")" -fi -if [ ! 
-z "$VIASH_PAR_MOLECULE_INFO" ] && [ ! -d "$(dirname "$VIASH_PAR_MOLECULE_INFO")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_MOLECULE_INFO")" -fi -if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_BAM")" -fi -if [ ! -z "$VIASH_PAR_BAI" ] && [ ! -d "$(dirname "$VIASH_PAR_BAI")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_BAI")" -fi -if [ ! -z "$VIASH_PAR_RAW_H5" ] && [ ! -d "$(dirname "$VIASH_PAR_RAW_H5")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_RAW_H5")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_FILTERED_H5" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_FILTERED_H5")" ) - VIASH_PAR_FILTERED_H5=$(ViashAutodetectMount "$VIASH_PAR_FILTERED_H5") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FILTERED_H5" ) -fi -if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_METRICS_SUMMARY")" ) - VIASH_PAR_METRICS_SUMMARY=$(ViashAutodetectMount "$VIASH_PAR_METRICS_SUMMARY") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_METRICS_SUMMARY" ) -fi -if [ ! -z "$VIASH_PAR_MOLECULE_INFO" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_MOLECULE_INFO")" ) - VIASH_PAR_MOLECULE_INFO=$(ViashAutodetectMount "$VIASH_PAR_MOLECULE_INFO") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_MOLECULE_INFO" ) -fi -if [ ! -z "$VIASH_PAR_BAM" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BAM")" ) - VIASH_PAR_BAM=$(ViashAutodetectMount "$VIASH_PAR_BAM") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAM" ) -fi -if [ ! -z "$VIASH_PAR_BAI" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BAI")" ) - VIASH_PAR_BAI=$(ViashAutodetectMount "$VIASH_PAR_BAI") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAI" ) -fi -if [ ! 
-z "$VIASH_PAR_RAW_H5" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_RAW_H5")" ) - VIASH_PAR_RAW_H5=$(ViashAutodetectMount "$VIASH_PAR_RAW_H5") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_RAW_H5" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellranger_count_split-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_FILTERED_H5+x} ]; then echo "${VIASH_PAR_FILTERED_H5}" | sed "s#'#'\"'\"'#g;s#.*#par_filtered_h5='&'#" ; else echo "# par_filtered_h5="; fi ) -$( if [ ! -z ${VIASH_PAR_METRICS_SUMMARY+x} ]; then echo "${VIASH_PAR_METRICS_SUMMARY}" | sed "s#'#'\"'\"'#g;s#.*#par_metrics_summary='&'#" ; else echo "# par_metrics_summary="; fi ) -$( if [ ! -z ${VIASH_PAR_MOLECULE_INFO+x} ]; then echo "${VIASH_PAR_MOLECULE_INFO}" | sed "s#'#'\"'\"'#g;s#.*#par_molecule_info='&'#" ; else echo "# par_molecule_info="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\"'\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) -$( if [ ! -z ${VIASH_PAR_RAW_H5+x} ]; then echo "${VIASH_PAR_RAW_H5}" | sed "s#'#'\"'\"'#g;s#.*#par_raw_h5='&'#" ; else echo "# par_raw_h5="; fi ) -$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -filtered_h5="\$par_input/filtered_feature_bc_matrix.h5" -if [ -f "\$filtered_h5" ] && [ ! -z "\$par_filtered_h5" ]; then - echo "+ cp \$filtered_h5 \$par_filtered_h5" - cp "\$filtered_h5" "\$par_filtered_h5" -fi - -metrics_summary="\$par_input/metrics_summary.csv" -if [ -f "\$metrics_summary" ] && [ ! -z "\$par_metrics_summary" ]; then - echo "+ cp \$metrics_summary \$par_metrics_summary" - cp "\$metrics_summary" "\$par_metrics_summary" -fi - -molecule_info="\$par_input/molecule_info.h5" -if [ -f "\$molecule_info" ] && [ ! -z "\$par_molecule_info" ]; then - echo "+ cp \$molecule_info \$par_molecule_info" - cp "\$molecule_info" "\$par_molecule_info" -fi - -bam="\$par_input/possorted_genome_bam.bam" -if [ -f "\$bam" ] && [ ! -z "\$par_bam" ]; then - echo "cp \$bam \$par_bam" - cp "\$bam" "\$par_bam" -fi - -raw_h5="\$par_input/raw_feature_bc_matrix.h5" -if [ -f "\$raw_h5" ] && [ ! -z "\$par_raw_h5" ]; then - echo "+ cp \$raw_h5 \$par_raw_h5" - cp "\$raw_h5" "\$par_raw_h5" -fi - -bai="\$par_input/possorted_genome_bam.bam.bai" -if [ -f "\$bai" ] && [ ! -z "\$par_bai" ]; then - echo "+ cp \$bai \$par_bai" - cp "\$bai" "\$par_bai" -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_FILTERED_H5" ]; then - VIASH_PAR_FILTERED_H5=$(ViashStripAutomount "$VIASH_PAR_FILTERED_H5") -fi -if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ]; then - VIASH_PAR_METRICS_SUMMARY=$(ViashStripAutomount "$VIASH_PAR_METRICS_SUMMARY") -fi -if [ ! -z "$VIASH_PAR_MOLECULE_INFO" ]; then - VIASH_PAR_MOLECULE_INFO=$(ViashStripAutomount "$VIASH_PAR_MOLECULE_INFO") -fi -if [ ! 
-z "$VIASH_PAR_BAM" ]; then - VIASH_PAR_BAM=$(ViashStripAutomount "$VIASH_PAR_BAM") -fi -if [ ! -z "$VIASH_PAR_BAI" ]; then - VIASH_PAR_BAI=$(ViashStripAutomount "$VIASH_PAR_BAI") -fi -if [ ! -z "$VIASH_PAR_RAW_H5" ]; then - VIASH_PAR_RAW_H5=$(ViashStripAutomount "$VIASH_PAR_RAW_H5") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_FILTERED_H5" ] && [ ! -e "$VIASH_PAR_FILTERED_H5" ]; then - ViashError "Output file '$VIASH_PAR_FILTERED_H5' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ] && [ ! -e "$VIASH_PAR_METRICS_SUMMARY" ]; then - ViashError "Output file '$VIASH_PAR_METRICS_SUMMARY' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_MOLECULE_INFO" ] && [ ! -e "$VIASH_PAR_MOLECULE_INFO" ]; then - ViashError "Output file '$VIASH_PAR_MOLECULE_INFO' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -e "$VIASH_PAR_BAM" ]; then - ViashError "Output file '$VIASH_PAR_BAM' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BAI" ] && [ ! -e "$VIASH_PAR_BAI" ]; then - ViashError "Output file '$VIASH_PAR_BAI' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_RAW_H5" ] && [ ! -e "$VIASH_PAR_RAW_H5" ]; then - ViashError "Output file '$VIASH_PAR_RAW_H5' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/cellranger_multi/.config.vsh.yaml b/target/docker/mapping/cellranger_multi/.config.vsh.yaml deleted file mode 100644 index 3829e2bcd8d..00000000000 --- a/target/docker/mapping/cellranger_multi/.config.vsh.yaml +++ /dev/null @@ -1,423 +0,0 @@ -functionality: - name: "cellranger_multi" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Input files" - arguments: - - type: "file" - name: "--input" - description: "The FASTQ files to be analyzed. 
FASTQ files should conform to\ - \ the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample\ - \ Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n" - info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--gex_reference" - description: "Genome refence index built by Cell Ranger mkref." - info: null - example: - - "reference_genome.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--vdj_reference" - description: "VDJ refence index built by Cell Ranger mkref." - info: null - example: - - "reference_vdj.tar.gz" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--vdj_inner_enrichment_primers" - description: "V(D)J Immune Profiling libraries: if inner enrichment primers\ - \ other than those provided \nin the 10x Genomics kits are used, they need\ - \ to be specified here as a\ntext file with one primer per line.\n" - info: null - example: - - "enrichment_primers.txt" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--feature_reference" - description: "Path to the Feature reference CSV file, declaring Feature Barcode\ - \ constructs and associated barcodes. Required only for Antibody Capture or\ - \ CRISPR Guide Capture libraries. See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref\ - \ for more information." 
- info: null - example: - - "feature_reference.csv" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Library arguments" - arguments: - - type: "string" - name: "--library_id" - description: "The Illumina sample name to analyze. This must exactly match the\ - \ 'Sample Name' part of the FASTQ files specified in the `--input` argument." - info: null - example: - - "mysample1" - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--library_type" - description: "The underlying feature type of the library.\nPossible values:\ - \ \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody Capture\"\ - , \"CRISPR Guide Capture\", \"Multiplexing Capture\"\n" - info: null - example: - - "Gene Expression" - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--library_subsample" - description: "Optional. The rate at which reads from the provided FASTQ files\ - \ are sampled. Must be strictly greater than 0 and less than or equal to 1." - info: null - example: - - "0.5" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--library_lanes" - description: "Lanes associated with this sample. Defaults to using all lanes." - info: null - example: - - "1-4" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Gene expression arguments" - description: "Arguments relevant to the analysis of gene expression data." - arguments: - - type: "integer" - name: "--gex_expect_cells" - description: "Expected number of recovered cells, used as input to cell calling\ - \ algorithm." 
- info: null - example: - - 3000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--gex_chemistry" - description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ - \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ - \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ - \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ - - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ - \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ - \ for more information.\n" - info: null - default: - - "auto" - required: false - choices: - - "auto" - - "threeprime" - - "fiveprime" - - "SC3Pv1" - - "SC3Pv2" - - "SC3Pv3" - - "SC3Pv3LT" - - "SC3Pv3HT" - - "SC5P-PE" - - "SC5P-R2" - - "SC-FB" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--gex_secondary_analysis" - description: "Whether or not to run the secondary analysis e.g. clustering." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--gex_generate_bam" - description: "Whether to generate a BAM file." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--gex_include_introns" - description: "Include intronic reads in count (default=true unless --target-panel\ - \ is specified in which case default=false)" - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Cell multiplexing parameters" - description: "Arguments related to cell multiplexing." 
- arguments: - - type: "string" - name: "--cell_multiplex_sample_id" - description: "A name to identify a multiplexed sample. Must be alphanumeric\ - \ with hyphens and/or underscores, and less than 64 characters. Required for\ - \ Cell Multiplexing libraries." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cell_multiplex_oligo_ids" - description: "The Cell Multiplexing oligo IDs used to multiplex this sample.\ - \ If multiple CMOs were used for a sample, separate IDs with a pipe (e.g.,\ - \ CMO301|CMO302). Required for Cell Multiplexing libraries." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cell_multiplex_description" - description: "A description for the sample." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The folder to store the alignment results." - info: null - example: - - "/path/to/output" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Executor arguments" - arguments: - - type: "boolean_true" - name: "--dryrun" - description: "If true, the output directory will only contain the CWL input\ - \ files, but the pipeline itself will not be executed." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Align fastq files using Cell Ranger multi." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/10x_5k_anticmv/raw/" - dest: "10x_5k_anticmv/raw/" - - type: "file" - path: "resources_test/10x_5k_lung_crispr/raw/" - dest: "10x_5k_lung_crispr/raw/" - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/cellranger:7.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/*" - - type: "python" - user: false - packages: - - "pandas" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "veryhighmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: 
"cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_multi" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_multi/cellranger_multi" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/cellranger_multi/cellranger_multi b/target/docker/mapping/cellranger_multi/cellranger_multi deleted file mode 100755 index 16128636037..00000000000 --- a/target/docker/mapping/cellranger_multi/cellranger_multi +++ /dev/null @@ -1,1681 +0,0 @@ -#!/usr/bin/env bash - -# cellranger_multi 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (author, maintainer) -# * Dries De Maeyer (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# 
ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="cellranger_multi" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellranger_multi 0.12.3" - echo "" - echo "Align fastq files using Cell Ranger multi." - echo "" - echo "Input files:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example:" - echo "mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz" - echo " The FASTQ files to be analyzed. FASTQ files should conform to the naming" - echo " conventions of bcl2fastq and mkfastq:" - echo " \`[Sample Name]_S[Sample Index]_L00[Lane Number]_[Read" - echo " Type]_001.fastq.gz\`" - echo "" - echo " --gex_reference" - echo " type: file, required parameter, file must exist" - echo " example: reference_genome.tar.gz" - echo " Genome refence index built by Cell Ranger mkref." - echo "" - echo " --vdj_reference" - echo " type: file, file must exist" - echo " example: reference_vdj.tar.gz" - echo " VDJ refence index built by Cell Ranger mkref." 
- echo "" - echo " --vdj_inner_enrichment_primers" - echo " type: file, file must exist" - echo " example: enrichment_primers.txt" - echo " V(D)J Immune Profiling libraries: if inner enrichment primers other than" - echo " those provided" - echo " in the 10x Genomics kits are used, they need to be specified here as a" - echo " text file with one primer per line." - echo "" - echo " --feature_reference" - echo " type: file, file must exist" - echo " example: feature_reference.csv" - echo " Path to the Feature reference CSV file, declaring Feature Barcode" - echo " constructs and associated barcodes. Required only for Antibody Capture" - echo " or CRISPR Guide Capture libraries. See" - echo " " - echo "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref" - echo " for more information." - echo "" - echo "Library arguments:" - echo " --library_id" - echo " type: string, required parameter, multiple values allowed" - echo " example: mysample1" - echo " The Illumina sample name to analyze. This must exactly match the 'Sample" - echo " Name' part of the FASTQ files specified in the \`--input\` argument." - echo "" - echo " --library_type" - echo " type: string, required parameter, multiple values allowed" - echo " example: Gene Expression" - echo " The underlying feature type of the library." - echo " Possible values: \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody" - echo " Capture\", \"CRISPR Guide Capture\", \"Multiplexing Capture\"" - echo "" - echo " --library_subsample" - echo " type: string, multiple values allowed" - echo " example: 0.5" - echo " Optional. The rate at which reads from the provided FASTQ files are" - echo " sampled. Must be strictly greater than 0 and less than or equal to 1." - echo "" - echo " --library_lanes" - echo " type: string, multiple values allowed" - echo " example: 1-4" - echo " Lanes associated with this sample. Defaults to using all lanes." 
- echo "" - echo "Gene expression arguments:" - echo " Arguments relevant to the analysis of gene expression data." - echo "" - echo " --gex_expect_cells" - echo " type: integer" - echo " example: 3000" - echo " Expected number of recovered cells, used as input to cell calling" - echo " algorithm." - echo "" - echo " --gex_chemistry" - echo " type: string" - echo " default: auto" - echo " choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3," - echo "SC3Pv3LT, SC3Pv3HT, SC5P-PE, SC5P-R2, SC-FB ]" - echo " Assay configuration." - echo " - auto: autodetect mode" - echo " - threeprime: Single Cell 3'" - echo " - fiveprime: Single Cell 5'" - echo " - SC3Pv1: Single Cell 3' v1" - echo " - SC3Pv2: Single Cell 3' v2" - echo " - SC3Pv3: Single Cell 3' v3" - echo " - SC3Pv3LT: Single Cell 3' v3 LT" - echo " - SC3Pv3HT: Single Cell 3' v3 HT" - echo " - SC5P-PE: Single Cell 5' paired-end" - echo " - SC5P-R2: Single Cell 5' R2-only" - echo " - SC-FB: Single Cell Antibody-only 3' v2 or 5'" - echo " See" - echo " " - echo "https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-" - echo " for more information." - echo "" - echo " --gex_secondary_analysis" - echo " type: boolean" - echo " default: false" - echo " Whether or not to run the secondary analysis e.g. clustering." - echo "" - echo " --gex_generate_bam" - echo " type: boolean" - echo " default: false" - echo " Whether to generate a BAM file." - echo "" - echo " --gex_include_introns" - echo " type: boolean" - echo " default: true" - echo " Include intronic reads in count (default=true unless --target-panel is" - echo " specified in which case default=false)" - echo "" - echo "Cell multiplexing parameters:" - echo " Arguments related to cell multiplexing." - echo "" - echo " --cell_multiplex_sample_id" - echo " type: string" - echo " A name to identify a multiplexed sample. Must be alphanumeric with" - echo " hyphens and/or underscores, and less than 64 characters. 
Required for" - echo " Cell Multiplexing libraries." - echo "" - echo " --cell_multiplex_oligo_ids" - echo " type: string" - echo " The Cell Multiplexing oligo IDs used to multiplex this sample. If" - echo " multiple CMOs were used for a sample, separate IDs with a pipe (e.g.," - echo " CMO301|CMO302). Required for Cell Multiplexing libraries." - echo "" - echo " --cell_multiplex_description" - echo " type: string" - echo " A description for the sample." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/output" - echo " The folder to store the alignment results." - echo "" - echo "Executor arguments:" - echo " --dryrun" - echo " type: boolean_true" - echo " If true, the output directory will only contain the CWL input files, but" - echo " the pipeline itself will not be executed." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. 
-# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? 
- else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/cellranger:7.0 - -ENTRYPOINT [] - - -RUN DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/* -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "pandas" - -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt, Dries De Maeyer" -LABEL org.opencontainers.image.description="Companion container for running component mapping cellranger_multi" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" 
- -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_multi-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "cellranger_multi 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --gex_reference) - [ -n "$VIASH_PAR_GEX_REFERENCE" ] && ViashError Bad arguments for option \'--gex_reference\': \'$VIASH_PAR_GEX_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gex_reference=*) - [ -n "$VIASH_PAR_GEX_REFERENCE" ] && ViashError Bad arguments for option \'--gex_reference=*\': \'$VIASH_PAR_GEX_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --vdj_reference) - [ -n "$VIASH_PAR_VDJ_REFERENCE" ] && ViashError Bad arguments for option \'--vdj_reference\': \'$VIASH_PAR_VDJ_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VDJ_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --vdj_reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --vdj_reference=*) - [ -n "$VIASH_PAR_VDJ_REFERENCE" ] && ViashError Bad arguments for option \'--vdj_reference=*\': \'$VIASH_PAR_VDJ_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_VDJ_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --vdj_inner_enrichment_primers) - [ -n "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ] && ViashError Bad arguments for option \'--vdj_inner_enrichment_primers\': \'$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --vdj_inner_enrichment_primers. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --vdj_inner_enrichment_primers=*) - [ -n "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ] && ViashError Bad arguments for option \'--vdj_inner_enrichment_primers=*\': \'$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --feature_reference) - [ -n "$VIASH_PAR_FEATURE_REFERENCE" ] && ViashError Bad arguments for option \'--feature_reference\': \'$VIASH_PAR_FEATURE_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FEATURE_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --feature_reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --feature_reference=*) - [ -n "$VIASH_PAR_FEATURE_REFERENCE" ] && ViashError Bad arguments for option \'--feature_reference=*\': \'$VIASH_PAR_FEATURE_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FEATURE_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --library_id) - if [ -z "$VIASH_PAR_LIBRARY_ID" ]; then - VIASH_PAR_LIBRARY_ID="$2" - else - VIASH_PAR_LIBRARY_ID="$VIASH_PAR_LIBRARY_ID;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_id. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --library_id=*) - if [ -z "$VIASH_PAR_LIBRARY_ID" ]; then - VIASH_PAR_LIBRARY_ID=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LIBRARY_ID="$VIASH_PAR_LIBRARY_ID;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --library_type) - if [ -z "$VIASH_PAR_LIBRARY_TYPE" ]; then - VIASH_PAR_LIBRARY_TYPE="$2" - else - VIASH_PAR_LIBRARY_TYPE="$VIASH_PAR_LIBRARY_TYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_type. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --library_type=*) - if [ -z "$VIASH_PAR_LIBRARY_TYPE" ]; then - VIASH_PAR_LIBRARY_TYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LIBRARY_TYPE="$VIASH_PAR_LIBRARY_TYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --library_subsample) - if [ -z "$VIASH_PAR_LIBRARY_SUBSAMPLE" ]; then - VIASH_PAR_LIBRARY_SUBSAMPLE="$2" - else - VIASH_PAR_LIBRARY_SUBSAMPLE="$VIASH_PAR_LIBRARY_SUBSAMPLE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_subsample. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --library_subsample=*) - if [ -z "$VIASH_PAR_LIBRARY_SUBSAMPLE" ]; then - VIASH_PAR_LIBRARY_SUBSAMPLE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LIBRARY_SUBSAMPLE="$VIASH_PAR_LIBRARY_SUBSAMPLE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --library_lanes) - if [ -z "$VIASH_PAR_LIBRARY_LANES" ]; then - VIASH_PAR_LIBRARY_LANES="$2" - else - VIASH_PAR_LIBRARY_LANES="$VIASH_PAR_LIBRARY_LANES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_lanes. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --library_lanes=*) - if [ -z "$VIASH_PAR_LIBRARY_LANES" ]; then - VIASH_PAR_LIBRARY_LANES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LIBRARY_LANES="$VIASH_PAR_LIBRARY_LANES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --gex_expect_cells) - [ -n "$VIASH_PAR_GEX_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--gex_expect_cells\': \'$VIASH_PAR_GEX_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_EXPECT_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_expect_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gex_expect_cells=*) - [ -n "$VIASH_PAR_GEX_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--gex_expect_cells=*\': \'$VIASH_PAR_GEX_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_EXPECT_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gex_chemistry) - [ -n "$VIASH_PAR_GEX_CHEMISTRY" ] && ViashError Bad arguments for option \'--gex_chemistry\': \'$VIASH_PAR_GEX_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_CHEMISTRY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_chemistry. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gex_chemistry=*) - [ -n "$VIASH_PAR_GEX_CHEMISTRY" ] && ViashError Bad arguments for option \'--gex_chemistry=*\': \'$VIASH_PAR_GEX_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_CHEMISTRY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gex_secondary_analysis) - [ -n "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--gex_secondary_analysis\': \'$VIASH_PAR_GEX_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_GEX_SECONDARY_ANALYSIS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_secondary_analysis. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gex_secondary_analysis=*) - [ -n "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--gex_secondary_analysis=*\': \'$VIASH_PAR_GEX_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_SECONDARY_ANALYSIS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gex_generate_bam) - [ -n "$VIASH_PAR_GEX_GENERATE_BAM" ] && ViashError Bad arguments for option \'--gex_generate_bam\': \'$VIASH_PAR_GEX_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_GENERATE_BAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_generate_bam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gex_generate_bam=*) - [ -n "$VIASH_PAR_GEX_GENERATE_BAM" ] && ViashError Bad arguments for option \'--gex_generate_bam=*\': \'$VIASH_PAR_GEX_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_GENERATE_BAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gex_include_introns) - [ -n "$VIASH_PAR_GEX_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--gex_include_introns\': \'$VIASH_PAR_GEX_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GEX_INCLUDE_INTRONS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_include_introns. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gex_include_introns=*) - [ -n "$VIASH_PAR_GEX_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--gex_include_introns=*\': \'$VIASH_PAR_GEX_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_GEX_INCLUDE_INTRONS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cell_multiplex_sample_id) - [ -n "$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID" ] && ViashError Bad arguments for option \'--cell_multiplex_sample_id\': \'$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_multiplex_sample_id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --cell_multiplex_sample_id=*) - [ -n "$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID" ] && ViashError Bad arguments for option \'--cell_multiplex_sample_id=*\': \'$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cell_multiplex_oligo_ids) - [ -n "$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS" ] && ViashError Bad arguments for option \'--cell_multiplex_oligo_ids\': \'$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_multiplex_oligo_ids. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --cell_multiplex_oligo_ids=*) - [ -n "$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS" ] && ViashError Bad arguments for option \'--cell_multiplex_oligo_ids=*\': \'$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cell_multiplex_description) - [ -n "$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION" ] && ViashError Bad arguments for option \'--cell_multiplex_description\': \'$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_multiplex_description. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --cell_multiplex_description=*) - [ -n "$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION" ] && ViashError Bad arguments for option \'--cell_multiplex_description=*\': \'$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --dryrun) - [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_DRYRUN=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_GEX_REFERENCE+x} ]; then - ViashError '--gex_reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_LIBRARY_ID+x} ]; then - ViashError '--library_id' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_LIBRARY_TYPE+x} ]; then - ViashError '--library_type' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_GEX_CHEMISTRY+x} ]; then - VIASH_PAR_GEX_CHEMISTRY="auto" -fi -if [ -z ${VIASH_PAR_GEX_SECONDARY_ANALYSIS+x} ]; then - VIASH_PAR_GEX_SECONDARY_ANALYSIS="false" -fi -if [ -z ${VIASH_PAR_GEX_GENERATE_BAM+x} ]; then - VIASH_PAR_GEX_GENERATE_BAM="false" -fi -if [ -z ${VIASH_PAR_GEX_INCLUDE_INTRONS+x} ]; then - VIASH_PAR_GEX_INCLUDE_INTRONS="true" -fi -if [ -z ${VIASH_PAR_DRYRUN+x} ]; then - VIASH_PAR_DRYRUN="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_GEX_REFERENCE" ] && [ ! -e "$VIASH_PAR_GEX_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_GEX_REFERENCE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_VDJ_REFERENCE" ] && [ ! -e "$VIASH_PAR_VDJ_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_VDJ_REFERENCE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ] && [ ! -e "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ]; then - ViashError "Input file '$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_FEATURE_REFERENCE" ] && [ ! -e "$VIASH_PAR_FEATURE_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_FEATURE_REFERENCE' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_GEX_EXPECT_CELLS" ]]; then - if ! [[ "$VIASH_PAR_GEX_EXPECT_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--gex_expect_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" ]]; then - if ! [[ "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--gex_secondary_analysis' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_GEX_GENERATE_BAM" ]]; then - if ! [[ "$VIASH_PAR_GEX_GENERATE_BAM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--gex_generate_bam' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_GEX_INCLUDE_INTRONS" ]]; then - if ! [[ "$VIASH_PAR_GEX_INCLUDE_INTRONS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--gex_include_introns' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DRYRUN" ]]; then - if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_GEX_CHEMISTRY" ]; then - VIASH_PAR_GEX_CHEMISTRY_CHOICES=("auto:threeprime:fiveprime:SC3Pv1:SC3Pv2:SC3Pv3:SC3Pv3LT:SC3Pv3HT:SC5P-PE:SC5P-R2:SC-FB") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_GEX_CHEMISTRY_CHOICES[*]}:" =~ ":$VIASH_PAR_GEX_CHEMISTRY:" ]]; then - ViashError '--gex_chemistry' specified value of \'$VIASH_PAR_GEX_CHEMISTRY\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_GEX_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GEX_REFERENCE")" ) - VIASH_PAR_GEX_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_GEX_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_VDJ_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_VDJ_REFERENCE")" ) - VIASH_PAR_VDJ_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_VDJ_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS")" ) - VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS=$(ViashAutodetectMount "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS") -fi -if [ ! -z "$VIASH_PAR_FEATURE_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_FEATURE_REFERENCE")" ) - VIASH_PAR_FEATURE_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_FEATURE_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellranger_multi-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations - -import sys -import re -import subprocess -import tempfile -import pandas as pd -from typing import Optional, Any, Union -import tarfile -from pathlib import Path -import shutil -from itertools import chain - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'gex_reference': $( if [ ! -z ${VIASH_PAR_GEX_REFERENCE+x} ]; then echo "r'${VIASH_PAR_GEX_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'vdj_reference': $( if [ ! -z ${VIASH_PAR_VDJ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_VDJ_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'vdj_inner_enrichment_primers': $( if [ ! -z ${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS+x} ]; then echo "r'${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'feature_reference': $( if [ ! -z ${VIASH_PAR_FEATURE_REFERENCE+x} ]; then echo "r'${VIASH_PAR_FEATURE_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'library_id': $( if [ ! -z ${VIASH_PAR_LIBRARY_ID+x} ]; then echo "r'${VIASH_PAR_LIBRARY_ID//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'library_type': $( if [ ! -z ${VIASH_PAR_LIBRARY_TYPE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_TYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'library_subsample': $( if [ ! -z ${VIASH_PAR_LIBRARY_SUBSAMPLE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_SUBSAMPLE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'library_lanes': $( if [ ! -z ${VIASH_PAR_LIBRARY_LANES+x} ]; then echo "r'${VIASH_PAR_LIBRARY_LANES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'gex_expect_cells': $( if [ ! 
-z ${VIASH_PAR_GEX_EXPECT_CELLS+x} ]; then echo "int(r'${VIASH_PAR_GEX_EXPECT_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'gex_chemistry': $( if [ ! -z ${VIASH_PAR_GEX_CHEMISTRY+x} ]; then echo "r'${VIASH_PAR_GEX_CHEMISTRY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'gex_secondary_analysis': $( if [ ! -z ${VIASH_PAR_GEX_SECONDARY_ANALYSIS+x} ]; then echo "r'${VIASH_PAR_GEX_SECONDARY_ANALYSIS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'gex_generate_bam': $( if [ ! -z ${VIASH_PAR_GEX_GENERATE_BAM+x} ]; then echo "r'${VIASH_PAR_GEX_GENERATE_BAM//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'gex_include_introns': $( if [ ! -z ${VIASH_PAR_GEX_INCLUDE_INTRONS+x} ]; then echo "r'${VIASH_PAR_GEX_INCLUDE_INTRONS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'cell_multiplex_sample_id': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cell_multiplex_oligo_ids': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cell_multiplex_description': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -fastq_regex = r'([A-Za-z0-9\\-_\\.]+)_S(\\d+)_L(\\d+)_[RI](\\d+)_(\\d+)\\.fastq\\.gz' -# assert re.match(fastq_regex, "5k_human_GEX_1_subset_S1_L001_R1_001.fastq.gz") is not None - -# Invert some parameters. Keep the original ones in the config for compatibility -inverted_params = { - "gex_generate_no_bam": "gex_generate_bam", - "gex_no_secondary_analysis": "gex_secondary_analysis" -} -for inverted_param, param in inverted_params.items(): - par[inverted_param] = not par[param] if par[param] is not None else None - del par[param] - -GEX_CONFIG_KEYS = { - "gex_reference": "reference", - "gex_expect_cells": "expect-cells", - "gex_chemistry": "chemistry", - "gex_no_secondary_analysis": "no-secondary", - "gex_generate_no_bam": "no-bam", - "gex_include_introns": "include-introns" -} -FEATURE_CONFIG_KEYS = {"feature_reference": "reference"} -VDJ_CONFIG_KEYS = {"vdj_reference": "reference", - "vdj_inner_enrichment_primers": "inner-enrichment-primers"} - -REFERENCE_SECTIONS = { - "gene-expression": (GEX_CONFIG_KEYS, "index"), - "feature": (FEATURE_CONFIG_KEYS, "index"), - "vdj": (VDJ_CONFIG_KEYS, "index") -} - -LIBRARY_CONFIG_KEYS = {'library_id': 'fastq_id', - 'library_type': 'feature_types', - 'library_subsample': 'subsample_rate', - 
'library_lanes': 'lanes'} -SAMPLE_PARAMS_CONFIG_KEYS = {'cell_multiplex_sample_id': 'sample_id', - 'cell_multiplex_oligo_ids': 'cmo_ids', - 'cell_multiplex_description': 'description'} - - -# These are derived from the dictionaries above -REFERENCES = tuple(reference_param for reference_param, cellranger_param - in chain(GEX_CONFIG_KEYS.items(), FEATURE_CONFIG_KEYS.items(), VDJ_CONFIG_KEYS.items()) - if cellranger_param == "reference") -LIBRARY_PARAMS = tuple(LIBRARY_CONFIG_KEYS.keys()) -SAMPLE_PARAMS = tuple(SAMPLE_PARAMS_CONFIG_KEYS.keys()) - - -def lengths_gt1(dic: dict[str, Optional[list[Any]]]) -> dict[str, int]: - return {key: len(li) for key, li in dic.items() - if li is not None and len(li) > 1} - -def strip_margin(text: str) -> str: - return re.sub('(\\n?)[ \\t]*\\|', '\\\\1', text) - - -def subset_dict(dictionary: dict[str, str], - keys: Union[dict[str, str], list[str]]) -> dict[str, str]: - if isinstance(keys, (list, tuple)): - keys = {key: key for key in keys} - return {dest_key: dictionary[orig_key] - for orig_key, dest_key in keys.items() - if dictionary[orig_key] is not None} - -def check_subset_dict_equal_length(group_name: str, - dictionary: dict[str, list[str]]) -> None: - lens = lengths_gt1(dictionary) - assert len(set(lens.values())) <= 1, f"The number of values passed to {group_name} "\\ - f"arguments must be 0, 1 or all the same. 
Offenders: {lens}" - -def process_params(par: dict[str, Any]) -> str: - # if par_input is a directory, look for fastq files - par["input"] = [Path(fastq) for fastq in par["input"]] - if len(par["input"]) == 1 and par["input"][0].is_dir(): - logger.info("Detected '--input' as a directory, " - "traversing to see if we can detect any FASTQ files.") - par["input"] = [input_path for input_path in par["input"][0].rglob('*') - if re.match(fastq_regex, input_path.name) ] - - # check input fastq files - for input_path in par["input"]: - assert re.match(fastq_regex, input_path.name) is not None, \\ - f"File name of --input '{input_path}' should match regex {fastq_regex}." - - # check lengths of libraries metadata - library_dict = subset_dict(par, LIBRARY_PARAMS) - check_subset_dict_equal_length("Library", library_dict) - # storing for later use - par["libraries"] = library_dict - - cmo_dict = subset_dict(par, SAMPLE_PARAMS) - check_subset_dict_equal_length("Cell multiplexing", cmo_dict) - # storing for later use - par["cmo"] = cmo_dict - - # use absolute paths - par["input"] = [input_path.resolve() for input_path in par["input"]] - for file_path in REFERENCES + ('output', ): - if par[file_path]: - logger.info('Making path %s absolute', par[file_path]) - par[file_path] = Path(par[file_path]).resolve() - return par - - -def generate_csv_category(name: str, args: dict[str, str], orient: str) -> list[str]: - assert orient in ("index", "columns") - if not args: - return [] - title = [ f'[{name}]' ] - # Which index to include in csv section is based on orientation - to_csv_args = {"index": (orient=="index"), "header": (orient=="columns")} - values = [pd.DataFrame.from_dict(args, orient=orient).to_csv(**to_csv_args).strip()] - return title + values + [""] - - -def generate_config(par: dict[str, Any], fastq_dir: str) -> str: - content_list = [] - par["fastqs"] = fastq_dir - libraries = dict(LIBRARY_CONFIG_KEYS, **{"fastqs": "fastqs"}) - #TODO: use the union (|) operator when python 
is updated to 3.9 - all_sections = dict(REFERENCE_SECTIONS, - **{"libraries": (libraries, "columns")}, - **{"samples": (SAMPLE_PARAMS_CONFIG_KEYS, "columns")}) - for section_name, (section_params, orientation) in all_sections.items(): - reference_pars = subset_dict(par, section_params) - content_list += generate_csv_category(section_name, reference_pars, orient=orientation) - - return '\\n'.join(content_list) - -def main(par: dict[str, Any], meta: dict[str, Any]): - logger.info(" Processing params") - par = process_params(par) - logger.info(par) - - # TODO: throw error or else Cell Ranger will - with tempfile.TemporaryDirectory(prefix="cellranger_multi-", - dir=meta["temp_dir"]) as temp_dir: - temp_dir_path = Path(temp_dir) - for reference_par_name in REFERENCES: - reference = par[reference_par_name] - logger.info('Looking at %s to check if it needs decompressing', reference) - if reference and Path(reference).is_file() and tarfile.is_tarfile(reference): - extaction_dir_name = Path(reference.stem).stem # Remove two extensions (if they exist) - unpacked_directory = temp_dir_path / extaction_dir_name - logger.info('Extracting %s to %s', reference, unpacked_directory) - - with tarfile.open(reference, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member for member in members if member.isdir() - and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_directory, members=members_to_move) - par[reference_par_name] = unpacked_directory - - # Creating symlinks of fastq files to tempdir - input_symlinks_dir = temp_dir_path / "input_symlinks" - input_symlinks_dir.mkdir() - for fastq in par['input']: - destination = input_symlinks_dir / fastq.name - destination.symlink_to(fastq) - - logger.info(" Creating config file") - config_content = generate_config(par, input_symlinks_dir) - - logger.info(" Creating Cell Ranger argument") - temp_id="run" - proc_pars=["--disable-ui", "--id", temp_id] - - command_line_parameters = { - "--localcores": meta['cpus'], - "--localmem": int(meta['memory_gb']) - 2 if meta['memory_gb'] else None, - } - for param, param_value in command_line_parameters.items(): - if param_value: - proc_pars.append(f"{param}={param_value}") - - ## Run pipeline - if par["dryrun"]: - par['output'].mkdir(parents=True, exist_ok=True) - - # write config file - config_file = par['output'] / "config.csv" - with open(config_file, "w") as f: - f.write(config_content) - proc_pars.append(f"--csv={config_file}") - - # display command that would've been used - cmd = ["cellranger multi"] + proc_pars + ["--csv=config.csv"] - logger.info("> " + ' '.join(cmd)) - else: - # write config file to execution directory - config_file = temp_dir_path / "config.csv" - with open(config_file, "w") as f: - f.write(config_content) - proc_pars.append(f"--csv={config_file}") - - # Already copy config file to output directory - par['output'].mkdir(parents=True, exist_ok=True) - with (par['output'] / "config.csv").open('w') as open_config: - open_config.write(config_content) - - # run process - 
cmd = ["cellranger", "multi"] + proc_pars - logger.info("> " + ' '.join(cmd)) - try: - process_output = subprocess.run( - cmd, - cwd=temp_dir, - check=True, - capture_output=True - ) - except subprocess.CalledProcessError as e: - print(e.output.decode('utf-8'), flush=True) - raise e - else: - # Write stdout output to output folder - with (par["output"] / "cellranger_multi.log").open('w') as open_log: - open_log.write(process_output.stdout.decode('utf-8')) - print(process_output.stdout.decode('utf-8'), flush=True) - - # look for output dir file - tmp_output_dir = temp_dir_path / temp_id / "outs" - expected_files = { - Path("multi"): Path.is_dir, - Path("per_sample_outs"): Path.is_dir, - Path("config.csv"): Path.is_file, - } - for file_path, type_func in expected_files.items(): - output_path = tmp_output_dir / file_path - if not type_func(output_path): - raise ValueError(f"Could not find expected '{output_path}'") - - for output_path in tmp_output_dir.rglob('*'): - if output_path.name != "config.csv": # Already created - shutil.move(str(output_path), par['output']) - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_GEX_REFERENCE" ]; then - VIASH_PAR_GEX_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_GEX_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_VDJ_REFERENCE" ]; then - VIASH_PAR_VDJ_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_VDJ_REFERENCE") -fi -if [ ! 
-z "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ]; then - VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS=$(ViashStripAutomount "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS") -fi -if [ ! -z "$VIASH_PAR_FEATURE_REFERENCE" ]; then - VIASH_PAR_FEATURE_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_FEATURE_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/cellranger_multi/setup_logger.py b/target/docker/mapping/cellranger_multi/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/mapping/cellranger_multi/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/mapping/htseq_count/.config.vsh.yaml b/target/docker/mapping/htseq_count/.config.vsh.yaml deleted file mode 100644 index 1bd957c81c8..00000000000 --- a/target/docker/mapping/htseq_count/.config.vsh.yaml +++ /dev/null @@ -1,418 +0,0 @@ -functionality: - name: "htseq_count" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input" - arguments: - - type: "file" - name: "--input" - description: "Path to the SAM/BAM files containing the mapped reads." 
- info: - orig_arg: "samfilenames" - example: - - "mysample1.BAM" - - "mysample2.BAM" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - description: "Path to the GTF file containing the features." - info: - orig_arg: "featurefilename" - example: - - "reference.gtf" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output" - arguments: - - type: "file" - name: "--output" - description: "Filename to output the counts to." - info: - orig_arg: "--counts_output" - example: - - "htseq-count.tsv" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_delimiter" - description: "Column delimiter in output." - info: - orig_arg: "--delimiter" - example: - - "\t" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_sam" - description: "Write out all SAM alignment records into SAM/BAM files (one per\ - \ input file needed), \nannotating each line with its feature assignment (as\ - \ an optional field with tag 'XF'). \nSee the -p option to use BAM instead\ - \ of SAM.\n" - info: - orig_arg: "--samout" - example: - - "mysample1_out.BAM" - - "mysample2_out.BAM" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--output_sam_format" - description: "Format to use with the --output_sam argument." - info: - orig_arg: "--samout-format" - required: false - choices: - - "sam" - - "bam" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "string" - name: "--order" - alternatives: - - "-r" - description: "Sorting order of . 
Paired-end sequencing data\ - \ must be sorted either by position or\nby read name, and the sorting order\ - \ must be specified. Ignored for single-end data.\n" - info: - orig_arg: "--order" - default: - - "name" - required: false - choices: - - "pos" - - "name" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--stranded" - alternatives: - - "-s" - description: "Whether the data is from a strand-specific assay. 'reverse' means\ - \ 'yes' with reversed strand interpretation." - info: - orig_arg: "--stranded" - default: - - "yes" - required: false - choices: - - "yes" - - "no" - - "reverse" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--minimum_alignment_quality" - alternatives: - - "-a" - - "--minaqual" - description: "Skip all reads with MAPQ alignment quality lower than the given\ - \ minimum value. \nMAPQ is the 5th column of a SAM/BAM file and its usage\ - \ depends on the software \nused to map the reads.\n" - info: - orig_arg: "--minaqual" - default: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--type" - alternatives: - - "-t" - description: "Feature type (3rd column in GTF file) to be used, all features\ - \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" - info: - orig_arg: "--type" - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--id_attribute" - alternatives: - - "-i" - description: "GTF attribute to be used as feature ID (default, suitable for\ - \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ - \ within the same GTF attribute will be added\ntogether. The typical way of\ - \ using this option is to count all exonic reads from each gene\nand add the\ - \ exons but other uses are possible as well. 
You can call this option multiple\n\ - times: in that case, the combination of all attributes separated by colons\ - \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ - \ gene_id -i exon_number.\n" - info: - orig_arg: "--idattr" - example: - - "gene_id" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--additional_attributes" - description: "Additional feature attributes (suitable for Ensembl GTF files:\ - \ gene_name). Use multiple times\nfor more than one additional attribute.\ - \ These attributes are only used as annotations in the\noutput, while the\ - \ determination of how the counts are added together is done based on option\ - \ -i.\n" - info: - orig_arg: "--additional-attr" - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--add_chromosome_info" - description: "Store information about the chromosome of each feature as an additional\ - \ attribute\n(e.g. colunm in the TSV output file).\n" - info: - orig_arg: "--add-chromosome-info" - direction: "input" - dest: "par" - - type: "string" - name: "--mode" - alternatives: - - "-m" - description: "Mode to handle reads overlapping more than one feature." - info: - orig_arg: "--mode" - default: - - "union" - required: false - choices: - - "union" - - "intersection-strict" - - "intersection-nonempty" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--non_unique" - description: "Whether and how to score reads that are not uniquely aligned or\ - \ ambiguously assigned to features." 
- info: - orig_arg: "--nonunique" - default: - - "none" - required: false - choices: - - "none" - - "all" - - "fraction" - - "random" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--secondary_alignments" - description: "Whether to score secondary alignments (0x100 flag)." - info: - orig_arg: "--secondary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--supplementary_alignments" - description: "Whether to score supplementary alignments (0x800 flag)." - info: - orig_arg: "--supplementary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--counts_output_sparse" - description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." - info: - orig_arg: "--counts-output-sparse" - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Quantify gene expression for subsequent testing for differential expression.\n\ - \nThis script takes one or more alignment files in SAM/BAM format and a feature\ - \ file in GFF format and calculates for each feature the number of reads mapping\ - \ to it. 
\n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "HTSeq" - - "pyyaml" - - "scipy" - - "pandas~=2.0.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: 
"/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count/htseq_count" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/htseq_count/htseq_count b/target/docker/mapping/htseq_count/htseq_count deleted file mode 100755 index 2399b9f2a37..00000000000 --- a/target/docker/mapping/htseq_count/htseq_count +++ /dev/null @@ -1,1608 +0,0 @@ -#!/usr/bin/env bash - -# htseq_count 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (author, maintainer) -# * Angela Oliveira Pisco (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events 
depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="htseq_count" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "htseq_count 0.12.3" - echo "" - echo "Quantify gene expression for subsequent testing for differential expression." - echo "" - echo "This script takes one or more alignment files in SAM/BAM format and a feature" - echo "file in GFF format and calculates for each feature the number of reads mapping" - echo "to it." - echo "" - echo "See http://htseq.readthedocs.io/en/master/count.html for details." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: mysample1.BAM;mysample2.BAM" - echo " Path to the SAM/BAM files containing the mapped reads." - echo "" - echo " --reference" - echo " type: file, required parameter, file must exist" - echo " example: reference.gtf" - echo " Path to the GTF file containing the features." - echo "" - echo "Output:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: htseq-count.tsv" - echo " Filename to output the counts to." - echo "" - echo " --output_delimiter" - echo " type: string" - echo " example:" - echo " Column delimiter in output." 
- echo "" - echo " --output_sam" - echo " type: file, multiple values allowed, output, file must exist" - echo " example: mysample1_out.BAM;mysample2_out.BAM" - echo " Write out all SAM alignment records into SAM/BAM files (one per input" - echo " file needed)," - echo " annotating each line with its feature assignment (as an optional field" - echo " with tag 'XF')." - echo " See the -p option to use BAM instead of SAM." - echo "" - echo " --output_sam_format" - echo " type: string" - echo " choices: [ sam, bam ]" - echo " Format to use with the --output_sam argument." - echo "" - echo "Arguments:" - echo " -r, --order" - echo " type: string" - echo " default: name" - echo " choices: [ pos, name ]" - echo " Sorting order of . Paired-end sequencing data must be" - echo " sorted either by position or" - echo " by read name, and the sorting order must be specified. Ignored for" - echo " single-end data." - echo "" - echo " -s, --stranded" - echo " type: string" - echo " default: yes" - echo " choices: [ yes, no, reverse ]" - echo " Whether the data is from a strand-specific assay. 'reverse' means 'yes'" - echo " with reversed strand interpretation." - echo "" - echo " -a, --minaqual, --minimum_alignment_quality" - echo " type: integer" - echo " default: 10" - echo " Skip all reads with MAPQ alignment quality lower than the given minimum" - echo " value." - echo " MAPQ is the 5th column of a SAM/BAM file and its usage depends on the" - echo " software" - echo " used to map the reads." - echo "" - echo " -t, --type" - echo " type: string" - echo " example: exon" - echo " Feature type (3rd column in GTF file) to be used, all features of other" - echo " type are ignored (default, suitable for Ensembl GTF files: exon)" - echo "" - echo " -i, --id_attribute" - echo " type: string, multiple values allowed" - echo " example: gene_id" - echo " GTF attribute to be used as feature ID (default, suitable for Ensembl" - echo " GTF files: gene_id)." 
- echo " All feature of the right type (see -t option) within the same GTF" - echo " attribute will be added" - echo " together. The typical way of using this option is to count all exonic" - echo " reads from each gene" - echo " and add the exons but other uses are possible as well. You can call this" - echo " option multiple" - echo " times: in that case, the combination of all attributes separated by" - echo " colons (:) will be used" - echo " as a unique identifier, e.g. for exons you might use -i gene_id -i" - echo " exon_number." - echo "" - echo " --additional_attributes" - echo " type: string, multiple values allowed" - echo " example: gene_name" - echo " Additional feature attributes (suitable for Ensembl GTF files:" - echo " gene_name). Use multiple times" - echo " for more than one additional attribute. These attributes are only used" - echo " as annotations in the" - echo " output, while the determination of how the counts are added together is" - echo " done based on option -i." - echo "" - echo " --add_chromosome_info" - echo " type: boolean_true" - echo " Store information about the chromosome of each feature as an additional" - echo " attribute" - echo " (e.g. colunm in the TSV output file)." - echo "" - echo " -m, --mode" - echo " type: string" - echo " default: union" - echo " choices: [ union, intersection-strict, intersection-nonempty ]" - echo " Mode to handle reads overlapping more than one feature." - echo "" - echo " --non_unique" - echo " type: string" - echo " default: none" - echo " choices: [ none, all, fraction, random ]" - echo " Whether and how to score reads that are not uniquely aligned or" - echo " ambiguously assigned to features." - echo "" - echo " --secondary_alignments" - echo " type: string" - echo " choices: [ score, ignore ]" - echo " Whether to score secondary alignments (0x100 flag)." 
- echo "" - echo " --supplementary_alignments" - echo " type: string" - echo " choices: [ score, ignore ]" - echo " Whether to score supplementary alignments (0x800 flag)." - echo "" - echo " --counts_output_sparse" - echo " type: boolean_true" - echo " Store the counts as a sparse matrix (mtx, h5ad, loom)." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." 
- fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "HTSeq" "pyyaml" "scipy" "pandas~=2.0.0" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Angela Oliveira Pisco" -LABEL org.opencontainers.image.description="Companion container for running component mapping htseq_count" -LABEL org.opencontainers.image.created="2024-01-25T10:14:00Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-htseq_count-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "htseq_count 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_delimiter) - [ -n "$VIASH_PAR_OUTPUT_DELIMITER" ] && ViashError Bad arguments for option \'--output_delimiter\': \'$VIASH_PAR_OUTPUT_DELIMITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DELIMITER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_delimiter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_delimiter=*) - [ -n "$VIASH_PAR_OUTPUT_DELIMITER" ] && ViashError Bad arguments for option \'--output_delimiter=*\': \'$VIASH_PAR_OUTPUT_DELIMITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DELIMITER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_sam) - if [ -z "$VIASH_PAR_OUTPUT_SAM" ]; then - VIASH_PAR_OUTPUT_SAM="$2" - else - VIASH_PAR_OUTPUT_SAM="$VIASH_PAR_OUTPUT_SAM;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_sam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_sam=*) - if [ -z "$VIASH_PAR_OUTPUT_SAM" ]; then - VIASH_PAR_OUTPUT_SAM=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_SAM="$VIASH_PAR_OUTPUT_SAM;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_sam_format) - [ -n "$VIASH_PAR_OUTPUT_SAM_FORMAT" ] && ViashError Bad arguments for option \'--output_sam_format\': \'$VIASH_PAR_OUTPUT_SAM_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_SAM_FORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_sam_format. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_sam_format=*) - [ -n "$VIASH_PAR_OUTPUT_SAM_FORMAT" ] && ViashError Bad arguments for option \'--output_sam_format=*\': \'$VIASH_PAR_OUTPUT_SAM_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_SAM_FORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --order) - [ -n "$VIASH_PAR_ORDER" ] && ViashError Bad arguments for option \'--order\': \'$VIASH_PAR_ORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --order. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --order=*) - [ -n "$VIASH_PAR_ORDER" ] && ViashError Bad arguments for option \'--order=*\': \'$VIASH_PAR_ORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ORDER=$(ViashRemoveFlags "$1") - shift 1 - ;; - -r) - [ -n "$VIASH_PAR_ORDER" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_ORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --stranded) - [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --stranded. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --stranded=*) - [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded=*\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDED=$(ViashRemoveFlags "$1") - shift 1 - ;; - -s) - [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --minimum_alignment_quality) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimum_alignment_quality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --minimum_alignment_quality=*) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality=*\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - -a) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'-a\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -a. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --minaqual) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minaqual\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --minaqual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --type) - [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_TYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --type. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --type=*) - [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type=*\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -t) - [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --id_attribute) - if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then - VIASH_PAR_ID_ATTRIBUTE="$2" - else - VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --id_attribute. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --id_attribute=*) - if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then - VIASH_PAR_ID_ATTRIBUTE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then - VIASH_PAR_ID_ATTRIBUTE="$2" - else - VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --additional_attributes) - if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then - VIASH_PAR_ADDITIONAL_ATTRIBUTES="$2" - else - VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --additional_attributes. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --additional_attributes=*) - if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then - VIASH_PAR_ADDITIONAL_ATTRIBUTES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --add_chromosome_info) - [ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ] && ViashError Bad arguments for option \'--add_chromosome_info\': \'$VIASH_PAR_ADD_CHROMOSOME_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ADD_CHROMOSOME_INFO=true - shift 1 - ;; - --mode) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --mode=*) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -m) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'-m\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -m. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --non_unique) - [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NON_UNIQUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --non_unique. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --non_unique=*) - [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique=*\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NON_UNIQUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --secondary_alignments) - [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SECONDARY_ALIGNMENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --secondary_alignments. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --secondary_alignments=*) - [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments=*\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SECONDARY_ALIGNMENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --supplementary_alignments) - [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --supplementary_alignments. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --supplementary_alignments=*) - [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments=*\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --counts_output_sparse) - [ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ] && ViashError Bad arguments for option \'--counts_output_sparse\': \'$VIASH_PAR_COUNTS_OUTPUT_SPARSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COUNTS_OUTPUT_SPARSE=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 
1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_ORDER+x} ]; then - VIASH_PAR_ORDER="name" -fi -if [ -z ${VIASH_PAR_STRANDED+x} ]; then - VIASH_PAR_STRANDED="yes" -fi -if [ -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="10" -fi -if [ -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then - VIASH_PAR_ADD_CHROMOSOME_INFO="false" -fi -if [ -z ${VIASH_PAR_MODE+x} ]; then - VIASH_PAR_MODE="union" -fi -if [ -z ${VIASH_PAR_NON_UNIQUE+x} ]; then - VIASH_PAR_NON_UNIQUE="none" -fi -if [ -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then - VIASH_PAR_COUNTS_OUTPUT_SPARSE="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ]]; then - if ! [[ "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--minimum_alignment_quality' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ]]; then - if ! [[ "$VIASH_PAR_ADD_CHROMOSOME_INFO" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--add_chromosome_info' has to be a boolean_true. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ]]; then - if ! [[ "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--counts_output_sparse' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! 
-z "$VIASH_PAR_OUTPUT_SAM_FORMAT" ]; then - VIASH_PAR_OUTPUT_SAM_FORMAT_CHOICES=("sam:bam") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_SAM_FORMAT_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_SAM_FORMAT:" ]]; then - ViashError '--output_sam_format' specified value of \'$VIASH_PAR_OUTPUT_SAM_FORMAT\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_ORDER" ]; then - VIASH_PAR_ORDER_CHOICES=("pos:name") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_ORDER_CHOICES[*]}:" =~ ":$VIASH_PAR_ORDER:" ]]; then - ViashError '--order' specified value of \'$VIASH_PAR_ORDER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_STRANDED" ]; then - VIASH_PAR_STRANDED_CHOICES=("yes:no:reverse") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_STRANDED_CHOICES[*]}:" =~ ":$VIASH_PAR_STRANDED:" ]]; then - ViashError '--stranded' specified value of \'$VIASH_PAR_STRANDED\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_MODE" ]; then - VIASH_PAR_MODE_CHOICES=("union:intersection-strict:intersection-nonempty") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then - ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_NON_UNIQUE" ]; then - VIASH_PAR_NON_UNIQUE_CHOICES=("none:all:fraction:random") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_NON_UNIQUE_CHOICES[*]}:" =~ ":$VIASH_PAR_NON_UNIQUE:" ]]; then - ViashError '--non_unique' specified value of \'$VIASH_PAR_NON_UNIQUE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. 
- exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_SECONDARY_ALIGNMENTS" ]; then - VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES=("score:ignore") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SECONDARY_ALIGNMENTS:" ]]; then - ViashError '--secondary_alignments' specified value of \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ]; then - VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES=("score:ignore") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS:" ]]; then - ViashError '--supplementary_alignments' specified value of \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_OUTPUT_SAM; do - unset IFS - if [ ! -d "$(dirname "$file")" ]; then - mkdir -p "$(dirname "$file")" - fi - done - set +f -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then - VIASH_TEST_OUTPUT_SAM=() - IFS=';' - for var in $VIASH_PAR_OUTPUT_SAM; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_OUTPUT_SAM+=( "$var" ) - VIASH_CHOWN_VARS+=( "$var" ) - done - VIASH_PAR_OUTPUT_SAM=$(IFS=';' ; echo "${VIASH_TEST_OUTPUT_SAM[*]}") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! 
-z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-htseq_count-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import tempfile -import subprocess -from pathlib import Path -import tarfile -import gzip -import shutil -import yaml - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_delimiter': $( if [ ! -z ${VIASH_PAR_OUTPUT_DELIMITER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_DELIMITER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_sam': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'output_sam_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM_FORMAT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'order': $( if [ ! 
-z ${VIASH_PAR_ORDER+x} ]; then echo "r'${VIASH_PAR_ORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'add_chromosome_info': $( if [ ! -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'non_unique': $( if [ ! -z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'supplementary_alignments': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\x1f\\x8b' - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -def generate_args(par, config): - # fetch arguments from config - arguments = [ - arg - for group in config["functionality"]["argument_groups"] - for arg in group["arguments"] - ] - - cmd_args = [] - - for arg in arguments: - arg_val = par.get(arg["name"].removeprefix("--")) - orig_arg = arg.get("info", {}).get("orig_arg") - if arg_val and orig_arg: - if not arg.get("multiple", False): - arg_val = [arg_val] - - if arg["type"] in ["boolean_true", "boolean_false"]: - # if argument is a boolean_true or boolean_false, simply add the flag - arg_val = [orig_arg] - elif orig_arg.startswith("-"): - # if the orig arg flag is not a positional, - # add the flag in front of each element and flatten - arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] - - cmd_args.extend(arg_val) - - return cmd_args - -######################## -### Main code ### -######################## - -# read config arguments -config = yaml.safe_load(Path(meta["config"]).read_text()) - - -with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir: - - # checking for compressed files, ungzip files if need be - temp_dir_path = 
Path(temp_dir) - reference = Path(par["reference"]) - - print(f'>> Check compression of --reference with value: {reference}', flush=True) - par["reference"] = extract_if_need_be(reference, temp_dir_path) - - print(">> Constructing command", flush=True) - cmd_args = [ "htseq-count" ] + generate_args(par, config) - - # manually process cpus parameter - if 'cpus' in meta and meta['cpus']: - cmd_args.extend(["--nprocesses", str(meta["cpus"])]) - - print(">> Running htseq-count with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then - unset VIASH_TEST_OUTPUT_SAM - IFS=';' - for var in $VIASH_PAR_OUTPUT_SAM; do - unset IFS - if [ -z "$VIASH_TEST_OUTPUT_SAM" ]; then - VIASH_TEST_OUTPUT_SAM="$(ViashStripAutomount "$var")" - else - VIASH_TEST_OUTPUT_SAM="$VIASH_TEST_OUTPUT_SAM;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_OUTPUT_SAM="$VIASH_TEST_OUTPUT_SAM" -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_OUTPUT_SAM; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Output file '$file' does not exist." - exit 1 - fi - done - set +f -fi - - -exit 0 diff --git a/target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml b/target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml deleted file mode 100644 index f08aa9ac37f..00000000000 --- a/target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,209 +0,0 @@ -functionality: - name: "htseq_count_to_h5mu" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input" - arguments: - - type: "string" - name: "--input_id" - description: "The obs index for the counts" - info: null - example: - - "foo" - 
required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--input_counts" - description: "The counts as a TSV file as output by HTSeq." - info: null - example: - - "counts.tsv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - description: "The GTF file." - info: null - example: - - "gencode_v41_star" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Convert the htseq table to a h5mu.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "gtfparse" - - "polars[pyarrow] < 0.16.14" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" 
- cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu b/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu deleted file mode 100755 index f75d95ef0dd..00000000000 --- a/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu +++ /dev/null @@ -1,1151 +0,0 @@ -#!/usr/bin/env bash - -# htseq_count_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (author, maintainer) -# * Angela Oliveira Pisco (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events 
depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="htseq_count_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "htseq_count_to_h5mu 0.12.3" - echo "" - echo "Convert the htseq table to a h5mu." - echo "" - echo "Input:" - echo " --input_id" - echo " type: string, required parameter, multiple values allowed" - echo " example: foo" - echo " The obs index for the counts" - echo "" - echo " --input_counts" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: counts.tsv" - echo " The counts as a TSV file as output by HTSeq." - echo "" - echo " --reference" - echo " type: file, required parameter, file must exist" - echo " example: gencode_v41_star" - echo " The GTF file." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "gtfparse" "polars[pyarrow] < 0.16.14" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Angela Oliveira Pisco" -LABEL org.opencontainers.image.description="Companion container for running component mapping htseq_count_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-htseq_count_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "htseq_count_to_h5mu 0.12.3" - exit - ;; - --input_id) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID="$2" - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id=*) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --input_counts) - if [ -z "$VIASH_PAR_INPUT_COUNTS" ]; then - VIASH_PAR_INPUT_COUNTS="$2" - else - VIASH_PAR_INPUT_COUNTS="$VIASH_PAR_INPUT_COUNTS;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_counts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_counts=*) - if [ -z "$VIASH_PAR_INPUT_COUNTS" ]; then - VIASH_PAR_INPUT_COUNTS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_COUNTS="$VIASH_PAR_INPUT_COUNTS;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then - ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT_COUNTS+x} ]; then - ViashError '--input_counts' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT_COUNTS" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT_COUNTS; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT_COUNTS" ]; then - VIASH_TEST_INPUT_COUNTS=() - IFS=';' - for var in $VIASH_PAR_INPUT_COUNTS; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT_COUNTS+=( "$var" ) - done - VIASH_PAR_INPUT_COUNTS=$(IFS=';' ; echo "${VIASH_TEST_INPUT_COUNTS[*]}") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-htseq_count_to_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import tempfile -from pathlib import Path -import tarfile -import gzip -import shutil -import pandas as pd -import mudata as md -import anndata as ad -import polars as pl -import numpy as np -import gtfparse - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'input_counts': $( if [ ! -z ${VIASH_PAR_INPUT_COUNTS+x} ]; then echo "r'${VIASH_PAR_INPUT_COUNTS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\x1f\\x8b' - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - - -print("> combine counts data", flush=True) -counts_data = [] - -for input_id, input_counts in zip(par["input_id"], par["input_counts"]): - data = pd.read_table(input_counts, index_col=0, names=["gene_ids", input_id], dtype={'gene_ids': 'U', input_id: 'i'}).transpose() - counts_data.append(data) - -# combine all counts -counts_and_qc = pd.concat(counts_data, axis=0) - -print("> split qc", flush=True) -idx = counts_and_qc.columns.str.startswith("_") -qc = counts_and_qc.loc[:,idx] -qc.columns = qc.columns.str.replace("^__", "", regex=True) -counts = counts_and_qc.loc[:,~idx] - -print("> construct var", flush=True) -with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir: - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - reference = Path(par["reference"]) - - print(f'>> Check compression of --reference with value: {reference}', flush=True) - par["reference"] = extract_if_need_be(reference, temp_dir_path) - - # read_gtf only works on str object, not pathlib.Path - reference = gtfparse.read_gtf(str(par["reference"])) - - -# This is a polars dataframe, not 
pandas -reference_genes = reference.filter((pl.col("feature") == "gene") & - (pl.col("gene_id").is_in(list(counts.columns))))\\ - .sort("gene_id") - -var = pd.DataFrame( - data={ - "gene_ids": pd.Index(reference_genes.get_column("gene_id")), - "feature_types": "Gene Expression", - "gene_symbol": reference_genes.get_column("gene_name").to_pandas(), - } -).set_index("gene_ids") - -print("> construct anndata", flush=True) -adata = ad.AnnData( - X=counts, - obsm={"qc_htseq": qc}, - var=var, - dtype=np.int32 -) - -print("> convert to mudata", flush=True) -mdata = md.MuData(adata) - -print("> write to file", flush=True) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT_COUNTS" ]; then - unset VIASH_TEST_INPUT_COUNTS - IFS=';' - for var in $VIASH_PAR_INPUT_COUNTS; do - unset IFS - if [ -z "$VIASH_TEST_INPUT_COUNTS" ]; then - VIASH_TEST_INPUT_COUNTS="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT_COUNTS="$VIASH_TEST_INPUT_COUNTS;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT_COUNTS="$VIASH_TEST_INPUT_COUNTS" -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/multi_star/.config.vsh.yaml b/target/docker/mapping/multi_star/.config.vsh.yaml deleted file mode 100644 index e66b519f23d..00000000000 --- a/target/docker/mapping/multi_star/.config.vsh.yaml +++ /dev/null @@ -1,3080 +0,0 @@ -functionality: - name: "multi_star" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input/Output" - arguments: - - type: "string" - name: "--input_id" - description: "The ID of the sample being processed. This vector should have\ - \ the same length as the `--input_r1` argument." - info: null - example: - - "mysample" - - "mysample" - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--input_r1" - description: "Paths to the sequences to be mapped. If using Illumina paired-end\ - \ reads, only the R1 files should be passed." 
- info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L002_R1_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--input_r2" - description: "Paths to the sequences to be mapped. If using Illumina paired-end\ - \ reads, only the R2 files should be passed." - info: null - example: - - "mysample_S1_L001_R2_001.fastq.gz" - - "mysample_S1_L002_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference_index" - alternatives: - - "--genomeDir" - description: "Path to the reference built by star_build_reference. Corresponds\ - \ to the --genomeDir argument in the STAR command." - info: null - example: - - "/path/to/reference" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference_gtf" - description: "Path to the gtf reference file." - info: null - example: - - "genes.gtf" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--outFileNamePrefix" - description: "Path to output directory. Corresponds to the --outFileNamePrefix\ - \ argument in the STAR command." - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Processing arguments" - arguments: - - type: "boolean" - name: "--run_htseq_count" - description: "Whether or not to also run htseq-count after STAR." 
- info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--run_multiqc" - description: "Whether or not to also run MultiQC at the end." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_success_rate" - description: "Fail when the success rate is below this threshold." - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Run Parameters" - arguments: - - type: "integer" - name: "--runRNGseed" - description: "random number generator seed." - info: - step: "star" - orig_arg: "--runRNGseed" - example: - - 777 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome Parameters" - arguments: - - type: "file" - name: "--genomeFastaFiles" - description: "path(s) to the fasta files with the genome sequences, separated\ - \ by spaces. These files should be plain text FASTA files, they *cannot* be\ - \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ - \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ - \ sequences to the genome (e.g. spike-ins)." - info: - step: "star" - orig_arg: "--genomeFastaFiles" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Splice Junctions Database" - arguments: - - type: "string" - name: "--sjdbFileChrStartEnd" - description: "path to the files with genomic coordinates (chr start \ - \ end strand) for the splice junction introns. Multiple files can be\ - \ supplied and will be concatenated." 
- info: - step: "star" - orig_arg: "--sjdbFileChrStartEnd" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--sjdbGTFfile" - description: "path to the GTF file with annotations" - info: - step: "star" - orig_arg: "--sjdbGTFfile" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFchrPrefix" - description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ - \ ENSEMBL annotations with UCSC genomes)" - info: - step: "star" - orig_arg: "--sjdbGTFchrPrefix" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFfeatureExon" - description: "feature type in GTF file to be used as exons for building transcripts" - info: - step: "star" - orig_arg: "--sjdbGTFfeatureExon" - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentTranscript" - description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ - \ works for GTF files)" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentTranscript" - example: - - "transcript_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGene" - description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ - \ for GTF files)" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentGene" - example: - - "gene_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneName" - description: "GTF attribute name for parent gene name" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentGeneName" - example: - - "gene_name" - required: false -
direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneType" - description: "GTF attribute name for parent gene type" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentGeneType" - example: - - "gene_type" - - "gene_biotype" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--sjdbOverhang" - description: "length of the donor/acceptor sequence on each side of the junctions,\ - \ ideally = (mate_length - 1)" - info: - step: "star" - orig_arg: "--sjdbOverhang" - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--sjdbScore" - description: "extra alignment score for alignments that cross database junctions" - info: - step: "star" - orig_arg: "--sjdbScore" - example: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbInsertSave" - description: "which files to save when sjdb junctions are inserted on the fly\ - \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ - - All ... all files including big Genome, SA and SAindex - this will create\ - \ a complete genome directory" - info: - step: "star" - orig_arg: "--sjdbInsertSave" - example: - - "Basic" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variation parameters" - arguments: - - type: "string" - name: "--varVCFfile" - description: "path to the VCF file that contains variation data. The 10th column\ - \ should contain the genotype information, e.g. 0/1" - info: - step: "star" - orig_arg: "--varVCFfile" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Parameters" - arguments: - - type: "string" - name: "--readFilesType" - description: "format of input read files\n\n- Fastx ... 
FASTA or FASTQ\n\ - - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ - \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ - \ --readFilesCommand samtools view" - info: - step: "star" - orig_arg: "--readFilesType" - example: - - "Fastx" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesSAMattrKeep" - description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ - \ BAM, e.g.: --readFilesSAMattrKeep RG PL\n\n- All ... keep all tags\n\ - - None ... do not keep any tags" - info: - step: "star" - orig_arg: "--readFilesSAMattrKeep" - example: - - "All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--readFilesManifest" - description: "path to the \"manifest\" file with the names of read files. The\ - \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ - \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ - \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ - \ but not tabs are allowed in file names.\nIf read_group_line does not start\ - \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ - If read_group_line starts with ID:, it can contain several fields separated\ - \ by $tab$, and all fields will be copied verbatim into SAM @RG header\ - \ line." - info: - step: "star" - orig_arg: "--readFilesManifest" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesPrefix" - description: "prefix for the read files names, i.e.
it will be added in front\ - \ of the strings in --readFilesIn" - info: - step: "star" - orig_arg: "--readFilesPrefix" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesCommand" - description: "command line to execute for each of the input file. This command\ - \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ - \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." - info: - step: "star" - orig_arg: "--readFilesCommand" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readMapNumber" - description: "number of reads to map from the beginning of the file\n\n-1: map\ - \ all reads" - info: - step: "star" - orig_arg: "--readMapNumber" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readMatesLengthsIn" - description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ - \ mates are the same / not the same. NotEqual is safe in all situations." 
- info: - step: "star" - orig_arg: "--readMatesLengthsIn" - example: - - "NotEqual" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readNameSeparator" - description: "character(s) separating the part of the read names that will be\ - \ trimmed in output (read name after space is always trimmed)" - info: - step: "star" - orig_arg: "--readNameSeparator" - example: - - "/" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readQualityScoreBase" - description: "number to be subtracted from the ASCII code to get Phred quality\ - \ score" - info: - step: "star" - orig_arg: "--readQualityScoreBase" - example: - - 33 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Clipping" - arguments: - - type: "string" - name: "--clipAdapterType" - description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ - \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ - - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ - \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ - \ ... no adapter clipping, all other clip* parameters are disregarded" - info: - step: "star" - orig_arg: "--clipAdapterType" - example: - - "Hamming" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--clip3pNbases" - description: "number(s) of bases to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." - info: - step: "star" - orig_arg: "--clip3pNbases" - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--clip3pAdapterSeq" - description: "adapter sequences to clip from 3p of each mate. 
If one value\ - \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ - \ sequence with the length equal to read length" - info: - step: "star" - orig_arg: "--clip3pAdapterSeq" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "double" - name: "--clip3pAdapterMMp" - description: "max proportion of mismatches for 3p adapter clipping for each\ - \ mate. If one value is given, it will be assumed the same for both mates." - info: - step: "star" - orig_arg: "--clip3pAdapterMMp" - example: - - 0.1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip3pAfterAdapterNbases" - description: "number of bases to clip from 3p of each mate after the adapter\ - \ clipping. If one value is given, it will be assumed the same for both mates." - info: - step: "star" - orig_arg: "--clip3pAfterAdapterNbases" - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip5pNbases" - description: "number(s) of bases to clip from 5p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." 
- info: - step: "star" - orig_arg: "--clip5pNbases" - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Limits" - arguments: - - type: "long" - name: "--limitGenomeGenerateRAM" - description: "maximum available RAM (bytes) for genome generation" - info: - step: "star" - orig_arg: "--limitGenomeGenerateRAM" - example: - - 31000000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitIObufferSize" - description: "max available buffers size (bytes) for input/output, per thread" - info: - step: "star" - orig_arg: "--limitIObufferSize" - example: - - 30000000 - - 50000000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "long" - name: "--limitOutSAMoneReadBytes" - description: "max size of the SAM record (bytes) for one read. Recommended value:\ - \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - info: - step: "star" - orig_arg: "--limitOutSAMoneReadBytes" - example: - - 100000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJoneRead" - description: "max number of junctions for one read (including all multi-mappers)" - info: - step: "star" - orig_arg: "--limitOutSJoneRead" - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJcollapsed" - description: "max number of collapsed junctions" - info: - step: "star" - orig_arg: "--limitOutSJcollapsed" - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitBAMsortRAM" - description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ - \ be set to the genome index size. 0 value can only be used with --genomeLoad\ - \ NoSharedMemory option." 
- info: - step: "star" - orig_arg: "--limitBAMsortRAM" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitSjdbInsertNsj" - description: "maximum number of junctions to be inserted to the genome on the\ - \ fly at the mapping stage, including those from annotations and those detected\ - \ in the 1st step of the 2-pass run" - info: - step: "star" - orig_arg: "--limitSjdbInsertNsj" - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitNreadsSoft" - description: "soft limit on the number of reads" - info: - step: "star" - orig_arg: "--limitNreadsSoft" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: general" - arguments: - - type: "string" - name: "--outTmpKeep" - description: "whether to keep the temporary files after STAR runs is finished\n\ - \n- None ... remove all temporary files\n- All ... keep all files" - info: - step: "star" - orig_arg: "--outTmpKeep" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outStd" - description: "which output will be directed to stdout (standard out)\n\n- Log\ - \ ... log messages\n- SAM ... alignments\ - \ in SAM format (which normally are output to Aligned.out.sam file), normal\ - \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ - \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ - \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ - \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ - \ in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM" - info: - step: "star" - orig_arg: "--outStd" - example: - - "Log" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outReadsUnmapped" - description: "output of unmapped and partially mapped (i.e. mapped only one\ - \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ - \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - info: - step: "star" - orig_arg: "--outReadsUnmapped" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outQSconversionAdd" - description: "add this number to the quality score (e.g. to convert from Illumina\ - \ to Sanger, use -31)" - info: - step: "star" - orig_arg: "--outQSconversionAdd" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outMultimapperOrder" - description: "order of multimapping alignments in the output files\n\n- Old_2.4\ - \ ... quasi-random order used before 2.5.0\n- Random \ - \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ - \ are always adjacent, all alignment for each read stay together. This option\ - \ will become default in the future releases." - info: - step: "star" - orig_arg: "--outMultimapperOrder" - example: - - "Old_2.4" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: SAM and BAM" - arguments: - - type: "string" - name: "--outSAMmode" - description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ - \ SAM output\n- NoQS ... 
full SAM but without quality scores" - info: - step: "star" - orig_arg: "--outSAMmode" - example: - - "Full" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMstrandField" - description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ - - intronMotif ... strand derived from the intron motif. This option changes\ - \ the output alignments: reads with inconsistent and/or non-canonical introns\ - \ are filtered out." - info: - step: "star" - orig_arg: "--outSAMstrandField" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattributes" - description: "a string of desired SAM attributes, in the order desired for the\ - \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ - - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ - \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ - \ number of loci the read maps to: =1 for unique mappers, >1 for multimappers.\ - \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ - \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ - \ local alignment score, +1/-1 for matches/mismatches, score* penalties for\ - \ indels and gaps. For PE reads, total score for two mates. Standard SAM tag.\n\ - - nM ... number of mismatches. For PE reads, sum over two mates.\n\ - - NM ... edit distance to the reference (number of mismatched + inserted\ - \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ - \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ - \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ - \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ - \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ - \ annotated, 20 is added to its motif value.\n- jI ...
start and\ - \ end of introns for all junctions (1-based).\n- XS ... alignment\ - \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ - \ string. Standard SAM tag.\n- ch ... marks all segments of all chimeric\ - \ alignments for --chimOutType WithinBAM output.\n- cN ... number\ - \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ - \ ... variant allele\n- vG ... genomic coordinate of the variant\ - \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ - \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ - \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ - \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ - \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ - \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ - \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ - \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ - \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ - \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ - \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ - \ genome generated with --genomeTransformType Diploid .\n- rB ...\ - \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ - \ of the variant." - info: - step: "star" - orig_arg: "--outSAMattributes" - example: - - "Standard" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMattrIHstart" - description: "start value for the IH attribute. 0 may be required by some downstream\ - \ software, such as Cufflinks or StringTie."
- info: - step: "star" - orig_arg: "--outSAMattrIHstart" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMunmapped" - description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ - \ ... no output\n- Within ... output unmapped reads within the main SAM\ - \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ - \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ - \ to its mapped mate. Only affects multi-mapping reads." - info: - step: "star" - orig_arg: "--outSAMunmapped" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMorder" - description: "type of sorting for the SAM output\n\nPaired: one mate after the\ - \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ - \ other for all paired alignments, the order is kept the same as in the input\ - \ FASTQ files" - info: - step: "star" - orig_arg: "--outSAMorder" - example: - - "Paired" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMprimaryFlag" - description: "which alignments are considered primary - all others will be marked\ - \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ - \ the best score is primary\n- AllBestScore ... all alignments with the best\ - \ score are primary" - info: - step: "star" - orig_arg: "--outSAMprimaryFlag" - example: - - "OneBestScore" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMreadID" - description: "read ID record type\n\n- Standard ... 
first word (until space)\ - \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ - \ read number (index) in the FASTx file" - info: - step: "star" - orig_arg: "--outSAMreadID" - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMmapqUnique" - description: "0 to 255: the MAPQ value for unique mappers" - info: - step: "star" - orig_arg: "--outSAMmapqUnique" - example: - - 255 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagOR" - description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ - \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ - \ are not set otherwise." - info: - step: "star" - orig_arg: "--outSAMflagOR" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagAND" - description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ - \ FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set\ - \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ - \ are set otherwise." - info: - step: "star" - orig_arg: "--outSAMflagAND" - example: - - 65535 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattrRGline" - description: "SAM/BAM read group line. The first word contains the read group\ - \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ - \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment.
Any\ - \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ - \ correspond to different (comma separated) input files in --readFilesIn.\ - \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ - \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - info: - step: "star" - orig_arg: "--outSAMattrRGline" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderHD" - description: "@HD (header) line of the SAM header" - info: - step: "star" - orig_arg: "--outSAMheaderHD" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderPG" - description: "extra @PG (software) line of the SAM header (in addition to STAR)" - info: - step: "star" - orig_arg: "--outSAMheaderPG" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderCommentFile" - description: "path to the file with @CO (comment) lines of the SAM header" - info: - step: "star" - orig_arg: "--outSAMheaderCommentFile" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMfilter" - description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ - \ ... only keep the reads for which all alignments are to the extra reference\ - \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ - \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ - \ at the mapping stage." - info: - step: "star" - orig_arg: "--outSAMfilter" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMmultNmax" - description: "max number of multiple alignments for a read that will be output\ - \ to the SAM/BAM files.
Note that if this value is not equal to -1, the top\ - \ scoring alignment will be output first\n\n- -1 ... all alignments (up to\ - \ --outFilterMultimapNmax) will be output" - info: - step: "star" - orig_arg: "--outSAMmultNmax" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMtlen" - description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ - - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ - \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ - \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ - \ from 1 for overlapping mates with protruding ends" - info: - step: "star" - orig_arg: "--outSAMtlen" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMcompression" - description: "-1 to 10 BAM compression level, -1=default compression (6?),\ - \ 0=no compression, 10=maximum compression" - info: - step: "star" - orig_arg: "--outBAMcompression" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingThreadN" - description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." 
- info: - step: "star" - orig_arg: "--outBAMsortingThreadN" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingBinsN" - description: ">0: number of genome bins for coordinate-sorting" - info: - step: "star" - orig_arg: "--outBAMsortingBinsN" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "BAM processing" - arguments: - - type: "string" - name: "--bamRemoveDuplicatesType" - description: "mark duplicates in the BAM file, for now only works with (i) sorted\ - \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ - \ - ... no duplicate removal/marking\n- UniqueIdentical\ - \ ... mark all multimappers, and duplicate unique mappers. The coordinates,\ - \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ - \ unique mappers but not multimappers." - info: - step: "star" - orig_arg: "--bamRemoveDuplicatesType" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--bamRemoveDuplicatesMate2basesN" - description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ - \ for RAMPAGE)" - info: - step: "star" - orig_arg: "--bamRemoveDuplicatesMate2basesN" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Wiggle" - arguments: - - type: "string" - name: "--outWigType" - description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ - . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ - - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ - \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ - \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... 
signal from\ - \ only 2nd read" - info: - step: "star" - orig_arg: "--outWigType" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outWigStrand" - description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ - \ strands, str1 and str2\n- Unstranded ... collapsed strands" - info: - step: "star" - orig_arg: "--outWigStrand" - example: - - "Stranded" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigReferencesPrefix" - description: "prefix matching reference names to include in the output wiggle\ - \ file, e.g. \"chr\", default \"-\" - include all references" - info: - step: "star" - orig_arg: "--outWigReferencesPrefix" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigNorm" - description: "type of normalization for the signal\n\n- RPM ... reads per\ - \ million of mapped reads\n- None ... no normalization, \"raw\" counts" - info: - step: "star" - orig_arg: "--outWigNorm" - example: - - "RPM" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering" - arguments: - - type: "string" - name: "--outFilterType" - description: "type of filtering\n\n- Normal ... standard filtering using only\ - \ current alignment\n- BySJout ... 
keep only those reads that contain junctions\ - \ that passed filtering into SJ.out.tab" - info: - step: "star" - orig_arg: "--outFilterType" - example: - - "Normal" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapScoreRange" - description: "the score range below the maximum score for multimapping alignments" - info: - step: "star" - orig_arg: "--outFilterMultimapScoreRange" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapNmax" - description: "maximum number of loci the read is allowed to map to. Alignments\ - \ (all of them) will be output only if the read maps to no more loci than\ - \ this value.\n\nOtherwise no alignments will be output, and the read will\ - \ be counted as \"mapped to too many loci\" in the Log.final.out ." - info: - step: "star" - orig_arg: "--outFilterMultimapNmax" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMismatchNmax" - description: "alignment will be output only if it has no more mismatches than\ - \ this value." - info: - step: "star" - orig_arg: "--outFilterMismatchNmax" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverLmax" - description: "alignment will be output only if its ratio of mismatches to *mapped*\ - \ length is less than or equal to this value." - info: - step: "star" - orig_arg: "--outFilterMismatchNoverLmax" - example: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverReadLmax" - description: "alignment will be output only if its ratio of mismatches to *read*\ - \ length is less than or equal to this value." 
- info: - step: "star" - orig_arg: "--outFilterMismatchNoverReadLmax" - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterScoreMin" - description: "alignment will be output only if its score is higher than or equal\ - \ to this value." - info: - step: "star" - orig_arg: "--outFilterScoreMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterScoreMinOverLread" - description: "same as outFilterScoreMin, but normalized to read length (sum\ - \ of mates' lengths for paired-end reads)" - info: - step: "star" - orig_arg: "--outFilterScoreMinOverLread" - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMatchNmin" - description: "alignment will be output only if the number of matched bases is\ - \ higher than or equal to this value." - info: - step: "star" - orig_arg: "--outFilterMatchNmin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMatchNminOverLread" - description: "same as outFilterMatchNmin, but normalized to the read length (sum\ - \ of mates' lengths for paired-end reads)." - info: - step: "star" - orig_arg: "--outFilterMatchNminOverLread" - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronMotifs" - description: "filter alignment using their motifs\n\n- None \ - \ ... no filtering\n- RemoveNoncanonical ... filter\ - \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ - \ ... filter out alignments that contain non-canonical unannotated junctions\ - \ when using annotated splice junctions database.
The annotated non-canonical\ - \ junctions will be kept." - info: - step: "star" - orig_arg: "--outFilterIntronMotifs" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronStrands" - description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ - \ alignments that have junctions with inconsistent strands\n- None \ - \ ... no filtering" - info: - step: "star" - orig_arg: "--outFilterIntronStrands" - example: - - "RemoveInconsistentStrands" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output splice junctions (SJ.out.tab)" - arguments: - - type: "string" - name: "--outSJtype" - description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ - \ output\n- None ... no splice junction output" - info: - step: "star" - orig_arg: "--outSJtype" - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering: Splice Junctions" - arguments: - - type: "string" - name: "--outSJfilterReads" - description: "which reads to consider for collapsed splice junctions output\n\ - \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ - \ mapping reads only" - info: - step: "star" - orig_arg: "--outSJfilterReads" - example: - - "All" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSJfilterOverhangMin" - description: "minimum overhang length for splice junctions on both sides for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - does not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterOverhangMin" - example: - - 30 - - 12 - - 12 - - 12 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountUniqueMin" - description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ - \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ - \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ - \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ - \ are satisfied\ndoes not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterCountUniqueMin" - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountTotalMin" - description: "minimum total (multi-mapping+unique) read count per junction for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ - \ conditions are satisfied\ndoes not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterCountTotalMin" - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterDistToOtherSJmin" - description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ - does not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterDistToOtherSJmin" - example: - - 10 - - 0 - - 5 - - 10 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterIntronMaxVsReadN" - description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ - \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ - \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ - does not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterIntronMaxVsReadN" - example: - - 50000 - - 100000 - - 200000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Scoring" - arguments: - - type: "integer" - name: "--scoreGap" - description: "splice junction penalty (independent on intron motif)" - info: - step: "star" - orig_arg: "--scoreGap" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapNoncan" - description: "non-canonical junction penalty (in addition to scoreGap)" - info: - step: "star" - orig_arg: "--scoreGapNoncan" - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapGCAG" - description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" - info: - step: "star" - orig_arg: "--scoreGapGCAG" - example: - - -4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapATAC" - description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" - info: - step: "star" - orig_arg: "--scoreGapATAC" - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGenomicLengthLog2scale" - description: "extra score logarithmically scaled with genomic length of the\ - \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" - info: - step: "star" - orig_arg: "--scoreGenomicLengthLog2scale" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelOpen" - description: "deletion open penalty" - info: - step: "star" - orig_arg: "--scoreDelOpen" - example: - - -2 - required: false - direction: 
"input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelBase" - description: "deletion extension penalty per base (in addition to scoreDelOpen)" - info: - step: "star" - orig_arg: "--scoreDelBase" - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsOpen" - description: "insertion open penalty" - info: - step: "star" - orig_arg: "--scoreInsOpen" - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsBase" - description: "insertion extension penalty per base (in addition to scoreInsOpen)" - info: - step: "star" - orig_arg: "--scoreInsBase" - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreStitchSJshift" - description: "maximum score reduction while searching for SJ boundaries in the\ - \ stitching step" - info: - step: "star" - orig_arg: "--scoreStitchSJshift" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Alignments and Seeding" - arguments: - - type: "integer" - name: "--seedSearchStartLmax" - description: "defines the search start point through the read - the read is\ - \ split into pieces no longer than this value" - info: - step: "star" - orig_arg: "--seedSearchStartLmax" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--seedSearchStartLmaxOverLread" - description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ - \ for paired-end reads)" - info: - step: "star" - orig_arg: "--seedSearchStartLmaxOverLread" - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSearchLmax" - 
description: "defines the maximum length of the seeds, if =0 seed length is\ - \ not limited" - info: - step: "star" - orig_arg: "--seedSearchLmax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMultimapNmax" - description: "only pieces that map fewer than this value are utilized in the\ - \ stitching procedure" - info: - step: "star" - orig_arg: "--seedMultimapNmax" - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerReadNmax" - description: "max number of seeds per read" - info: - step: "star" - orig_arg: "--seedPerReadNmax" - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerWindowNmax" - description: "max number of seeds per window" - info: - step: "star" - orig_arg: "--seedPerWindowNmax" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedNoneLociPerWindow" - description: "max number of one seed loci per window" - info: - step: "star" - orig_arg: "--seedNoneLociPerWindow" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSplitMin" - description: "min length of the seed sequences split by Ns or mate gap" - info: - step: "star" - orig_arg: "--seedSplitMin" - example: - - 12 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMapMin" - description: "min length of seeds to be mapped" - info: - step: "star" - orig_arg: "--seedMapMin" - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMin" - description: "minimum intron size, 
genomic gap is considered intron if its length>=alignIntronMin,\ - \ otherwise it is considered Deletion" - info: - step: "star" - orig_arg: "--alignIntronMin" - example: - - 21 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMax" - description: "maximum intron size, if 0, max intron size will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: - step: "star" - orig_arg: "--alignIntronMax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignMatesGapMax" - description: "maximum gap between two mates, if 0, max intron gap will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: - step: "star" - orig_arg: "--alignMatesGapMax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJoverhangMin" - description: "minimum overhang (i.e. block size) for spliced alignments" - info: - step: "star" - orig_arg: "--alignSJoverhangMin" - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJstitchMismatchNmax" - description: "maximum number of mismatches for stitching of the splice junctions\ - \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ - \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." - info: - step: "star" - orig_arg: "--alignSJstitchMismatchNmax" - example: - - 0 - - -1 - - 0 - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--alignSJDBoverhangMin" - description: "minimum overhang (i.e. 
block size) for annotated (sjdb) spliced\ - \ alignments" - info: - step: "star" - orig_arg: "--alignSJDBoverhangMin" - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSplicedMateMapLmin" - description: "minimum mapped length for a read mate that is spliced" - info: - step: "star" - orig_arg: "--alignSplicedMateMapLmin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alignSplicedMateMapLminOverLmate" - description: "alignSplicedMateMapLmin normalized to mate length" - info: - step: "star" - orig_arg: "--alignSplicedMateMapLminOverLmate" - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignWindowsPerReadNmax" - description: "max number of windows per read" - info: - step: "star" - orig_arg: "--alignWindowsPerReadNmax" - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerWindowNmax" - description: "max number of transcripts per window" - info: - step: "star" - orig_arg: "--alignTranscriptsPerWindowNmax" - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerReadNmax" - description: "max number of different alignments per read to consider" - info: - step: "star" - orig_arg: "--alignTranscriptsPerReadNmax" - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsType" - description: "type of read ends alignment\n\n- Local ... standard\ - \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ - \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... 
fully\ - \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ - \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ - \ local alignment" - info: - step: "star" - orig_arg: "--alignEndsType" - example: - - "Local" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsProtrude" - description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ - \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ - \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ - \ ConcordantPair ... report alignments with non-zero protrusion\ - \ as concordant pairs\n- DiscordantPair ... report alignments\ - \ with non-zero protrusion as discordant pairs" - info: - step: "star" - orig_arg: "--alignEndsProtrude" - example: - - "0 ConcordantPair" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignSoftClipAtReferenceEnds" - description: "allow the soft-clipping of the alignments past the end of the\ - \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ - \ with Cufflinks" - info: - step: "star" - orig_arg: "--alignSoftClipAtReferenceEnds" - example: - - "Yes" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignInsertionFlush" - description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ - \ are not flushed\n- Right ... insertions are flushed to the right" - info: - step: "star" - orig_arg: "--alignInsertionFlush" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Paired-End reads" - arguments: - - type: "integer" - name: "--peOverlapNbasesMin" - description: "minimum number of overlapping bases to trigger mates merging and\ - \ realignment. 
Specify >0 value to switch on the \"merginf of overlapping\ - \ mates\" algorithm." - info: - step: "star" - orig_arg: "--peOverlapNbasesMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--peOverlapMMp" - description: "maximum proportion of mismatched bases in the overlap area" - info: - step: "star" - orig_arg: "--peOverlapMMp" - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Windows, Anchors, Binning" - arguments: - - type: "integer" - name: "--winAnchorMultimapNmax" - description: "max number of loci anchors are allowed to map to" - info: - step: "star" - orig_arg: "--winAnchorMultimapNmax" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winBinNbits" - description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ - \ each window will occupy an integer number of bins." 
- info: - step: "star" - orig_arg: "--winBinNbits" - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winAnchorDistNbins" - description: "max number of bins between two anchors that allows aggregation\ - \ of anchors into one window" - info: - step: "star" - orig_arg: "--winAnchorDistNbins" - example: - - 9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winFlankNbins" - description: "log2(winFlank), where win Flank is the size of the left and right\ - \ flanking regions for each window" - info: - step: "star" - orig_arg: "--winFlankNbins" - example: - - 4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--winReadCoverageRelativeMin" - description: "minimum relative coverage of the read sequence by the seeds in\ - \ a window, for STARlong algorithm only." - info: - step: "star" - orig_arg: "--winReadCoverageRelativeMin" - example: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winReadCoverageBasesMin" - description: "minimum number of bases covered by the seeds in a window , for\ - \ STARlong algorithm only." - info: - step: "star" - orig_arg: "--winReadCoverageBasesMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Chimeric Alignments" - arguments: - - type: "string" - name: "--chimOutType" - description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ - - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ - - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ - - WithinBAM HardClip ... 
(default) hard-clipping in the CIGAR for supplemental\ - \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ - \ ... soft-clipping in the CIGAR for supplemental chimeric alignments" - info: - step: "star" - orig_arg: "--chimOutType" - example: - - "Junctions" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimSegmentMin" - description: "minimum length of chimeric segment length, if ==0, no chimeric\ - \ output" - info: - step: "star" - orig_arg: "--chimSegmentMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreMin" - description: "minimum total (summed) score of the chimeric segments" - info: - step: "star" - orig_arg: "--chimScoreMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreDropMax" - description: "max drop (difference) of chimeric score (the sum of scores of\ - \ all chimeric segments) from the read length" - info: - step: "star" - orig_arg: "--chimScoreDropMax" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreSeparation" - description: "minimum difference (separation) between the best chimeric score\ - \ and the next one" - info: - step: "star" - orig_arg: "--chimScoreSeparation" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreJunctionNonGTAG" - description: "penalty for a non-GT/AG chimeric junction" - info: - step: "star" - orig_arg: "--chimScoreJunctionNonGTAG" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimJunctionOverhangMin" - description: "minimum overhang 
for a chimeric junction" - info: - step: "star" - orig_arg: "--chimJunctionOverhangMin" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimSegmentReadGapMax" - description: "maximum gap in the read sequence between chimeric segments" - info: - step: "star" - orig_arg: "--chimSegmentReadGapMax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--chimFilter" - description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ - - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ - \ junction" - info: - step: "star" - orig_arg: "--chimFilter" - example: - - "banGenomicN" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimMainSegmentMultNmax" - description: "maximum number of multi-alignments for the main chimeric segment.\ - \ =1 will prohibit multimapping main segments." - info: - step: "star" - orig_arg: "--chimMainSegmentMultNmax" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapNmax" - description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ - \ old scheme for chimeric detection which only considered unique alignments" - info: - step: "star" - orig_arg: "--chimMultimapNmax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapScoreRange" - description: "the score range for multi-mapping chimeras below the best chimeric\ - \ score. 
Only works with --chimMultimapNmax > 1" - info: - step: "star" - orig_arg: "--chimMultimapScoreRange" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimNonchimScoreDropMin" - description: "to trigger chimeric detection, the drop in the best non-chimeric\ - \ alignment score with respect to the read length has to be greater than this\ - \ value" - info: - step: "star" - orig_arg: "--chimNonchimScoreDropMin" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimOutJunctionFormat" - description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ - \ no comment lines/headers\n- 1 ... comment lines at the end of the file:\ - \ command line and Nreads: total, unique/multi-mapping" - info: - step: "star" - orig_arg: "--chimOutJunctionFormat" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Quantification of Annotations" - arguments: - - type: "string" - name: "--quantMode" - description: "types of quantification requested\n\n- - ... none\n\ - - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ - \ file\n- GeneCounts ... count reads per gene" - info: - step: "star" - orig_arg: "--quantMode" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--quantTranscriptomeBAMcompression" - description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ - \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ - - 10 ... 
maximum compression" - info: - step: "star" - orig_arg: "--quantTranscriptomeBAMcompression" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--quantTranscriptomeBan" - description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ - \ prohibit indels, soft clipping and single-end alignments - compatible with\ - \ RSEM\n- Singleend ... prohibit single-end alignments" - info: - step: "star" - orig_arg: "--quantTranscriptomeBan" - example: - - "IndelSoftclipSingleend" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "2-pass Mapping" - arguments: - - type: "string" - name: "--twopassMode" - description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ - \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ - \ the genome indices on the fly" - info: - step: "star" - orig_arg: "--twopassMode" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--twopass1readsN" - description: "number of reads to process for the 1st step. Use very large number\ - \ (or default -1) to map all reads in the first step." - info: - step: "star" - orig_arg: "--twopass1readsN" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "WASP parameters" - arguments: - - type: "string" - name: "--waspOutputMode" - description: "WASP allele-specific output type. This is re-implementation of\ - \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ - \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ - \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ - \ .\n\n- SAMtag ... 
add WASP tags to the alignments that pass WASP filtering" - info: - step: "star" - orig_arg: "--waspOutputMode" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "STARsolo (single cell RNA-seq) parameters" - arguments: - - type: "string" - name: "--soloType" - description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ - \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ - \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ - \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ - \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ - \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ - \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ - \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ - \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ - \ UMI sequences, alignments deduplicated according to alignment start and\ - \ end (after extending soft-clipped bases)" - info: - step: "star" - orig_arg: "--soloType" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCBwhitelist" - description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ - \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ - \ all cell barcodes are allowed" - info: - step: "star" - orig_arg: "--soloCBwhitelist" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--soloCBstart" - description: "cell barcode start base" - info: - step: "star" - orig_arg: "--soloCBstart" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloCBlen" - description: "cell barcode length" - info: - step: "star" - orig_arg: "--soloCBlen" - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIstart" - description: "UMI start base" - info: - step: "star" - orig_arg: "--soloUMIstart" - example: - - 17 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIlen" - description: "UMI length" - info: - step: "star" - orig_arg: "--soloUMIlen" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeReadLength" - description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ - - 0 ... not defined, do not check" - info: - step: "star" - orig_arg: "--soloBarcodeReadLength" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeMate" - description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ - \n- 0 ... barcode sequence is on separate read, which should always be the\ - \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ - \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" - info: - step: "star" - orig_arg: "--soloBarcodeMate" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBposition" - description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ - \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ - \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ - start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ - \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ - \ position with of the CB start(end) with respect to the Anchor Base\nString\ - \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ - \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" - info: - step: "star" - orig_arg: "--soloCBposition" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIposition" - description: "position of the UMI on the barcode read, same as soloCBposition\n\ - \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ - \ 3_9_3_14" - info: - step: "star" - orig_arg: "--soloUMIposition" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloAdapterSequence" - description: "adapter sequence to anchor barcodes. Only one adapter sequence\ - \ is allowed." - info: - step: "star" - orig_arg: "--soloAdapterSequence" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloAdapterMismatchesNmax" - description: "maximum number of mismatches allowed in adapter sequence." 
- info: - step: "star" - orig_arg: "--soloAdapterMismatchesNmax" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBmatchWLtype" - description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ - \ ... only exact matches allowed\n- 1MM \ - \ ... only one match in whitelist with 1 mismatched base allowed.\ - \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ - \ ... multiple matches in whitelist with 1 mismatched\ - \ base allowed, posterior probability calculation is used choose one of the\ - \ matches.\nAllowed CBs have to have at least one read with exact match. This\ - \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ - \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ - \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ - \ multimatching to WL is allowed for CBs with N-bases. This option matches\ - \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ - \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ - \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ - \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - info: - step: "star" - orig_arg: "--soloCBmatchWLtype" - example: - - "1MM_multi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeSeq" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ - \ CR UR .\nThis parameter is required when running STARsolo with input from\ - \ SAM." 
- info: - step: "star" - orig_arg: "--soloInputSAMattrBarcodeSeq" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeQual" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ - \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ - \ to all bases." - info: - step: "star" - orig_arg: "--soloInputSAMattrBarcodeQual" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloStrand" - description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ - \ information\n- Forward ... read strand same as the original RNA molecule\n\ - - Reverse ... read strand opposite to the original RNA molecule" - info: - step: "star" - orig_arg: "--soloStrand" - example: - - "Forward" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloFeatures" - description: "genomic features for which the UMI counts per Cell Barcode are\ - \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ - - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ - \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ - \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ - \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ - \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ - \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ - \ Do not count reads with 100% exonic overlap in the antisense direction." 
- info: - step: "star" - orig_arg: "--soloFeatures" - example: - - "Gene" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloMultiMappers" - description: "counting method for reads mapping to multiple genes\n\n- Unique\ - \ ... count only reads that map to unique genes\n- Uniform ... uniformly\ - \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ - \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ - \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ - \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ - \ Maximization algorithm" - info: - step: "star" - orig_arg: "--soloMultiMappers" - example: - - "Unique" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIdedup" - description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ - \ ... all UMIs with 1 mismatch distance to each other\ - \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ - \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ - \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ - \ but with more stringent criteria for duplicate UMIs\n- Exact \ - \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ - \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ - \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." - info: - step: "star" - orig_arg: "--soloUMIdedup" - example: - - "1MM_All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIfiltering" - description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ - - - ... basic filtering: remove UMIs with N and homopolymers\ - \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... 
basic + remove\ - \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ - \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ - \ ... basic + remove lower-count UMIs that map to more than one gene, matching\ - \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" - info: - step: "star" - orig_arg: "--soloUMIfiltering" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFileNames" - description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ - \ barcode_sequences cell_feature_count_matrix" - info: - step: "star" - orig_arg: "--soloOutFileNames" - example: - - "Solo.out/" - - "features.tsv" - - "barcodes.tsv" - - "matrix.mtx" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellFilter" - description: "cell filtering type and parameters\n\n- None ... do\ - \ not output filtered cells\n- TopCells ... only report top cells by\ - \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ - \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ - \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ - \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ - \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ - \ in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun\ - \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ - Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ - \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ - \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ - \ 0.99 10 45000 90000 500 0.01\ - \ 20000 0.01 10000" - info: - step: "star" - orig_arg: "--soloCellFilter" - example: - - "CellRanger2.2" - - "3000" - - "0.99" - - "10" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFormatFeaturesGeneField3" - description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ - \ is output." - info: - step: "star" - orig_arg: "--soloOutFormatFeaturesGeneField3" - example: - - "Gene Expression" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellReadStats" - description: "Output reads statistics for each CB\n\n- Standard ... standard\ - \ output" - info: - step: "star" - orig_arg: "--soloCellReadStats" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "HTSeq arguments" - arguments: - - type: "string" - name: "--stranded" - alternatives: - - "-s" - description: "Whether the data is from a strand-specific assay. 'reverse' means\ - \ 'yes' with reversed strand interpretation." - info: - step: "htseq" - orig_arg: "--stranded" - default: - - "yes" - required: false - choices: - - "yes" - - "no" - - "reverse" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--minimum_alignment_quality" - alternatives: - - "-a" - - "--minaqual" - description: "Skip all reads with MAPQ alignment quality lower than the given\ - \ minimum value. 
\nMAPQ is the 5th column of a SAM/BAM file and its usage\ - \ depends on the software \nused to map the reads.\n" - info: - step: "htseq" - orig_arg: "--minaqual" - default: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--type" - alternatives: - - "-t" - description: "Feature type (3rd column in GTF file) to be used, all features\ - \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" - info: - step: "htseq" - orig_arg: "--type" - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--id_attribute" - alternatives: - - "-i" - description: "GTF attribute to be used as feature ID (default, suitable for\ - \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ - \ within the same GTF attribute will be added\ntogether. The typical way of\ - \ using this option is to count all exonic reads from each gene\nand add the\ - \ exons but other uses are possible as well. You can call this option multiple\n\ - times: in that case, the combination of all attributes separated by colons\ - \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ - \ gene_id -i exon_number.\n" - info: - step: "htseq" - orig_arg: "--idattr" - example: - - "gene_id" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--additional_attributes" - description: "Additional feature attributes (suitable for Ensembl GTF files:\ - \ gene_name). 
Use multiple times\nfor more than one additional attribute.\ - \ These attributes are only used as annotations in the\noutput, while the\ - \ determination of how the counts are added together is done based on option\ - \ -i.\n" - info: - step: "htseq" - orig_arg: "--additional-attr" - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--add_chromosome_info" - description: "Store information about the chromosome of each feature as an additional\ - \ attribute\n(e.g. colunm in the TSV output file).\n" - info: - step: "htseq" - orig_arg: "--add-chromosome-info" - direction: "input" - dest: "par" - - type: "string" - name: "--mode" - alternatives: - - "-m" - description: "Mode to handle reads overlapping more than one feature." - info: - step: "htseq" - orig_arg: "--mode" - default: - - "union" - required: false - choices: - - "union" - - "intersection-strict" - - "intersection-nonempty" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--non_unique" - description: "Whether and how to score reads that are not uniquely aligned or\ - \ ambiguously assigned to features." - info: - step: "htseq" - orig_arg: "--nonunique" - default: - - "none" - required: false - choices: - - "none" - - "all" - - "fraction" - - "random" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--secondary_alignments" - description: "Whether to score secondary alignments (0x100 flag)." - info: - step: "htseq" - orig_arg: "--secondary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--supplementary_alignments" - description: "Whether to score supplementary alignments (0x800 flag)." 
- info: - step: "htseq" - orig_arg: "--supplementary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--counts_output_sparse" - description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." - info: - step: "htseq" - orig_arg: "--counts-output-sparse" - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Align fastq files using STAR." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - env: - - "STAR_VERSION 2.7.10b" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - - type: "apt" - packages: - - "samtools" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "pyyaml" - - "HTSeq" - - 
"multiprocess" - - "gtfparse<2.0" - - "pandas" - - "multiqc~=1.15.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "pytest" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star/multi_star" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/multi_star/multi_star b/target/docker/mapping/multi_star/multi_star deleted file mode 100755 index 236aecae1eb..00000000000 --- a/target/docker/mapping/multi_star/multi_star +++ /dev/null @@ -1,6362 +0,0 @@ -#!/usr/bin/env bash - -# multi_star 
0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ 
$SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="multi_star" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "multi_star 0.12.3" - echo "" - echo "Align fastq files using STAR." - echo "" - echo "Input/Output:" - echo " --input_id" - echo " type: string, required parameter, multiple values allowed" - echo " example: mysample;mysample" - echo " The ID of the sample being processed. This vector should have the same" - echo " length as the \`--input_r1\` argument." - echo "" - echo " --input_r1" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example:" - echo "mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L002_R1_001.fastq.gz" - echo " Paths to the sequences to be mapped. If using Illumina paired-end reads," - echo " only the R1 files should be passed." 
- echo "" - echo " --input_r2" - echo " type: file, multiple values allowed, file must exist" - echo " example:" - echo "mysample_S1_L001_R2_001.fastq.gz;mysample_S1_L002_R2_001.fastq.gz" - echo " Paths to the sequences to be mapped. If using Illumina paired-end reads," - echo " only the R2 files should be passed." - echo "" - echo " --genomeDir, --reference_index" - echo " type: file, required parameter, file must exist" - echo " example: /path/to/reference" - echo " Path to the reference built by star_build_reference. Corresponds to the" - echo " --genomeDir argument in the STAR command." - echo "" - echo " --reference_gtf" - echo " type: file, required parameter, file must exist" - echo " example: genes.gtf" - echo " Path to the gtf reference file." - echo "" - echo " --outFileNamePrefix, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/foo" - echo " Path to output directory. Corresponds to the --outFileNamePrefix" - echo " argument in the STAR command." - echo "" - echo "Processing arguments:" - echo " --run_htseq_count" - echo " type: boolean" - echo " default: true" - echo " Whether or not to also run htseq-count after STAR." - echo "" - echo " --run_multiqc" - echo " type: boolean" - echo " default: true" - echo " Whether or not to also run MultiQC at the end." - echo "" - echo " --min_success_rate" - echo " type: double" - echo " default: 0.5" - echo " Fail when the success rate is below this threshold." - echo "" - echo "Run Parameters:" - echo " --runRNGseed" - echo " type: integer" - echo " example: 777" - echo " random number generator seed." - echo "" - echo "Genome Parameters:" - echo " --genomeFastaFiles" - echo " type: file, multiple values allowed, file must exist" - echo " path(s) to the fasta files with the genome sequences, separated by" - echo " spaces. These files should be plain text FASTA files, they *cannot* be" - echo " zipped." 
- echo " Required for the genome generation (--runMode genomeGenerate). Can also" - echo " be used in the mapping (--runMode alignReads) to add extra (new)" - echo " sequences to the genome (e.g. spike-ins)." - echo "" - echo "Splice Junctions Database:" - echo " --sjdbFileChrStartEnd" - echo " type: string, multiple values allowed" - echo " path to the files with genomic coordinates (chr start end" - echo " strand) for the splice junction introns. Multiple files can be" - echo " supplied and will be concatenated." - echo "" - echo " --sjdbGTFfile" - echo " type: file, file must exist" - echo " path to the GTF file with annotations" - echo "" - echo " --sjdbGTFchrPrefix" - echo " type: string" - echo " prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL" - echo " annotations with UCSC genomes)" - echo "" - echo " --sjdbGTFfeatureExon" - echo " type: string" - echo " example: exon" - echo " feature type in GTF file to be used as exons for building transcripts" - echo "" - echo " --sjdbGTFtagExonParentTranscript" - echo " type: string" - echo " example: transcript_id" - echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" - echo " works for GTF files)" - echo "" - echo " --sjdbGTFtagExonParentGene" - echo " type: string" - echo " example: gene_id" - echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" - echo " files)" - echo "" - echo " --sjdbGTFtagExonParentGeneName" - echo " type: string, multiple values allowed" - echo " example: gene_name" - echo " GTF attribute name for parent gene name" - echo "" - echo " --sjdbGTFtagExonParentGeneType" - echo " type: string, multiple values allowed" - echo " example: gene_type;gene_biotype" - echo " GTF attribute name for parent gene type" - echo "" - echo " --sjdbOverhang" - echo " type: integer" - echo " example: 100" - echo " length of the donor/acceptor sequence on each side of the junctions," - echo " ideally = (mate_length - 1)" - echo "" - echo " 
--sjdbScore" - echo " type: integer" - echo " example: 2" - echo " extra alignment score for alignments that cross database junctions" - echo "" - echo " --sjdbInsertSave" - echo " type: string" - echo " example: Basic" - echo " which files to save when sjdb junctions are inserted on the fly at the" - echo " mapping step" - echo " - Basic ... only small junction / transcript files" - echo " - All ... all files including big Genome, SA and SAindex - this will" - echo " create a complete genome directory" - echo "" - echo "Variation parameters:" - echo " --varVCFfile" - echo " type: string" - echo " path to the VCF file that contains variation data. The 10th column" - echo " should contain the genotype information, e.g. 0/1" - echo "" - echo "Read Parameters:" - echo " --readFilesType" - echo " type: string" - echo " example: Fastx" - echo " format of input read files" - echo " - Fastx ... FASTA or FASTQ" - echo " - SAM SE ... SAM or BAM single-end reads; for BAM use" - echo " --readFilesCommand samtools view" - echo " - SAM PE ... SAM or BAM paired-end reads; for BAM use" - echo " --readFilesCommand samtools view" - echo "" - echo " --readFilesSAMattrKeep" - echo " type: string, multiple values allowed" - echo " example: All" - echo " for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM," - echo " e.g.: --readFilesSAMtagsKeep RG PL" - echo " - All ... keep all tags" - echo " - None ... do not keep any tags" - echo "" - echo " --readFilesManifest" - echo " type: file, file must exist" - echo " path to the \"manifest\" file with the names of read files. The manifest" - echo " file should contain 3 tab-separated columns:" - echo " paired-end reads: read1_file_name \$tab\$ read2_file_name \$tab\$" - echo " read_group_line." - echo " single-end reads: read1_file_name \$tab\$ - \$tab\$" - echo " read_group_line." - echo " Spaces, but not tabs are allowed in file names." 
- echo " If read_group_line does not start with ID:, it can only contain one ID" - echo " field, and ID: will be added to it." - echo " If read_group_line starts with ID:, it can contain several fields" - echo " separated by \$tab\$, and all fields will be be copied verbatim into SAM" - echo " @RG header line." - echo "" - echo " --readFilesPrefix" - echo " type: string" - echo " prefix for the read files names, i.e. it will be added in front of the" - echo " strings in --readFilesIn" - echo "" - echo " --readFilesCommand" - echo " type: string, multiple values allowed" - echo " command line to execute for each of the input file. This command should" - echo " generate FASTA or FASTQ text and send it to stdout" - echo " For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2" - echo " files, etc." - echo "" - echo " --readMapNumber" - echo " type: integer" - echo " example: -1" - echo " number of reads to map from the beginning of the file" - echo " -1: map all reads" - echo "" - echo " --readMatesLengthsIn" - echo " type: string" - echo " example: NotEqual" - echo " Equal/NotEqual - lengths of names,sequences,qualities for both mates are" - echo " the same / not the same. NotEqual is safe in all situations." - echo "" - echo " --readNameSeparator" - echo " type: string, multiple values allowed" - echo " example: /" - echo " character(s) separating the part of the read names that will be trimmed" - echo " in output (read name after space is always trimmed)" - echo "" - echo " --readQualityScoreBase" - echo " type: integer" - echo " example: 33" - echo " number to be subtracted from the ASCII code to get Phred quality score" - echo "" - echo "Read Clipping:" - echo " --clipAdapterType" - echo " type: string" - echo " example: Hamming" - echo " adapter clipping type" - echo " - Hamming ... adapter clipping based on Hamming distance, with the" - echo " number of mismatches controlled by --clip5pAdapterMMp" - echo " - CellRanger4 ... 
5p and 3p adapter clipping similar to CellRanger4." - echo " Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal" - echo " - None ... no adapter clipping, all other clip* parameters are" - echo " disregarded" - echo "" - echo " --clip3pNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number(s) of bases to clip from 3p of each mate. If one value is given," - echo " it will be assumed the same for both mates." - echo "" - echo " --clip3pAdapterSeq" - echo " type: string, multiple values allowed" - echo " adapter sequences to clip from 3p of each mate. If one value is given," - echo " it will be assumed the same for both mates." - echo " - polyA ... polyA sequence with the length equal to read length" - echo "" - echo " --clip3pAdapterMMp" - echo " type: double, multiple values allowed" - echo " example: 0.1" - echo " max proportion of mismatches for 3p adapter clipping for each mate. If" - echo " one value is given, it will be assumed the same for both mates." - echo "" - echo " --clip3pAfterAdapterNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number of bases to clip from 3p of each mate after the adapter clipping." - echo " If one value is given, it will be assumed the same for both mates." - echo "" - echo " --clip5pNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number(s) of bases to clip from 5p of each mate. If one value is given," - echo " it will be assumed the same for both mates." 
- echo "" - echo "Limits:" - echo " --limitGenomeGenerateRAM" - echo " type: long" - echo " example: 31000000000" - echo " maximum available RAM (bytes) for genome generation" - echo "" - echo " --limitIObufferSize" - echo " type: long, multiple values allowed" - echo " example: 30000000;50000000" - echo " max available buffers size (bytes) for input/output, per thread" - echo "" - echo " --limitOutSAMoneReadBytes" - echo " type: long" - echo " example: 100000" - echo " max size of the SAM record (bytes) for one read. Recommended value:" - echo " >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - echo "" - echo " --limitOutSJoneRead" - echo " type: integer" - echo " example: 1000" - echo " max number of junctions for one read (including all multi-mappers)" - echo "" - echo " --limitOutSJcollapsed" - echo " type: integer" - echo " example: 1000000" - echo " max number of collapsed junctions" - echo "" - echo " --limitBAMsortRAM" - echo " type: long" - echo " example: 0" - echo " maximum available RAM (bytes) for sorting BAM. If =0, it will be set to" - echo " the genome index size. 0 value can only be used with --genomeLoad" - echo " NoSharedMemory option." - echo "" - echo " --limitSjdbInsertNsj" - echo " type: integer" - echo " example: 1000000" - echo " maximum number of junctions to be inserted to the genome on the fly at" - echo " the mapping stage, including those from annotations and those detected" - echo " in the 1st step of the 2-pass run" - echo "" - echo " --limitNreadsSoft" - echo " type: integer" - echo " example: -1" - echo " soft limit on the number of reads" - echo "" - echo "Output: general:" - echo " --outTmpKeep" - echo " type: string" - echo " whether to keep the temporary files after STAR runs is finished" - echo " - None ... remove all temporary files" - echo " - All ... 
keep all files" - echo "" - echo " --outStd" - echo " type: string" - echo " example: Log" - echo " which output will be directed to stdout (standard out)" - echo " - Log ... log messages" - echo " - SAM ... alignments in SAM format (which normally" - echo " are output to Aligned.out.sam file), normal standard output will go into" - echo " Log.std.out" - echo " - BAM_Unsorted ... alignments in BAM format, unsorted." - echo " Requires --outSAMtype BAM Unsorted" - echo " - BAM_SortedByCoordinate ... alignments in BAM format, sorted by" - echo " coordinate. Requires --outSAMtype BAM SortedByCoordinate" - echo " - BAM_Quant ... alignments to transcriptome in BAM format," - echo " unsorted. Requires --quantMode TranscriptomeSAM" - echo "" - echo " --outReadsUnmapped" - echo " type: string" - echo " output of unmapped and partially mapped (i.e. mapped only one mate of a" - echo " paired end read) reads in separate file(s)." - echo " - None ... no output" - echo " - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - echo "" - echo " --outQSconversionAdd" - echo " type: integer" - echo " example: 0" - echo " add this number to the quality score (e.g. to convert from Illumina to" - echo " Sanger, use -31)" - echo "" - echo " --outMultimapperOrder" - echo " type: string" - echo " example: Old_2.4" - echo " order of multimapping alignments in the output files" - echo " - Old_2.4 ... quasi-random order used before 2.5.0" - echo " - Random ... random order of alignments for each" - echo " multi-mapper. Read mates (pairs) are always adjacent, all alignment for" - echo " each read stay together. This option will become default in the future" - echo " releases." - echo "" - echo "Output: SAM and BAM:" - echo " --outSAMmode" - echo " type: string" - echo " example: Full" - echo " mode of SAM output" - echo " - None ... no SAM output" - echo " - Full ... full SAM output" - echo " - NoQS ... 
full SAM but without quality scores" - echo "" - echo " --outSAMstrandField" - echo " type: string" - echo " Cufflinks-like strand field flag" - echo " - None ... not used" - echo " - intronMotif ... strand derived from the intron motif. This option" - echo " changes the output alignments: reads with inconsistent and/or" - echo " non-canonical introns are filtered out." - echo "" - echo " --outSAMattributes" - echo " type: string, multiple values allowed" - echo " example: Standard" - echo " a string of desired SAM attributes, in the order desired for the output" - echo " SAM. Tags can be listed in any combination/order." - echo " ***Presets:" - echo " - None ... no attributes" - echo " - Standard ... NH HI AS nM" - echo " - All ... NH HI AS nM NM MD jM jI MC ch" - echo " ***Alignment:" - echo " - NH ... number of loci the reads maps to: =1 for unique" - echo " mappers, >1 for multimappers. Standard SAM tag." - echo " - HI ... multiple alignment index, starts with" - echo " --outSAMattrIHstart (=1 by default). Standard SAM tag." - echo " - AS ... local alignment score, +1/-1 for matches/mismateches," - echo " score* penalties for indels and gaps. For PE reads, total score for two" - echo " mates. Stadnard SAM tag." - echo " - nM ... number of mismatches. For PE reads, sum over two" - echo " mates." - echo " - NM ... edit distance to the reference (number of mismatched +" - echo " inserted + deleted bases) for each mate. Standard SAM tag." - echo " - MD ... string encoding mismatched and deleted reference bases" - echo " (see standard SAM specifications). Standard SAM tag." - echo " - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0:" - echo " non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6:" - echo " GT/AT. If splice junctions database is used, and a junction is" - echo " annotated, 20 is added to its motif value." - echo " - jI ... start and end of introns for all junctions (1-based)." - echo " - XS ... 
alignment strand according to --outSAMstrandField." - echo " - MC ... mate's CIGAR string. Standard SAM tag." - echo " - ch ... marks all segment of all chimeric alingments for" - echo " --chimOutType WithinBAM output." - echo " - cN ... number of bases clipped from the read ends: 5' and 3'" - echo " ***Variation:" - echo " - vA ... variant allele" - echo " - vG ... genomic coordinate of the variant overlapped by the" - echo " read." - echo " - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 -" - echo " alignment does not pass WASP filtering. Requires --waspOutputMode" - echo " SAMtag." - echo " ***STARsolo:" - echo " - CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs" - echo " for the solo* demultiplexing." - echo " - GX GN ... gene ID and gene name for unique-gene reads." - echo " - gx gn ... gene IDs and gene names for unique- and multi-gene" - echo " reads." - echo " - CB UB ... error-corrected cell barcodes and UMIs for solo*" - echo " demultiplexing. Requires --outSAMtype BAM SortedByCoordinate." - echo " - sM ... assessment of CB and UMI." - echo " - sS ... sequence of the entire barcode (CB,UMI,adapter)." - echo " - sQ ... quality of the entire barcode." - echo " ***Unsupported/undocumented:" - echo " - ha ... haplotype (1/2) when mapping to the diploid genome." - echo " Requires genome generated with --genomeTransformType Diploid ." - echo " - rB ... alignment block read/genomic coordinates." - echo " - vR ... read coordinate of the variant." - echo "" - echo " --outSAMattrIHstart" - echo " type: integer" - echo " example: 1" - echo " start value for the IH attribute. 0 may be required by some downstream" - echo " software, such as Cufflinks or StringTie." - echo "" - echo " --outSAMunmapped" - echo " type: string, multiple values allowed" - echo " output of unmapped reads in the SAM format" - echo " 1st word:" - echo " - None ... no output" - echo " - Within ... output unmapped reads within the main SAM file (i.e." 
- echo " Aligned.out.sam)" - echo " 2nd word:" - echo " - KeepPairs ... record unmapped mate for each alignment, and, in case of" - echo " unsorted output, keep it adjacent to its mapped mate. Only affects" - echo " multi-mapping reads." - echo "" - echo " --outSAMorder" - echo " type: string" - echo " example: Paired" - echo " type of sorting for the SAM output" - echo " Paired: one mate after the other for all paired alignments" - echo " PairedKeepInputOrder: one mate after the other for all paired" - echo " alignments, the order is kept the same as in the input FASTQ files" - echo "" - echo " --outSAMprimaryFlag" - echo " type: string" - echo " example: OneBestScore" - echo " which alignments are considered primary - all others will be marked with" - echo " 0x100 bit in the FLAG" - echo " - OneBestScore ... only one alignment with the best score is primary" - echo " - AllBestScore ... all alignments with the best score are primary" - echo "" - echo " --outSAMreadID" - echo " type: string" - echo " example: Standard" - echo " read ID record type" - echo " - Standard ... first word (until space) from the FASTx read ID line," - echo " removing /1,/2 from the end" - echo " - Number ... read number (index) in the FASTx file" - echo "" - echo " --outSAMmapqUnique" - echo " type: integer" - echo " example: 255" - echo " 0 to 255: the MAPQ value for unique mappers" - echo "" - echo " --outSAMflagOR" - echo " type: integer" - echo " example: 0" - echo " 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e." - echo " FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set" - echo " by STAR, and after outSAMflagAND. Can be used to set specific bits that" - echo " are not set otherwise." - echo "" - echo " --outSAMflagAND" - echo " type: integer" - echo " example: 65535" - echo " 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e." - echo " FLAG=FLAG & outSAMflagOR. 
This is applied after all flags have been set" - echo " by STAR, but before outSAMflagOR. Can be used to unset specific bits" - echo " that are not set otherwise." - echo "" - echo " --outSAMattrRGline" - echo " type: string, multiple values allowed" - echo " SAM/BAM read group line. The first word contains the read group" - echo " identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx" - echo " CN:yy \"DS:z z z\"." - echo " xxx will be added as RG tag to each output alignment. Any spaces in the" - echo " tag values have to be double quoted." - echo " Comma separated RG lines correspons to different (comma separated) input" - echo " files in --readFilesIn. Commas have to be surrounded by spaces, e.g." - echo " --outSAMattrRGline ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - echo "" - echo " --outSAMheaderHD" - echo " type: string, multiple values allowed" - echo " @HD (header) line of the SAM header" - echo "" - echo " --outSAMheaderPG" - echo " type: string, multiple values allowed" - echo " extra @PG (software) line of the SAM header (in addition to STAR)" - echo "" - echo " --outSAMheaderCommentFile" - echo " type: string" - echo " path to the file with @CO (comment) lines of the SAM header" - echo "" - echo " --outSAMfilter" - echo " type: string, multiple values allowed" - echo " filter the output into main SAM/BAM files" - echo " - KeepOnlyAddedReferences ... only keep the reads for which all" - echo " alignments are to the extra reference sequences added with" - echo " --genomeFastaFiles at the mapping stage." - echo " - KeepAllAddedReferences ... keep all alignments to the extra reference" - echo " sequences added with --genomeFastaFiles at the mapping stage." - echo "" - echo " --outSAMmultNmax" - echo " type: integer" - echo " example: -1" - echo " max number of multiple alignments for a read that will be output to the" - echo " SAM/BAM files. 
Note that if this value is not equal to -1, the top" - echo " scoring alignment will be output first" - echo " - -1 ... all alignments (up to --outFilterMultimapNmax) will be output" - echo "" - echo " --outSAMtlen" - echo " type: integer" - echo " example: 1" - echo " calculation method for the TLEN field in the SAM/BAM files" - echo " - 1 ... leftmost base of the (+)strand mate to rightmost base of the" - echo " (-)mate. (+)sign for the (+)strand mate" - echo " - 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign" - echo " for the mate with the leftmost base. This is different from 1 for" - echo " overlapping mates with protruding ends" - echo "" - echo " --outBAMcompression" - echo " type: integer" - echo " example: 1" - echo " -1 to 10 BAM compression level, -1=default compression (6?), 0=no" - echo " compression, 10=maximum compression" - echo "" - echo " --outBAMsortingThreadN" - echo " type: integer" - echo " example: 0" - echo " >=0: number of threads for BAM sorting. 0 will default to" - echo " min(6,--runThreadN)." - echo "" - echo " --outBAMsortingBinsN" - echo " type: integer" - echo " example: 50" - echo " >0: number of genome bins for coordinate-sorting" - echo "" - echo "BAM processing:" - echo " --bamRemoveDuplicatesType" - echo " type: string" - echo " mark duplicates in the BAM file, for now only works with (i) sorted BAM" - echo " fed with inputBAMfile, and (ii) for paired-end alignments only" - echo " - - ... no duplicate removal/marking" - echo " - UniqueIdentical ... mark all multimappers, and duplicate" - echo " unique mappers. The coordinates, FLAG, CIGAR must be identical" - echo " - UniqueIdenticalNotMulti ... mark duplicate unique mappers but not" - echo " multimappers." - echo "" - echo " --bamRemoveDuplicatesMate2basesN" - echo " type: integer" - echo " example: 0" - echo " number of bases from the 5' of mate 2 to use in collapsing (e.g. 
for" - echo " RAMPAGE)" - echo "" - echo "Output Wiggle:" - echo " --outWigType" - echo " type: string, multiple values allowed" - echo " type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires" - echo " sorted BAM: --outSAMtype BAM SortedByCoordinate ." - echo " 1st word:" - echo " - None ... no signal output" - echo " - bedGraph ... bedGraph format" - echo " - wiggle ... wiggle format" - echo " 2nd word:" - echo " - read1_5p ... signal from only 5' of the 1st read, useful for" - echo " CAGE/RAMPAGE etc" - echo " - read2 ... signal from only 2nd read" - echo "" - echo " --outWigStrand" - echo " type: string" - echo " example: Stranded" - echo " strandedness of wiggle/bedGraph output" - echo " - Stranded ... separate strands, str1 and str2" - echo " - Unstranded ... collapsed strands" - echo "" - echo " --outWigReferencesPrefix" - echo " type: string" - echo " prefix matching reference names to include in the output wiggle file," - echo " e.g. \"chr\", default \"-\" - include all references" - echo "" - echo " --outWigNorm" - echo " type: string" - echo " example: RPM" - echo " type of normalization for the signal" - echo " - RPM ... reads per million of mapped reads" - echo " - None ... no normalization, \"raw\" counts" - echo "" - echo "Output Filtering:" - echo " --outFilterType" - echo " type: string" - echo " example: Normal" - echo " type of filtering" - echo " - Normal ... standard filtering using only current alignment" - echo " - BySJout ... keep only those reads that contain junctions that passed" - echo " filtering into SJ.out.tab" - echo "" - echo " --outFilterMultimapScoreRange" - echo " type: integer" - echo " example: 1" - echo " the score range below the maximum score for multimapping alignments" - echo "" - echo " --outFilterMultimapNmax" - echo " type: integer" - echo " example: 10" - echo " maximum number of loci the read is allowed to map to. 
Alignments (all of" - echo " them) will be output only if the read maps to no more loci than this" - echo " value." - echo " Otherwise no alignments will be output, and the read will be counted as" - echo " \"mapped to too many loci\" in the Log.final.out ." - echo "" - echo " --outFilterMismatchNmax" - echo " type: integer" - echo " example: 10" - echo " alignment will be output only if it has no more mismatches than this" - echo " value." - echo "" - echo " --outFilterMismatchNoverLmax" - echo " type: double" - echo " example: 0.3" - echo " alignment will be output only if its ratio of mismatches to *mapped*" - echo " length is less than or equal to this value." - echo "" - echo " --outFilterMismatchNoverReadLmax" - echo " type: double" - echo " example: 1.0" - echo " alignment will be output only if its ratio of mismatches to *read*" - echo " length is less than or equal to this value." - echo "" - echo " --outFilterScoreMin" - echo " type: integer" - echo " example: 0" - echo " alignment will be output only if its score is higher than or equal to" - echo " this value." - echo "" - echo " --outFilterScoreMinOverLread" - echo " type: double" - echo " example: 0.66" - echo " same as outFilterScoreMin, but normalized to read length (sum of mates'" - echo " lengths for paired-end reads)" - echo "" - echo " --outFilterMatchNmin" - echo " type: integer" - echo " example: 0" - echo " alignment will be output only if the number of matched bases is higher" - echo " than or equal to this value." - echo "" - echo " --outFilterMatchNminOverLread" - echo " type: double" - echo " example: 0.66" - echo " sam as outFilterMatchNmin, but normalized to the read length (sum of" - echo " mates' lengths for paired-end reads)." - echo "" - echo " --outFilterIntronMotifs" - echo " type: string" - echo " filter alignment using their motifs" - echo " - None ... no filtering" - echo " - RemoveNoncanonical ... 
filter out alignments that contain" - echo " non-canonical junctions" - echo " - RemoveNoncanonicalUnannotated ... filter out alignments that contain" - echo " non-canonical unannotated junctions when using annotated splice" - echo " junctions database. The annotated non-canonical junctions will be kept." - echo "" - echo " --outFilterIntronStrands" - echo " type: string" - echo " example: RemoveInconsistentStrands" - echo " filter alignments" - echo " - RemoveInconsistentStrands ... remove alignments that have" - echo " junctions with inconsistent strands" - echo " - None ... no filtering" - echo "" - echo "Output splice junctions (SJ.out.tab):" - echo " --outSJtype" - echo " type: string" - echo " example: Standard" - echo " type of splice junction output" - echo " - Standard ... standard SJ.out.tab output" - echo " - None ... no splice junction output" - echo "" - echo "Output Filtering: Splice Junctions:" - echo " --outSJfilterReads" - echo " type: string" - echo " example: All" - echo " which reads to consider for collapsed splice junctions output" - echo " - All ... all reads, unique- and multi-mappers" - echo " - Unique ... uniquely mapping reads only" - echo "" - echo " --outSJfilterOverhangMin" - echo " type: integer, multiple values allowed" - echo " example: 30;12;12;12" - echo " minimum overhang length for splice junctions on both sides for: (1)" - echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterCountUniqueMin" - echo " type: integer, multiple values allowed" - echo " example: 3;1;1;1" - echo " minimum uniquely mapping read count per junction for: (1) non-canonical" - echo " motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC" - echo " and GT/AT motif. 
-1 means no output for that motif" - echo " Junctions are output if one of outSJfilterCountUniqueMin OR" - echo " outSJfilterCountTotalMin conditions are satisfied" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterCountTotalMin" - echo " type: integer, multiple values allowed" - echo " example: 3;1;1;1" - echo " minimum total (multi-mapping+unique) read count per junction for: (1)" - echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" - echo " Junctions are output if one of outSJfilterCountUniqueMin OR" - echo " outSJfilterCountTotalMin conditions are satisfied" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterDistToOtherSJmin" - echo " type: integer, multiple values allowed" - echo " example: 10;0;5;10" - echo " minimum allowed distance to other junctions' donor/acceptor" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterIntronMaxVsReadN" - echo " type: integer, multiple values allowed" - echo " example: 50000;100000;200000" - echo " maximum gap allowed for junctions supported by 1,2,3,,,N reads" - echo " i.e. by default junctions supported by 1 read can have gaps <=50000b, by" - echo " 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap" - echo " <=alignIntronMax" - echo " does not apply to annotated junctions" - echo "" - echo "Scoring:" - echo " --scoreGap" - echo " type: integer" - echo " example: 0" - echo " splice junction penalty (independent on intron motif)" - echo "" - echo " --scoreGapNoncan" - echo " type: integer" - echo " example: -8" - echo " non-canonical junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGapGCAG" - echo " type: integer" - echo " example: -4" - echo " GC/AG and CT/GC junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGapATAC" - echo " type: integer" - echo " example: -8" - echo " AT/AC and GT/AT junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGenomicLengthLog2scale" - echo " type: integer" - echo " example: 0" - echo " extra score logarithmically scaled with genomic length of the alignment:" - echo " scoreGenomicLengthLog2scale*log2(genomicLength)" - echo "" - echo " --scoreDelOpen" - echo " type: integer" - echo " example: -2" - echo " deletion open penalty" - echo "" - echo " --scoreDelBase" - echo " type: integer" - echo " example: -2" - echo " deletion extension penalty per base (in addition to scoreDelOpen)" - echo "" - echo " --scoreInsOpen" - echo " type: integer" - echo " example: -2" - echo " insertion open penalty" - echo "" - echo " --scoreInsBase" - echo " type: integer" - echo " example: -2" - echo " insertion extension penalty per base (in addition to scoreInsOpen)" - echo "" - echo " --scoreStitchSJshift" - echo " type: integer" - echo " example: 1" - echo " maximum score reduction while searching for SJ boundaries in the" - echo " stitching step" - echo "" - echo "Alignments and Seeding:" - echo " --seedSearchStartLmax" - echo " type: integer" - echo " example: 50" - echo " defines the search start point through the read - the read is split into" - echo " pieces no longer than this value" - echo "" - echo " --seedSearchStartLmaxOverLread" - echo " type: double" - echo " 
example: 1.0" - echo " seedSearchStartLmax normalized to read length (sum of mates' lengths for" - echo " paired-end reads)" - echo "" - echo " --seedSearchLmax" - echo " type: integer" - echo " example: 0" - echo " defines the maximum length of the seeds, if =0 seed length is not" - echo " limited" - echo "" - echo " --seedMultimapNmax" - echo " type: integer" - echo " example: 10000" - echo " only pieces that map fewer than this value are utilized in the stitching" - echo " procedure" - echo "" - echo " --seedPerReadNmax" - echo " type: integer" - echo " example: 1000" - echo " max number of seeds per read" - echo "" - echo " --seedPerWindowNmax" - echo " type: integer" - echo " example: 50" - echo " max number of seeds per window" - echo "" - echo " --seedNoneLociPerWindow" - echo " type: integer" - echo " example: 10" - echo " max number of one seed loci per window" - echo "" - echo " --seedSplitMin" - echo " type: integer" - echo " example: 12" - echo " min length of the seed sequences split by Ns or mate gap" - echo "" - echo " --seedMapMin" - echo " type: integer" - echo " example: 5" - echo " min length of seeds to be mapped" - echo "" - echo " --alignIntronMin" - echo " type: integer" - echo " example: 21" - echo " minimum intron size, genomic gap is considered intron if its" - echo " length>=alignIntronMin, otherwise it is considered Deletion" - echo "" - echo " --alignIntronMax" - echo " type: integer" - echo " example: 0" - echo " maximum intron size, if 0, max intron size will be determined by" - echo " (2^winBinNbits)*winAnchorDistNbins" - echo "" - echo " --alignMatesGapMax" - echo " type: integer" - echo " example: 0" - echo " maximum gap between two mates, if 0, max intron gap will be determined" - echo " by (2^winBinNbits)*winAnchorDistNbins" - echo "" - echo " --alignSJoverhangMin" - echo " type: integer" - echo " example: 5" - echo " minimum overhang (i.e. 
block size) for spliced alignments" - echo "" - echo " --alignSJstitchMismatchNmax" - echo " type: integer, multiple values allowed" - echo " example: 0;-1;0;0" - echo " maximum number of mismatches for stitching of the splice junctions (-1:" - echo " no limit)." - echo " (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif." - echo "" - echo " --alignSJDBoverhangMin" - echo " type: integer" - echo " example: 3" - echo " minimum overhang (i.e. block size) for annotated (sjdb) spliced" - echo " alignments" - echo "" - echo " --alignSplicedMateMapLmin" - echo " type: integer" - echo " example: 0" - echo " minimum mapped length for a read mate that is spliced" - echo "" - echo " --alignSplicedMateMapLminOverLmate" - echo " type: double" - echo " example: 0.66" - echo " alignSplicedMateMapLmin normalized to mate length" - echo "" - echo " --alignWindowsPerReadNmax" - echo " type: integer" - echo " example: 10000" - echo " max number of windows per read" - echo "" - echo " --alignTranscriptsPerWindowNmax" - echo " type: integer" - echo " example: 100" - echo " max number of transcripts per window" - echo "" - echo " --alignTranscriptsPerReadNmax" - echo " type: integer" - echo " example: 10000" - echo " max number of different alignments per read to consider" - echo "" - echo " --alignEndsType" - echo " type: string" - echo " example: Local" - echo " type of read ends alignment" - echo " - Local ... standard local alignment with soft-clipping" - echo " allowed" - echo " - EndToEnd ... force end-to-end read alignment, do not" - echo " soft-clip" - echo " - Extend5pOfRead1 ... fully extend only the 5p of the read1, all other" - echo " ends: local alignment" - echo " - Extend5pOfReads12 ... 
fully extend only the 5p of the both read1 and" - echo " read2, all other ends: local alignment" - echo "" - echo " --alignEndsProtrude" - echo " type: string" - echo " example: 0 ConcordantPair" - echo " allow protrusion of alignment ends, i.e. start (end) of the +strand mate" - echo " downstream of the start (end) of the -strand mate" - echo " 1st word: int: maximum number of protrusion bases allowed" - echo " 2nd word: string:" - echo " - ConcordantPair ... report alignments with non-zero" - echo " protrusion as concordant pairs" - echo " - DiscordantPair ... report alignments with non-zero" - echo " protrusion as discordant pairs" - echo "" - echo " --alignSoftClipAtReferenceEnds" - echo " type: string" - echo " example: Yes" - echo " allow the soft-clipping of the alignments past the end of the" - echo " chromosomes" - echo " - Yes ... allow" - echo " - No ... prohibit, useful for compatibility with Cufflinks" - echo "" - echo " --alignInsertionFlush" - echo " type: string" - echo " how to flush ambiguous insertion positions" - echo " - None ... insertions are not flushed" - echo " - Right ... insertions are flushed to the right" - echo "" - echo "Paired-End reads:" - echo " --peOverlapNbasesMin" - echo " type: integer" - echo " example: 0" - echo " minimum number of overlapping bases to trigger mates merging and" - echo " realignment. Specify >0 value to switch on the \"merginf of overlapping" - echo " mates\" algorithm." 
- echo "" - echo " --peOverlapMMp" - echo " type: double" - echo " example: 0.01" - echo " maximum proportion of mismatched bases in the overlap area" - echo "" - echo "Windows, Anchors, Binning:" - echo " --winAnchorMultimapNmax" - echo " type: integer" - echo " example: 50" - echo " max number of loci anchors are allowed to map to" - echo "" - echo " --winBinNbits" - echo " type: integer" - echo " example: 16" - echo " =log2(winBin), where winBin is the size of the bin for the" - echo " windows/clustering, each window will occupy an integer number of bins." - echo "" - echo " --winAnchorDistNbins" - echo " type: integer" - echo " example: 9" - echo " max number of bins between two anchors that allows aggregation of" - echo " anchors into one window" - echo "" - echo " --winFlankNbins" - echo " type: integer" - echo " example: 4" - echo " log2(winFlank), where win Flank is the size of the left and right" - echo " flanking regions for each window" - echo "" - echo " --winReadCoverageRelativeMin" - echo " type: double" - echo " example: 0.5" - echo " minimum relative coverage of the read sequence by the seeds in a window," - echo " for STARlong algorithm only." - echo "" - echo " --winReadCoverageBasesMin" - echo " type: integer" - echo " example: 0" - echo " minimum number of bases covered by the seeds in a window , for STARlong" - echo " algorithm only." - echo "" - echo "Chimeric Alignments:" - echo " --chimOutType" - echo " type: string, multiple values allowed" - echo " example: Junctions" - echo " type of chimeric output" - echo " - Junctions ... Chimeric.out.junction" - echo " - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file" - echo " - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)" - echo " - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for" - echo " supplemental chimeric alignments (default if no 2nd word is present)" - echo " - WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental" - echo " chimeric alignments" - echo "" - echo " --chimSegmentMin" - echo " type: integer" - echo " example: 0" - echo " minimum length of chimeric segment length, if ==0, no chimeric output" - echo "" - echo " --chimScoreMin" - echo " type: integer" - echo " example: 0" - echo " minimum total (summed) score of the chimeric segments" - echo "" - echo " --chimScoreDropMax" - echo " type: integer" - echo " example: 20" - echo " max drop (difference) of chimeric score (the sum of scores of all" - echo " chimeric segments) from the read length" - echo "" - echo " --chimScoreSeparation" - echo " type: integer" - echo " example: 10" - echo " minimum difference (separation) between the best chimeric score and the" - echo " next one" - echo "" - echo " --chimScoreJunctionNonGTAG" - echo " type: integer" - echo " example: -1" - echo " penalty for a non-GT/AG chimeric junction" - echo "" - echo " --chimJunctionOverhangMin" - echo " type: integer" - echo " example: 20" - echo " minimum overhang for a chimeric junction" - echo "" - echo " --chimSegmentReadGapMax" - echo " type: integer" - echo " example: 0" - echo " maximum gap in the read sequence between chimeric segments" - echo "" - echo " --chimFilter" - echo " type: string, multiple values allowed" - echo " example: banGenomicN" - echo " different filters for chimeric alignments" - echo " - None ... no filtering" - echo " - banGenomicN ... Ns are not allowed in the genome sequence around the" - echo " chimeric junction" - echo "" - echo " --chimMainSegmentMultNmax" - echo " type: integer" - echo " example: 10" - echo " maximum number of multi-alignments for the main chimeric segment. =1" - echo " will prohibit multimapping main segments." - echo "" - echo " --chimMultimapNmax" - echo " type: integer" - echo " example: 0" - echo " maximum number of chimeric multi-alignments" - echo " - 0 ... 
use the old scheme for chimeric detection which only considered" - echo " unique alignments" - echo "" - echo " --chimMultimapScoreRange" - echo " type: integer" - echo " example: 1" - echo " the score range for multi-mapping chimeras below the best chimeric" - echo " score. Only works with --chimMultimapNmax > 1" - echo "" - echo " --chimNonchimScoreDropMin" - echo " type: integer" - echo " example: 20" - echo " to trigger chimeric detection, the drop in the best non-chimeric" - echo " alignment score with respect to the read length has to be greater than" - echo " this value" - echo "" - echo " --chimOutJunctionFormat" - echo " type: integer" - echo " example: 0" - echo " formatting type for the Chimeric.out.junction file" - echo " - 0 ... no comment lines/headers" - echo " - 1 ... comment lines at the end of the file: command line and Nreads:" - echo " total, unique/multi-mapping" - echo "" - echo "Quantification of Annotations:" - echo " --quantMode" - echo " type: string, multiple values allowed" - echo " types of quantification requested" - echo " - - ... none" - echo " - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a" - echo " separate file" - echo " - GeneCounts ... count reads per gene" - echo "" - echo " --quantTranscriptomeBAMcompression" - echo " type: integer" - echo " example: 1" - echo " -2 to 10 transcriptome BAM compression level" - echo " - -2 ... no BAM output" - echo " - -1 ... default compression (6?)" - echo " - 0 ... no compression" - echo " - 10 ... maximum compression" - echo "" - echo " --quantTranscriptomeBan" - echo " type: string" - echo " example: IndelSoftclipSingleend" - echo " prohibit various alignment type" - echo " - IndelSoftclipSingleend ... prohibit indels, soft clipping and" - echo " single-end alignments - compatible with RSEM" - echo " - Singleend ... prohibit single-end alignments" - echo "" - echo "2-pass Mapping:" - echo " --twopassMode" - echo " type: string" - echo " 2-pass mapping mode." 
- echo " - None ... 1-pass mapping" - echo " - Basic ... basic 2-pass mapping, with all 1st pass junctions" - echo " inserted into the genome indices on the fly" - echo "" - echo " --twopass1readsN" - echo " type: integer" - echo " example: -1" - echo " number of reads to process for the 1st step. Use very large number (or" - echo " default -1) to map all reads in the first step." - echo "" - echo "WASP parameters:" - echo " --waspOutputMode" - echo " type: string" - echo " WASP allele-specific output type. This is re-implementation of the" - echo " original WASP mappability filtering by Bryce van de Geijn, Graham" - echo " McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original" - echo " WASP paper: Nature Methods 12, 1061-1063 (2015)," - echo " https://www.nature.com/articles/nmeth.3582 ." - echo " - SAMtag ... add WASP tags to the alignments that pass WASP" - echo " filtering" - echo "" - echo "STARsolo (single cell RNA-seq) parameters:" - echo " --soloType" - echo " type: string, multiple values allowed" - echo " type of single-cell RNA-seq" - echo " - CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of" - echo " fixed length in read2, e.g. Drop-seq and 10X Chromium." - echo " - CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI" - echo " of fixed length and one adapter sequence of fixed length are allowed in" - echo " read2 only (e.g. inDrop, ddSeq)." - echo " - CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No" - echo " UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end]" - echo " CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or" - echo " SortedByCoordinate]" - echo " - SmartSeq ... 
Smart-seq: each cell in a separate FASTQ (paired-" - echo " or single-end), barcodes are corresponding read-groups, no UMI" - echo " sequences, alignments deduplicated according to alignment start and end" - echo " (after extending soft-clipped bases)" - echo "" - echo " --soloCBwhitelist" - echo " type: string, multiple values allowed" - echo " file(s) with whitelist(s) of cell barcodes. Only --soloType" - echo " CB_UMI_Complex allows more than one whitelist file." - echo " - None ... no whitelist: all cell barcodes are allowed" - echo "" - echo " --soloCBstart" - echo " type: integer" - echo " example: 1" - echo " cell barcode start base" - echo "" - echo " --soloCBlen" - echo " type: integer" - echo " example: 16" - echo " cell barcode length" - echo "" - echo " --soloUMIstart" - echo " type: integer" - echo " example: 17" - echo " UMI start base" - echo "" - echo " --soloUMIlen" - echo " type: integer" - echo " example: 10" - echo " UMI length" - echo "" - echo " --soloBarcodeReadLength" - echo " type: integer" - echo " example: 1" - echo " length of the barcode read" - echo " - 1 ... equal to sum of soloCBlen+soloUMIlen" - echo " - 0 ... not defined, do not check" - echo "" - echo " --soloBarcodeMate" - echo " type: integer" - echo " example: 0" - echo " identifies which read mate contains the barcode (CB+UMI) sequence" - echo " - 0 ... barcode sequence is on separate read, which should always be" - echo " the last file in the --readFilesIn listed" - echo " - 1 ... barcode sequence is a part of mate 1" - echo " - 2 ... barcode sequence is a part of mate 2" - echo "" - echo " --soloCBposition" - echo " type: string, multiple values allowed" - echo " position of Cell Barcode(s) on the barcode read." - echo " Presently only works with --soloType CB_UMI_Complex, and barcodes are" - echo " assumed to be on Read2." 
- echo " Format for each barcode: startAnchor_startPosition_endAnchor_endPosition" - echo " start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1:" - echo " read end; 2: adapter start; 3: adapter end" - echo " start(end)Position is the 0-based position with of the CB start(end)" - echo " with respect to the Anchor Base" - echo " String for different barcodes are separated by space." - echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" - echo " --soloCBposition 0_0_2_-1 3_1_3_8" - echo "" - echo " --soloUMIposition" - echo " type: string" - echo " position of the UMI on the barcode read, same as soloCBposition" - echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" - echo " --soloCBposition 3_9_3_14" - echo "" - echo " --soloAdapterSequence" - echo " type: string" - echo " adapter sequence to anchor barcodes. Only one adapter sequence is" - echo " allowed." - echo "" - echo " --soloAdapterMismatchesNmax" - echo " type: integer" - echo " example: 1" - echo " maximum number of mismatches allowed in adapter sequence." - echo "" - echo " --soloCBmatchWLtype" - echo " type: string" - echo " example: 1MM_multi" - echo " matching the Cell Barcodes to the WhiteList" - echo " - Exact ... only exact matches allowed" - echo " - 1MM ... only one match in whitelist with 1" - echo " mismatched base allowed. Allowed CBs have to have at least one read with" - echo " exact match." - echo " - 1MM_multi ... multiple matches in whitelist with" - echo " 1 mismatched base allowed, posterior probability calculation is used" - echo " choose one of the matches." - echo " Allowed CBs have to have at least one read with exact match. This option" - echo " matches best with CellRanger 2.2.0" - echo " - 1MM_multi_pseudocounts ... same as 1MM_Multi, but" - echo " pseudocounts of 1 are added to all whitelist barcodes." - echo " - 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts," - echo " multimatching to WL is allowed for CBs with N-bases. 
This option matches" - echo " best with CellRanger >= 3.0.0" - echo " - EditDist_2 ... allow up to edit distance of 3 fpr" - echo " each of the barcodes. May include one deletion + one insertion. Only" - echo " works with --soloType CB_UMI_Complex. Matches to multiple passlist" - echo " barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - echo "" - echo " --soloInputSAMattrBarcodeSeq" - echo " type: string, multiple values allowed" - echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" - echo " SAM attributes mark the barcode sequence (in proper order)." - echo " For instance, for 10X CellRanger or STARsolo BAMs, use" - echo " --soloInputSAMattrBarcodeSeq CR UR ." - echo " This parameter is required when running STARsolo with input from SAM." - echo "" - echo " --soloInputSAMattrBarcodeQual" - echo " type: string, multiple values allowed" - echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" - echo " SAM attributes mark the barcode qualities (in proper order)." - echo " For instance, for 10X CellRanger or STARsolo BAMs, use" - echo " --soloInputSAMattrBarcodeQual CY UY ." - echo " If this parameter is '-' (default), the quality 'H' will be assigned to" - echo " all bases." - echo "" - echo " --soloStrand" - echo " type: string" - echo " example: Forward" - echo " strandedness of the solo libraries:" - echo " - Unstranded ... no strand information" - echo " - Forward ... read strand same as the original RNA molecule" - echo " - Reverse ... read strand opposite to the original RNA molecule" - echo "" - echo " --soloFeatures" - echo " type: string, multiple values allowed" - echo " example: Gene" - echo " genomic features for which the UMI counts per Cell Barcode are collected" - echo " - Gene ... genes: reads match the gene transcript" - echo " - SJ ... splice junctions: reported in SJ.out.tab" - echo " - GeneFull ... 
full gene (pre-mRNA): count all reads overlapping" - echo " genes' exons and introns" - echo " - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads" - echo " overlapping genes' exons and introns: prioritize 100% overlap with exons" - echo " - GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads" - echo " overlapping genes' exons and introns: prioritize >50% overlap with" - echo " exons. Do not count reads with 100% exonic overlap in the antisense" - echo " direction." - echo "" - echo " --soloMultiMappers" - echo " type: string, multiple values allowed" - echo " example: Unique" - echo " counting method for reads mapping to multiple genes" - echo " - Unique ... count only reads that map to unique genes" - echo " - Uniform ... uniformly distribute multi-genic UMIs to all genes" - echo " - Rescue ... distribute UMIs proportionally to unique+uniform counts" - echo " (~ first iteration of EM)" - echo " - PropUnique ... distribute UMIs proportionally to unique mappers, if" - echo " present, and uniformly if not." - echo " - EM ... multi-gene UMIs are distributed using Expectation" - echo " Maximization algorithm" - echo "" - echo " --soloUMIdedup" - echo " type: string, multiple values allowed" - echo " example: 1MM_All" - echo " type of UMI deduplication (collapsing) algorithm" - echo " - 1MM_All ... all UMIs with 1 mismatch distance to" - echo " each other are collapsed (i.e. counted once)." - echo " - 1MM_Directional_UMItools ... follows the \"directional\" method from" - echo " the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017)." - echo " - 1MM_Directional ... same as 1MM_Directional_UMItools, but" - echo " with more stringent criteria for duplicate UMIs" - echo " - Exact ... only exactly matching UMIs are" - echo " collapsed." - echo " - NoDedup ... no deduplication of UMIs, count all" - echo " reads." - echo " - 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI" - echo " collapsing." 
- echo "" - echo " --soloUMIfiltering" - echo " type: string, multiple values allowed" - echo " type of UMI filtering (for reads uniquely mapping to genes)" - echo " - - ... basic filtering: remove UMIs with N and" - echo " homopolymers (similar to CellRanger 2.2.0)." - echo " - MultiGeneUMI ... basic + remove lower-count UMIs that map to" - echo " more than one gene." - echo " - MultiGeneUMI_All ... basic + remove all UMIs that map to more than" - echo " one gene." - echo " - MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to" - echo " more than one gene, matching CellRanger > 3.0.0 ." - echo " Only works with --soloUMIdedup 1MM_CR" - echo "" - echo " --soloOutFileNames" - echo " type: string, multiple values allowed" - echo " example: Solo.out/;features.tsv;barcodes.tsv;matrix.mtx" - echo " file names for STARsolo output:" - echo " file_name_prefix gene_names barcode_sequences" - echo " cell_feature_count_matrix" - echo "" - echo " --soloCellFilter" - echo " type: string, multiple values allowed" - echo " example: CellRanger2.2;3000;0.99;10" - echo " cell filtering type and parameters" - echo " - None ... do not output filtered cells" - echo " - TopCells ... only report top cells by UMI count, followed by" - echo " the exact number of cells" - echo " - CellRanger2.2 ... simple filtering of CellRanger 2.2." - echo " Can be followed by numbers: number of expected cells, robust maximum" - echo " percentile for UMI count, maximum to minimum ratio for UMI count" - echo " The harcoded values are from CellRanger: nExpectedCells=3000;" - echo " maxPercentile=0.99; maxMinRatio=10" - echo " - EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please" - echo " cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20," - echo " 63 (2019):" - echo " " - echo "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y" - echo " Can be followed by 10 numeric parameters: nExpectedCells" - echo " maxPercentile maxMinRatio indMin indMax umiMin" - echo " umiMinFracMedian candMaxN FDR simN" - echo " The harcoded values are from CellRanger: 3000" - echo " 0.99 10 45000 90000 500 0.01" - echo " 20000 0.01 10000" - echo "" - echo " --soloOutFormatFeaturesGeneField3" - echo " type: string, multiple values allowed" - echo " example: Gene Expression" - echo " field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is" - echo " output." - echo "" - echo " --soloCellReadStats" - echo " type: string" - echo " Output reads statistics for each CB" - echo " - Standard ... standard output" - echo "" - echo "HTSeq arguments:" - echo " -s, --stranded" - echo " type: string" - echo " default: yes" - echo " choices: [ yes, no, reverse ]" - echo " Whether the data is from a strand-specific assay. 'reverse' means 'yes'" - echo " with reversed strand interpretation." - echo "" - echo " -a, --minaqual, --minimum_alignment_quality" - echo " type: integer" - echo " default: 10" - echo " Skip all reads with MAPQ alignment quality lower than the given minimum" - echo " value." - echo " MAPQ is the 5th column of a SAM/BAM file and its usage depends on the" - echo " software" - echo " used to map the reads." - echo "" - echo " -t, --type" - echo " type: string" - echo " example: exon" - echo " Feature type (3rd column in GTF file) to be used, all features of other" - echo " type are ignored (default, suitable for Ensembl GTF files: exon)" - echo "" - echo " -i, --id_attribute" - echo " type: string, multiple values allowed" - echo " example: gene_id" - echo " GTF attribute to be used as feature ID (default, suitable for Ensembl" - echo " GTF files: gene_id)." 
- echo " All feature of the right type (see -t option) within the same GTF" - echo " attribute will be added" - echo " together. The typical way of using this option is to count all exonic" - echo " reads from each gene" - echo " and add the exons but other uses are possible as well. You can call this" - echo " option multiple" - echo " times: in that case, the combination of all attributes separated by" - echo " colons (:) will be used" - echo " as a unique identifier, e.g. for exons you might use -i gene_id -i" - echo " exon_number." - echo "" - echo " --additional_attributes" - echo " type: string, multiple values allowed" - echo " example: gene_name" - echo " Additional feature attributes (suitable for Ensembl GTF files:" - echo " gene_name). Use multiple times" - echo " for more than one additional attribute. These attributes are only used" - echo " as annotations in the" - echo " output, while the determination of how the counts are added together is" - echo " done based on option -i." - echo "" - echo " --add_chromosome_info" - echo " type: boolean_true" - echo " Store information about the chromosome of each feature as an additional" - echo " attribute" - echo " (e.g. colunm in the TSV output file)." - echo "" - echo " -m, --mode" - echo " type: string" - echo " default: union" - echo " choices: [ union, intersection-strict, intersection-nonempty ]" - echo " Mode to handle reads overlapping more than one feature." - echo "" - echo " --non_unique" - echo " type: string" - echo " default: none" - echo " choices: [ none, all, fraction, random ]" - echo " Whether and how to score reads that are not uniquely aligned or" - echo " ambiguously assigned to features." - echo "" - echo " --secondary_alignments" - echo " type: string" - echo " choices: [ score, ignore ]" - echo " Whether to score secondary alignments (0x100 flag)." 
- echo "" - echo " --supplementary_alignments" - echo " type: string" - echo " choices: [ score, ignore ]" - echo " Whether to score supplementary alignments (0x800 flag)." - echo "" - echo " --counts_output_sparse" - echo " type: boolean_true" - echo " Store the counts as a sparse matrix (mtx, h5ad, loom)." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." 
- fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -ENV STAR_VERSION 2.7.10b -ENV PACKAGES gcc g++ make wget zlib1g-dev unzip -RUN apt-get update && \ - apt-get install -y --no-install-recommends ${PACKAGES} && \ - cd /tmp && \ - wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ - unzip ${STAR_VERSION}.zip && \ - cd STAR-${STAR_VERSION}/source && \ - make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ - cp STAR /usr/local/bin && \ - cd / && \ - rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ - apt-get --purge autoremove -y ${PACKAGES} && \ - apt-get clean - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y samtools procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "pyyaml" "HTSeq" "multiprocess" "gtfparse<2.0" "pandas" "multiqc~=1.15.0" - -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component mapping multi_star" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-multi_star-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "multi_star 0.12.3" - exit - ;; - --input_id) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID="$2" - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id=*) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --input_r1) - if [ -z "$VIASH_PAR_INPUT_R1" ]; then - VIASH_PAR_INPUT_R1="$2" - else - VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_r1=*) - if [ -z "$VIASH_PAR_INPUT_R1" ]; then - VIASH_PAR_INPUT_R1=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --input_r2) - if [ -z "$VIASH_PAR_INPUT_R2" ]; then - VIASH_PAR_INPUT_R2="$2" - else - VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_r2=*) - if [ -z "$VIASH_PAR_INPUT_R2" ]; then - VIASH_PAR_INPUT_R2=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --reference_index) - [ -n "$VIASH_PAR_REFERENCE_INDEX" ] && ViashError Bad arguments for option \'--reference_index\': \'$VIASH_PAR_REFERENCE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_index. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --reference_index=*) - [ -n "$VIASH_PAR_REFERENCE_INDEX" ] && ViashError Bad arguments for option \'--reference_index=*\': \'$VIASH_PAR_REFERENCE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_INDEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeDir) - [ -n "$VIASH_PAR_REFERENCE_INDEX" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_REFERENCE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_gtf) - [ -n "$VIASH_PAR_REFERENCE_GTF" ] && ViashError Bad arguments for option \'--reference_gtf\': \'$VIASH_PAR_REFERENCE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_gtf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_gtf=*) - [ -n "$VIASH_PAR_REFERENCE_GTF" ] && ViashError Bad arguments for option \'--reference_gtf=*\': \'$VIASH_PAR_REFERENCE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFileNamePrefix) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--outFileNamePrefix\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFileNamePrefix. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --run_htseq_count) - [ -n "$VIASH_PAR_RUN_HTSEQ_COUNT" ] && ViashError Bad arguments for option \'--run_htseq_count\': \'$VIASH_PAR_RUN_HTSEQ_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUN_HTSEQ_COUNT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --run_htseq_count. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --run_htseq_count=*) - [ -n "$VIASH_PAR_RUN_HTSEQ_COUNT" ] && ViashError Bad arguments for option \'--run_htseq_count=*\': \'$VIASH_PAR_RUN_HTSEQ_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUN_HTSEQ_COUNT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --run_multiqc) - [ -n "$VIASH_PAR_RUN_MULTIQC" ] && ViashError Bad arguments for option \'--run_multiqc\': \'$VIASH_PAR_RUN_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUN_MULTIQC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --run_multiqc. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --run_multiqc=*) - [ -n "$VIASH_PAR_RUN_MULTIQC" ] && ViashError Bad arguments for option \'--run_multiqc=*\': \'$VIASH_PAR_RUN_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_RUN_MULTIQC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_success_rate) - [ -n "$VIASH_PAR_MIN_SUCCESS_RATE" ] && ViashError Bad arguments for option \'--min_success_rate\': \'$VIASH_PAR_MIN_SUCCESS_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SUCCESS_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_success_rate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_success_rate=*) - [ -n "$VIASH_PAR_MIN_SUCCESS_RATE" ] && ViashError Bad arguments for option \'--min_success_rate=*\': \'$VIASH_PAR_MIN_SUCCESS_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SUCCESS_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --runRNGseed) - [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUNRNGSEED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --runRNGseed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --runRNGseed=*) - [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed=*\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUNRNGSEED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeFastaFiles) - if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_PAR_GENOMEFASTAFILES="$2" - else - VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --genomeFastaFiles=*) - if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_PAR_GENOMEFASTAFILES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbFileChrStartEnd) - if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then - VIASH_PAR_SJDBFILECHRSTARTEND="$2" - else - VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbFileChrStartEnd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbFileChrStartEnd=*) - if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then - VIASH_PAR_SJDBFILECHRSTARTEND=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbGTFfile) - [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFfile=*) - [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile=*\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFchrPrefix) - [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFCHRPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFchrPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --sjdbGTFchrPrefix=*) - [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix=*\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFCHRPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFfeatureExon) - [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFEATUREEXON="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfeatureExon. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFfeatureExon=*) - [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon=*\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFEATUREEXON=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentTranscript) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentTranscript. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentTranscript=*) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentGene) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTGENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGene=*) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTGENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentGeneName) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$2" - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneName. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGeneName=*) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbGTFtagExonParentGeneType) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$2" - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneType. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGeneType=*) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbOverhang) - [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBOVERHANG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbOverhang. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbOverhang=*) - [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang=*\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBOVERHANG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbScore) - [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBSCORE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbScore. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbScore=*) - [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore=*\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBSCORE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbInsertSave) - [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SJDBINSERTSAVE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbInsertSave. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbInsertSave=*) - [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave=*\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBINSERTSAVE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --varVCFfile) - [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARVCFFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --varVCFfile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --varVCFfile=*) - [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile=*\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARVCFFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesType) - [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesType=*) - [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType=*\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_READFILESTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesSAMattrKeep) - if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then - VIASH_PAR_READFILESSAMATTRKEEP="$2" - else - VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesSAMattrKeep. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesSAMattrKeep=*) - if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then - VIASH_PAR_READFILESSAMATTRKEEP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readFilesManifest) - [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESMANIFEST="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesManifest. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesManifest=*) - [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest=*\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESMANIFEST=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesPrefix) - [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --readFilesPrefix=*) - [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix=*\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesCommand) - if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then - VIASH_PAR_READFILESCOMMAND="$2" - else - VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesCommand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesCommand=*) - if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then - VIASH_PAR_READFILESCOMMAND=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readMapNumber) - [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMAPNUMBER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMapNumber. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readMapNumber=*) - [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber=*\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMAPNUMBER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readMatesLengthsIn) - [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMATESLENGTHSIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMatesLengthsIn. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readMatesLengthsIn=*) - [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn=*\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMATESLENGTHSIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readNameSeparator) - if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then - VIASH_PAR_READNAMESEPARATOR="$2" - else - VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readNameSeparator. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readNameSeparator=*) - if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then - VIASH_PAR_READNAMESEPARATOR=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readQualityScoreBase) - [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READQUALITYSCOREBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readQualityScoreBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readQualityScoreBase=*) - [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase=*\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READQUALITYSCOREBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clipAdapterType) - [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CLIPADAPTERTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clipAdapterType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clipAdapterType=*) - [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType=*\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLIPADAPTERTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clip3pNbases) - if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then - VIASH_PAR_CLIP3PNBASES="$2" - else - VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pNbases=*) - if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then - VIASH_PAR_CLIP3PNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAdapterSeq) - if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then - VIASH_PAR_CLIP3PADAPTERSEQ="$2" - else - VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterSeq. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pAdapterSeq=*) - if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then - VIASH_PAR_CLIP3PADAPTERSEQ=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAdapterMMp) - if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - VIASH_PAR_CLIP3PADAPTERMMP="$2" - else - VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterMMp. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --clip3pAdapterMMp=*) - if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - VIASH_PAR_CLIP3PADAPTERMMP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAfterAdapterNbases) - if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$2" - else - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAfterAdapterNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pAfterAdapterNbases=*) - if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - VIASH_PAR_CLIP3PAFTERADAPTERNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip5pNbases) - if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then - VIASH_PAR_CLIP5PNBASES="$2" - else - VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip5pNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip5pNbases=*) - if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then - VIASH_PAR_CLIP5PNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --limitGenomeGenerateRAM) - [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITGENOMEGENERATERAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitGenomeGenerateRAM. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --limitGenomeGenerateRAM=*) - [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM=*\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITGENOMEGENERATERAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitIObufferSize) - if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - VIASH_PAR_LIMITIOBUFFERSIZE="$2" - else - VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitIObufferSize. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitIObufferSize=*) - if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - VIASH_PAR_LIMITIOBUFFERSIZE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --limitOutSAMoneReadBytes) - [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSAMONEREADBYTES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSAMoneReadBytes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSAMoneReadBytes=*) - [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes=*\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSAMONEREADBYTES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitOutSJoneRead) - [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LIMITOUTSJONEREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJoneRead. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSJoneRead=*) - [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead=*\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJONEREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitOutSJcollapsed) - [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJCOLLAPSED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJcollapsed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSJcollapsed=*) - [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed=*\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJCOLLAPSED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitBAMsortRAM) - [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITBAMSORTRAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitBAMsortRAM. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitBAMsortRAM=*) - [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM=*\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LIMITBAMSORTRAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitSjdbInsertNsj) - [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITSJDBINSERTNSJ="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitSjdbInsertNsj. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitSjdbInsertNsj=*) - [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj=*\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITSJDBINSERTNSJ=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitNreadsSoft) - [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITNREADSSOFT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitNreadsSoft. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitNreadsSoft=*) - [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft=*\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITNREADSSOFT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outTmpKeep) - [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTTMPKEEP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outTmpKeep. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outTmpKeep=*) - [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep=*\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTTMPKEEP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outStd) - [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSTD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outStd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outStd=*) - [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd=*\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSTD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outReadsUnmapped) - [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTREADSUNMAPPED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outReadsUnmapped. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outReadsUnmapped=*) - [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped=*\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTREADSUNMAPPED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outQSconversionAdd) - [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTQSCONVERSIONADD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outQSconversionAdd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outQSconversionAdd=*) - [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd=*\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTQSCONVERSIONADD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outMultimapperOrder) - [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTMULTIMAPPERORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outMultimapperOrder. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outMultimapperOrder=*) - [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder=*\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTMULTIMAPPERORDER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMmode) - [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMmode=*) - [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode=*\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSAMMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMstrandField) - [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMSTRANDFIELD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMstrandField. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMstrandField=*) - [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField=*\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMSTRANDFIELD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMattributes) - if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then - VIASH_PAR_OUTSAMATTRIBUTES="$2" - else - VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattributes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMattributes=*) - if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then - VIASH_PAR_OUTSAMATTRIBUTES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMattrIHstart) - [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMATTRIHSTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrIHstart. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMattrIHstart=*) - [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart=*\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMATTRIHSTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMunmapped) - if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then - VIASH_PAR_OUTSAMUNMAPPED="$2" - else - VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMunmapped. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMunmapped=*) - if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then - VIASH_PAR_OUTSAMUNMAPPED=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMorder) - [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMorder. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMorder=*) - [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder=*\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMORDER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMprimaryFlag) - [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMPRIMARYFLAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMprimaryFlag. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMprimaryFlag=*) - [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag=*\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMPRIMARYFLAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMreadID) - [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMREADID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMreadID. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMreadID=*) - [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID=*\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMREADID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMmapqUnique) - [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMAPQUNIQUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmapqUnique. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMmapqUnique=*) - [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique=*\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMAPQUNIQUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMflagOR) - [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSAMFLAGOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagOR. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMflagOR=*) - [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR=*\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMflagAND) - [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagAND. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMflagAND=*) - [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND=*\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMattrRGline) - if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then - VIASH_PAR_OUTSAMATTRRGLINE="$2" - else - VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrRGline. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMattrRGline=*) - if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then - VIASH_PAR_OUTSAMATTRRGLINE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderHD) - if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then - VIASH_PAR_OUTSAMHEADERHD="$2" - else - VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderHD. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMheaderHD=*) - if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then - VIASH_PAR_OUTSAMHEADERHD=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderPG) - if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then - VIASH_PAR_OUTSAMHEADERPG="$2" - else - VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderPG. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMheaderPG=*) - if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then - VIASH_PAR_OUTSAMHEADERPG=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderCommentFile) - [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMHEADERCOMMENTFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderCommentFile. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMheaderCommentFile=*) - [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile=*\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMHEADERCOMMENTFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMfilter) - if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then - VIASH_PAR_OUTSAMFILTER="$2" - else - VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMfilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMfilter=*) - if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then - VIASH_PAR_OUTSAMFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMmultNmax) - [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMULTNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmultNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMmultNmax=*) - [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax=*\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMULTNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMtlen) - [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMTLEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtlen. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMtlen=*) - [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen=*\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMTLEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMcompression) - [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMCOMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMcompression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMcompression=*) - [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression=*\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMCOMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMsortingThreadN) - [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGTHREADN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingThreadN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMsortingThreadN=*) - [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN=*\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTBAMSORTINGTHREADN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMsortingBinsN) - [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGBINSN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingBinsN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMsortingBinsN=*) - [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN=*\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGBINSN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bamRemoveDuplicatesType) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bamRemoveDuplicatesType=*) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bamRemoveDuplicatesMate2basesN) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesMate2basesN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bamRemoveDuplicatesMate2basesN=*) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigType) - if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then - VIASH_PAR_OUTWIGTYPE="$2" - else - VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigType=*) - if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then - VIASH_PAR_OUTWIGTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outWigStrand) - [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGSTRAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigStrand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigStrand=*) - [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand=*\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTWIGSTRAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigReferencesPrefix) - [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGREFERENCESPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigReferencesPrefix. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigReferencesPrefix=*) - [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix=*\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGREFERENCESPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigNorm) - [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGNORM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigNorm. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigNorm=*) - [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm=*\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGNORM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterType) - [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterType. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outFilterType=*) - [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType=*\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMultimapScoreRange) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMultimapScoreRange=*) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMultimapNmax) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMultimapNmax=*) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMismatchNmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNoverLmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMismatchNoverLmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNoverReadLmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverReadLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMismatchNoverReadLmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterScoreMin) - [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterScoreMin=*) - [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin=*\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterScoreMinOverLread) - [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERSCOREMINOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMinOverLread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterScoreMinOverLread=*) - [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread=*\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMINOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMatchNmin) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMatchNmin=*) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin=*\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMatchNminOverLread) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNminOverLread. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outFilterMatchNminOverLread=*) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread=*\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterIntronMotifs) - [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONMOTIFS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronMotifs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterIntronMotifs=*) - [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs=*\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONMOTIFS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterIntronStrands) - [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONSTRANDS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronStrands. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterIntronStrands=*) - [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands=*\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERINTRONSTRANDS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJtype) - [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJtype. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJtype=*) - [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype=*\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJfilterReads) - [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJFILTERREADS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterReads. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterReads=*) - [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads=*\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJFILTERREADS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJfilterOverhangMin) - if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$2" - else - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterOverhangMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSJfilterOverhangMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - VIASH_PAR_OUTSJFILTEROVERHANGMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterCountUniqueMin) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$2" - else - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountUniqueMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterCountUniqueMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterCountTotalMin) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$2" - else - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountTotalMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterCountTotalMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterDistToOtherSJmin) - if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$2" - else - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterDistToOtherSJmin. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterDistToOtherSJmin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterIntronMaxVsReadN) - if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$2" - else - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterIntronMaxVsReadN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterIntronMaxVsReadN=*) - if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --scoreGap) - [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGap. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGap=*) - [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap=*\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapNoncan) - [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREGAPNONCAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapNoncan. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapNoncan=*) - [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan=*\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPNONCAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapGCAG) - [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPGCAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapGCAG. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapGCAG=*) - [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG=*\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPGCAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapATAC) - [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPATAC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapATAC. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapATAC=*) - [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC=*\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREGAPATAC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGenomicLengthLog2scale) - [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGenomicLengthLog2scale. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGenomicLengthLog2scale=*) - [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale=*\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreDelOpen) - [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELOPEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelOpen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreDelOpen=*) - [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen=*\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELOPEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreDelBase) - [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelBase. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreDelBase=*) - [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase=*\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreInsOpen) - [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSOPEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsOpen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreInsOpen=*) - [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen=*\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSOPEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreInsBase) - [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreInsBase=*) - [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase=*\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreStitchSJshift) - [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCORESTITCHSJSHIFT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreStitchSJshift. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreStitchSJshift=*) - [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift=*\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCORESTITCHSJSHIFT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchStartLmax) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchStartLmax=*) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchStartLmaxOverLread) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmaxOverLread. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --seedSearchStartLmaxOverLread=*) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchLmax) - [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchLmax=*) - [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax=*\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedMultimapNmax) - [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedMultimapNmax=*) - [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax=*\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEEDMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedPerReadNmax) - [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedPerReadNmax=*) - [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax=*\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedPerWindowNmax) - [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERWINDOWNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedPerWindowNmax=*) - [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax=*\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERWINDOWNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedNoneLociPerWindow) - [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDNONELOCIPERWINDOW="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedNoneLociPerWindow. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --seedNoneLociPerWindow=*) - [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow=*\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDNONELOCIPERWINDOW=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSplitMin) - [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSPLITMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSplitMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSplitMin=*) - [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin=*\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSPLITMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedMapMin) - [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMAPMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMapMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedMapMin=*) - [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin=*\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMAPMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignIntronMin) - [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNINTRONMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignIntronMin=*) - [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin=*\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignIntronMax) - [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignIntronMax=*) - [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax=*\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignMatesGapMax) - [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNMATESGAPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignMatesGapMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignMatesGapMax=*) - [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax=*\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNMATESGAPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSJoverhangMin) - [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJoverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSJoverhangMin=*) - [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin=*\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSJstitchMismatchNmax) - if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$2" - else - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJstitchMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSJstitchMismatchNmax=*) - if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --alignSJDBoverhangMin) - [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJDBOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJDBoverhangMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --alignSJDBoverhangMin=*) - [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin=*\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJDBOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSplicedMateMapLmin) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSplicedMateMapLmin=*) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSplicedMateMapLminOverLmate) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLminOverLmate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSplicedMateMapLminOverLmate=*) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignWindowsPerReadNmax) - [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNWINDOWSPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignWindowsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignWindowsPerReadNmax=*) - [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax=*\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNWINDOWSPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignTranscriptsPerWindowNmax) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignTranscriptsPerWindowNmax=*) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignTranscriptsPerReadNmax) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignTranscriptsPerReadNmax=*) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignEndsType) - [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignEndsType=*) - [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType=*\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignEndsProtrude) - [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNENDSPROTRUDE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsProtrude. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignEndsProtrude=*) - [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude=*\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSPROTRUDE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSoftClipAtReferenceEnds) - [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSoftClipAtReferenceEnds. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSoftClipAtReferenceEnds=*) - [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds=*\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignInsertionFlush) - [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINSERTIONFLUSH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignInsertionFlush. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --alignInsertionFlush=*) - [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush=*\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINSERTIONFLUSH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --peOverlapNbasesMin) - [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPNBASESMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapNbasesMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --peOverlapNbasesMin=*) - [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin=*\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPNBASESMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --peOverlapMMp) - [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPMMP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapMMp. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --peOverlapMMp=*) - [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp=*\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_PEOVERLAPMMP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winAnchorMultimapNmax) - [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winAnchorMultimapNmax=*) - [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax=*\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winBinNbits) - [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINBINNBITS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winBinNbits. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winBinNbits=*) - [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits=*\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINBINNBITS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winAnchorDistNbins) - [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORDISTNBINS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorDistNbins. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --winAnchorDistNbins=*) - [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins=*\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORDISTNBINS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winFlankNbins) - [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINFLANKNBINS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winFlankNbins. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winFlankNbins=*) - [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins=*\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINFLANKNBINS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winReadCoverageRelativeMin) - [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGERELATIVEMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageRelativeMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winReadCoverageRelativeMin=*) - [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin=*\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_WINREADCOVERAGERELATIVEMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winReadCoverageBasesMin) - [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGEBASESMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageBasesMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winReadCoverageBasesMin=*) - [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin=*\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGEBASESMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimOutType) - if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then - VIASH_PAR_CHIMOUTTYPE="$2" - else - VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimOutType=*) - if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then - VIASH_PAR_CHIMOUTTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --chimSegmentMin) - [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chimSegmentMin=*) - [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin=*\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreMin) - [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreMin=*) - [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin=*\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreDropMax) - [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREDROPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreDropMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreDropMax=*) - [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax=*\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREDROPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreSeparation) - [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMSCORESEPARATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreSeparation. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreSeparation=*) - [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation=*\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCORESEPARATION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreJunctionNonGTAG) - [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreJunctionNonGTAG. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreJunctionNonGTAG=*) - [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG=*\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimJunctionOverhangMin) - [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMJUNCTIONOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimJunctionOverhangMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chimJunctionOverhangMin=*) - [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin=*\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMJUNCTIONOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimSegmentReadGapMax) - [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTREADGAPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentReadGapMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimSegmentReadGapMax=*) - [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax=*\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTREADGAPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimFilter) - if [ -z "$VIASH_PAR_CHIMFILTER" ]; then - VIASH_PAR_CHIMFILTER="$2" - else - VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimFilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimFilter=*) - if [ -z "$VIASH_PAR_CHIMFILTER" ]; then - VIASH_PAR_CHIMFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --chimMainSegmentMultNmax) - [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMMAINSEGMENTMULTNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMainSegmentMultNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimMainSegmentMultNmax=*) - [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax=*\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMAINSEGMENTMULTNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimMultimapNmax) - [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimMultimapNmax=*) - [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax=*\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimMultimapScoreRange) - [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPSCORERANGE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapScoreRange. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chimMultimapScoreRange=*) - [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange=*\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimNonchimScoreDropMin) - [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMNONCHIMSCOREDROPMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimNonchimScoreDropMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimNonchimScoreDropMin=*) - [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin=*\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMNONCHIMSCOREDROPMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimOutJunctionFormat) - [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMOUTJUNCTIONFORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutJunctionFormat. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimOutJunctionFormat=*) - [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat=*\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMOUTJUNCTIONFORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --quantMode) - if [ -z "$VIASH_PAR_QUANTMODE" ]; then - VIASH_PAR_QUANTMODE="$2" - else - VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quantMode=*) - if [ -z "$VIASH_PAR_QUANTMODE" ]; then - VIASH_PAR_QUANTMODE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --quantTranscriptomeBAMcompression) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBAMcompression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quantTranscriptomeBAMcompression=*) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --quantTranscriptomeBan) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBan. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --quantTranscriptomeBan=*) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --twopassMode) - [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASSMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopassMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --twopassMode=*) - [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode=*\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASSMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --twopass1readsN) - [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASS1READSN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopass1readsN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --twopass1readsN=*) - [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN=*\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASS1READSN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --waspOutputMode) - [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_WASPOUTPUTMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --waspOutputMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --waspOutputMode=*) - [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode=*\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WASPOUTPUTMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloType) - if [ -z "$VIASH_PAR_SOLOTYPE" ]; then - VIASH_PAR_SOLOTYPE="$2" - else - VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloType=*) - if [ -z "$VIASH_PAR_SOLOTYPE" ]; then - VIASH_PAR_SOLOTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCBwhitelist) - if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then - VIASH_PAR_SOLOCBWHITELIST="$2" - else - VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBwhitelist. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBwhitelist=*) - if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then - VIASH_PAR_SOLOCBWHITELIST=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCBstart) - [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBSTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBstart. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloCBstart=*) - [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart=*\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBSTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBlen) - [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBLEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBlen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBlen=*) - [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen=*\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBLEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloUMIstart) - [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMISTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIstart. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIstart=*) - [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart=*\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMISTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloUMIlen) - [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMILEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIlen. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIlen=*) - [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen=*\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMILEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloBarcodeReadLength) - [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEREADLENGTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeReadLength. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloBarcodeReadLength=*) - [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength=*\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEREADLENGTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloBarcodeMate) - [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEMATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeMate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloBarcodeMate=*) - [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate=*\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SOLOBARCODEMATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBposition) - if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then - VIASH_PAR_SOLOCBPOSITION="$2" - else - VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBposition. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBposition=*) - if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then - VIASH_PAR_SOLOCBPOSITION=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIposition) - [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMIPOSITION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIposition. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIposition=*) - [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition=*\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMIPOSITION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloAdapterSequence) - [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERSEQUENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterSequence. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloAdapterSequence=*) - [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence=*\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERSEQUENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloAdapterMismatchesNmax) - [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERMISMATCHESNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterMismatchesNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloAdapterMismatchesNmax=*) - [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax=*\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERMISMATCHESNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBmatchWLtype) - [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBMATCHWLTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBmatchWLtype. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBmatchWLtype=*) - [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype=*\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SOLOCBMATCHWLTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloInputSAMattrBarcodeSeq) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$2" - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeSeq. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloInputSAMattrBarcodeSeq=*) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloInputSAMattrBarcodeQual) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$2" - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeQual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloInputSAMattrBarcodeQual=*) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloStrand) - [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOSTRAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloStrand. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloStrand=*) - [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand=*\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOSTRAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloFeatures) - if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then - VIASH_PAR_SOLOFEATURES="$2" - else - VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloFeatures. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloFeatures=*) - if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then - VIASH_PAR_SOLOFEATURES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloMultiMappers) - if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then - VIASH_PAR_SOLOMULTIMAPPERS="$2" - else - VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloMultiMappers. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloMultiMappers=*) - if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then - VIASH_PAR_SOLOMULTIMAPPERS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIdedup) - if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then - VIASH_PAR_SOLOUMIDEDUP="$2" - else - VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIdedup. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloUMIdedup=*) - if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then - VIASH_PAR_SOLOUMIDEDUP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIfiltering) - if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then - VIASH_PAR_SOLOUMIFILTERING="$2" - else - VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIfiltering. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIfiltering=*) - if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then - VIASH_PAR_SOLOUMIFILTERING=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloOutFileNames) - if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then - VIASH_PAR_SOLOOUTFILENAMES="$2" - else - VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFileNames. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloOutFileNames=*) - if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then - VIASH_PAR_SOLOOUTFILENAMES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCellFilter) - if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then - VIASH_PAR_SOLOCELLFILTER="$2" - else - VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellFilter. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloCellFilter=*) - if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then - VIASH_PAR_SOLOCELLFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloOutFormatFeaturesGeneField3) - if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$2" - else - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFormatFeaturesGeneField3. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloOutFormatFeaturesGeneField3=*) - if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCellReadStats) - [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCELLREADSTATS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellReadStats. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCellReadStats=*) - [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats=*\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCELLREADSTATS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --stranded) - [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_STRANDED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --stranded. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --stranded=*) - [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded=*\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDED=$(ViashRemoveFlags "$1") - shift 1 - ;; - -s) - [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --minimum_alignment_quality) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimum_alignment_quality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --minimum_alignment_quality=*) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality=*\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - -a) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'-a\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -a. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --minaqual) - [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minaqual\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --minaqual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --type) - [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --type. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --type=*) - [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type=*\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -t) - [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --id_attribute) - if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then - VIASH_PAR_ID_ATTRIBUTE="$2" - else - VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --id_attribute. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --id_attribute=*) - if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then - VIASH_PAR_ID_ATTRIBUTE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then - VIASH_PAR_ID_ATTRIBUTE="$2" - else - VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --additional_attributes) - if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then - VIASH_PAR_ADDITIONAL_ATTRIBUTES="$2" - else - VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --additional_attributes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --additional_attributes=*) - if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then - VIASH_PAR_ADDITIONAL_ATTRIBUTES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --add_chromosome_info) - [ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ] && ViashError Bad arguments for option \'--add_chromosome_info\': \'$VIASH_PAR_ADD_CHROMOSOME_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ADD_CHROMOSOME_INFO=true - shift 1 - ;; - --mode) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --mode=*) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -m) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'-m\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -m. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --non_unique) - [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NON_UNIQUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --non_unique. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --non_unique=*) - [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique=*\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NON_UNIQUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --secondary_alignments) - [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SECONDARY_ALIGNMENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --secondary_alignments. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --secondary_alignments=*) - [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments=*\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SECONDARY_ALIGNMENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --supplementary_alignments) - [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --supplementary_alignments. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --supplementary_alignments=*) - [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments=*\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --counts_output_sparse) - [ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ] && ViashError Bad arguments for option \'--counts_output_sparse\': \'$VIASH_PAR_COUNTS_OUTPUT_SPARSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COUNTS_OUTPUT_SPARSE=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then - ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT_R1+x} ]; then - ViashError '--input_r1' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE_INDEX+x} ]; then - ViashError '--reference_index' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE_GTF+x} ]; then - ViashError '--reference_gtf' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_RUN_HTSEQ_COUNT+x} ]; then - VIASH_PAR_RUN_HTSEQ_COUNT="true" -fi -if [ -z ${VIASH_PAR_RUN_MULTIQC+x} ]; then - VIASH_PAR_RUN_MULTIQC="true" -fi -if [ -z ${VIASH_PAR_MIN_SUCCESS_RATE+x} ]; then - VIASH_PAR_MIN_SUCCESS_RATE="0.5" -fi -if [ -z ${VIASH_PAR_STRANDED+x} ]; then - VIASH_PAR_STRANDED="yes" -fi -if [ -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then - VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="10" -fi -if [ -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then - VIASH_PAR_ADD_CHROMOSOME_INFO="false" -fi -if [ -z ${VIASH_PAR_MODE+x} ]; then - VIASH_PAR_MODE="union" -fi -if [ -z ${VIASH_PAR_NON_UNIQUE+x} ]; then - VIASH_PAR_NON_UNIQUE="none" -fi -if [ -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then - VIASH_PAR_COUNTS_OUTPUT_SPARSE="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT_R1; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT_R2; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_REFERENCE_INDEX" ] && [ ! -e "$VIASH_PAR_REFERENCE_INDEX" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE_INDEX' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE_GTF" ] && [ ! -e "$VIASH_PAR_REFERENCE_GTF" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE_GTF' does not exist." - exit 1 -fi -if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ] && [ ! -e "$VIASH_PAR_SJDBGTFFILE" ]; then - ViashError "Input file '$VIASH_PAR_SJDBGTFFILE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ] && [ ! -e "$VIASH_PAR_READFILESMANIFEST" ]; then - ViashError "Input file '$VIASH_PAR_READFILESMANIFEST' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_RUN_HTSEQ_COUNT" ]]; then - if ! [[ "$VIASH_PAR_RUN_HTSEQ_COUNT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--run_htseq_count' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_RUN_MULTIQC" ]]; then - if ! [[ "$VIASH_PAR_RUN_MULTIQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--run_multiqc' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_SUCCESS_RATE" ]]; then - if ! [[ "$VIASH_PAR_MIN_SUCCESS_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_success_rate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_RUNRNGSEED" ]]; then - if ! [[ "$VIASH_PAR_RUNRNGSEED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--runRNGseed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SJDBOVERHANG" ]]; then - if ! [[ "$VIASH_PAR_SJDBOVERHANG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sjdbOverhang' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SJDBSCORE" ]]; then - if ! 
[[ "$VIASH_PAR_SJDBSCORE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sjdbScore' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_READMAPNUMBER" ]]; then - if ! [[ "$VIASH_PAR_READMAPNUMBER" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--readMapNumber' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_READQUALITYSCOREBASE" ]]; then - if ! [[ "$VIASH_PAR_READQUALITYSCOREBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--readQualityScoreBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_CLIP3PNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip3pNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PADAPTERMMP; do - if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--clip3pAdapterMMp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PAFTERADAPTERNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip3pAfterAdapterNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP5PNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP5PNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip5pNbases' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ]]; then - if ! [[ "$VIASH_PAR_LIMITGENOMEGENERATERAM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitGenomeGenerateRAM' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_LIMITIOBUFFERSIZE; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitIObufferSize' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSAMoneReadBytes' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSJONEREAD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSJoneRead' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSJCOLLAPSED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSJcollapsed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITBAMSORTRAM" ]]; then - if ! [[ "$VIASH_PAR_LIMITBAMSORTRAM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitBAMsortRAM' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ]]; then - if ! [[ "$VIASH_PAR_LIMITSJDBINSERTNSJ" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitSjdbInsertNsj' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITNREADSSOFT" ]]; then - if ! 
[[ "$VIASH_PAR_LIMITNREADSSOFT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitNreadsSoft' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ]]; then - if ! [[ "$VIASH_PAR_OUTQSCONVERSIONADD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outQSconversionAdd' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMATTRIHSTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMattrIHstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMMAPQUNIQUE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMmapqUnique' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMFLAGOR" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMFLAGOR" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMflagOR' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMFLAGAND" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMFLAGAND" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMflagAND' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMMULTNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMmultNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMTLEN" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMTLEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMtlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ]]; then - if ! 
[[ "$VIASH_PAR_OUTBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ]]; then - if ! [[ "$VIASH_PAR_OUTBAMSORTINGTHREADN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMsortingThreadN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ]]; then - if ! [[ "$VIASH_PAR_OUTBAMSORTINGBINSN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMsortingBinsN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ]]; then - if ! [[ "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--bamRemoveDuplicatesMate2basesN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ]]; then - if ! 
[[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMismatchNoverLmax' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMismatchNoverReadLmax' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterScoreMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterScoreMinOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMatchNmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMatchNminOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTEROVERHANGMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterCountUniqueMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterCountTotalMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterDistToOtherSJmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterIntronMaxVsReadN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_SCOREGAP" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAP" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGap' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPNONCAN" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPNONCAN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapNoncan' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPGCAG" ]]; then - if ! 
[[ "$VIASH_PAR_SCOREGAPGCAG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapGCAG' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPATAC" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPATAC" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapATAC' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ]]; then - if ! [[ "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGenomicLengthLog2scale' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREDELOPEN" ]]; then - if ! [[ "$VIASH_PAR_SCOREDELOPEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreDelOpen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREDELBASE" ]]; then - if ! [[ "$VIASH_PAR_SCOREDELBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreDelBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREINSOPEN" ]]; then - if ! [[ "$VIASH_PAR_SCOREINSOPEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreInsOpen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREINSBASE" ]]; then - if ! [[ "$VIASH_PAR_SCOREINSBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreInsBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ]]; then - if ! [[ "$VIASH_PAR_SCORESTITCHSJSHIFT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreStitchSJshift' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ]]; then - if ! 
[[ "$VIASH_PAR_SEEDSEARCHSTARTLMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSearchStartLmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ]]; then - if ! [[ "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--seedSearchStartLmaxOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHLMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDSEARCHLMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSearchLmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ]]; then - if ! [[ "$VIASH_PAR_SEEDNONELOCIPERWINDOW" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedNoneLociPerWindow' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSPLITMIN" ]]; then - if ! [[ "$VIASH_PAR_SEEDSPLITMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSplitMin' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDMAPMIN" ]]; then - if ! [[ "$VIASH_PAR_SEEDMAPMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedMapMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNINTRONMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNINTRONMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignIntronMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNINTRONMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNINTRONMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignIntronMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNMATESGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignMatesGapMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSJOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJstitchMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJDBoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ]]; then - if ! 
[[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSplicedMateMapLmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--alignSplicedMateMapLminOverLmate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignWindowsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignTranscriptsPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignTranscriptsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ]]; then - if ! [[ "$VIASH_PAR_PEOVERLAPNBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--peOverlapNbasesMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PEOVERLAPMMP" ]]; then - if ! [[ "$VIASH_PAR_PEOVERLAPMMP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--peOverlapMMp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ]]; then - if ! 
[[ "$VIASH_PAR_WINANCHORMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winAnchorMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINBINNBITS" ]]; then - if ! [[ "$VIASH_PAR_WINBINNBITS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winBinNbits' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINANCHORDISTNBINS" ]]; then - if ! [[ "$VIASH_PAR_WINANCHORDISTNBINS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winAnchorDistNbins' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINFLANKNBINS" ]]; then - if ! [[ "$VIASH_PAR_WINFLANKNBINS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winFlankNbins' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ]]; then - if ! [[ "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--winReadCoverageRelativeMin' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ]]; then - if ! [[ "$VIASH_PAR_WINREADCOVERAGEBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winReadCoverageBasesMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSEGMENTMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMSEGMENTMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimSegmentMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreMin' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREDROPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreDropMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCORESEPARATION" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCORESEPARATION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreSeparation' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreJunctionNonGTAG' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimJunctionOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimSegmentReadGapMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMainSegmentMultNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ]]; then - if ! 
[[ "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimNonchimScoreDropMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ]]; then - if ! [[ "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimOutJunctionFormat' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ]]; then - if ! [[ "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--quantTranscriptomeBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TWOPASS1READSN" ]]; then - if ! [[ "$VIASH_PAR_TWOPASS1READSN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--twopass1readsN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOCBSTART" ]]; then - if ! [[ "$VIASH_PAR_SOLOCBSTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloCBstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOCBLEN" ]]; then - if ! [[ "$VIASH_PAR_SOLOCBLEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloCBlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOUMISTART" ]]; then - if ! [[ "$VIASH_PAR_SOLOUMISTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloUMIstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOUMILEN" ]]; then - if ! 
[[ "$VIASH_PAR_SOLOUMILEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloUMIlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ]]; then - if ! [[ "$VIASH_PAR_SOLOBARCODEREADLENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloBarcodeReadLength' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOBARCODEMATE" ]]; then - if ! [[ "$VIASH_PAR_SOLOBARCODEMATE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloBarcodeMate' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ]]; then - if ! [[ "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloAdapterMismatchesNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ]]; then - if ! [[ "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--minimum_alignment_quality' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ]]; then - if ! [[ "$VIASH_PAR_ADD_CHROMOSOME_INFO" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--add_chromosome_info' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ]]; then - if ! [[ "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--counts_output_sparse' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_STRANDED" ]; then - VIASH_PAR_STRANDED_CHOICES=("yes:no:reverse") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_STRANDED_CHOICES[*]}:" =~ ":$VIASH_PAR_STRANDED:" ]]; then - ViashError '--stranded' specified value of \'$VIASH_PAR_STRANDED\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_MODE" ]; then - VIASH_PAR_MODE_CHOICES=("union:intersection-strict:intersection-nonempty") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then - ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_NON_UNIQUE" ]; then - VIASH_PAR_NON_UNIQUE_CHOICES=("none:all:fraction:random") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_NON_UNIQUE_CHOICES[*]}:" =~ ":$VIASH_PAR_NON_UNIQUE:" ]]; then - ViashError '--non_unique' specified value of \'$VIASH_PAR_NON_UNIQUE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_SECONDARY_ALIGNMENTS" ]; then - VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES=("score:ignore") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SECONDARY_ALIGNMENTS:" ]]; then - ViashError '--secondary_alignments' specified value of \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ]; then - VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES=("score:ignore") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS:" ]]; then - ViashError '--supplementary_alignments' specified value of \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT_R1" ]; then - VIASH_TEST_INPUT_R1=() - IFS=';' - for var in $VIASH_PAR_INPUT_R1; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT_R1+=( "$var" ) - done - VIASH_PAR_INPUT_R1=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R1[*]}") -fi -if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then - VIASH_TEST_INPUT_R2=() - IFS=';' - for var in $VIASH_PAR_INPUT_R2; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT_R2+=( "$var" ) - done - VIASH_PAR_INPUT_R2=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R2[*]}") -fi -if [ ! -z "$VIASH_PAR_REFERENCE_INDEX" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE_INDEX")" ) - VIASH_PAR_REFERENCE_INDEX=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE_INDEX") -fi -if [ ! -z "$VIASH_PAR_REFERENCE_GTF" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE_GTF")" ) - VIASH_PAR_REFERENCE_GTF=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_TEST_GENOMEFASTAFILES=() - IFS=';' - for var in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_GENOMEFASTAFILES+=( "$var" ) - done - VIASH_PAR_GENOMEFASTAFILES=$(IFS=';' ; echo "${VIASH_TEST_GENOMEFASTAFILES[*]}") -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SJDBGTFFILE")" ) - VIASH_PAR_SJDBGTFFILE=$(ViashAutodetectMount "$VIASH_PAR_SJDBGTFFILE") -fi -if [ ! 
-z "$VIASH_PAR_READFILESMANIFEST" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_READFILESMANIFEST")" ) - VIASH_PAR_READFILESMANIFEST=$(ViashAutodetectMount "$VIASH_PAR_READFILESMANIFEST") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-multi_star-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from typing import Any, Dict, List, Tuple -import math -import tempfile -import subprocess -import tarfile -import gzip -import shutil -from pathlib import Path -import yaml -import pandas as pd -from multiprocess import Pool -import gtfparse - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'input_r1': $( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "r'${VIASH_PAR_INPUT_R1//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'input_r2': $( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "r'${VIASH_PAR_INPUT_R2//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'reference_index': $( if [ ! -z ${VIASH_PAR_REFERENCE_INDEX+x} ]; then echo "r'${VIASH_PAR_REFERENCE_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_gtf': $( if [ ! -z ${VIASH_PAR_REFERENCE_GTF+x} ]; then echo "r'${VIASH_PAR_REFERENCE_GTF//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'run_htseq_count': $( if [ ! 
-z ${VIASH_PAR_RUN_HTSEQ_COUNT+x} ]; then echo "r'${VIASH_PAR_RUN_HTSEQ_COUNT//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'run_multiqc': $( if [ ! -z ${VIASH_PAR_RUN_MULTIQC+x} ]; then echo "r'${VIASH_PAR_RUN_MULTIQC//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'min_success_rate': $( if [ ! -z ${VIASH_PAR_MIN_SUCCESS_RATE+x} ]; then echo "float(r'${VIASH_PAR_MIN_SUCCESS_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneName': $( if [ ! 
-z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'readMatesLengthsIn': $( if [ ! 
-z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'limitOutSAMoneReadBytes': $( if [ ! 
-z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMstrandField': $( if [ ! 
-z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderPG': $( if [ ! 
-z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outWigReferencesPrefix': $( if [ ! 
-z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMatchNminOverLread': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterIntronStrands': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapNoncan': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapATAC': $( if [ ! -z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSplicedMateMapLmin': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winAnchorMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimJunctionOverhangMin': $( if [ ! 
-z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'twopassMode': $( if [ ! 
-z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloAdapterSequence': $( if [ ! 
-z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCellFilter': $( if [ ! 
-z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'add_chromosome_info': $( if [ ! -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'non_unique': $( if [ ! -z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'supplementary_alignments': $( if [ ! 
-z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - - -def fetch_arguments_info(config: Dict[str, Any]) -> Dict[str, Any]: - """Fetch arguments from config""" - arguments = { - arg["name"].removeprefix("-").removeprefix("-"): arg - for group in config["functionality"]["argument_groups"] - for arg in group["arguments"] - } - return arguments - -def process_par( - par: Dict[str, Any], - arguments_info: Dict[str, Any], - gz_args: List[str], - temp_dir: Path -) -> Dict[str, Any]: - """ - Process the Viash par dictionary - - This turns file strings into Path objects and extracting gzipped files if need be. - - Parameters - ---------- - par: The par dictionary created by Viash - arguments_info: The arguments info Dictionary created by \`fetch_arguments_info\` - gz_args: A list of argument keys which could be gzip files which need to be decompressed. 
- temp_dir: A temporary directory in which to ungzip files - """ - new_par = {} - for key, value in par.items(): - arg_info = arguments_info[key] - # turn file arguments into paths - if value and arg_info["type"] == "file": - is_multiple = isinstance(value, list) - - if is_multiple: - value = [Path(val) for val in value] - else: - value = Path(value) - - if key in gz_args: - print(f">> Checking compression of --{key}", flush=True) - # turn value into list if need be - if not is_multiple: - value = [value] - - # extract - value = [extract_if_need_be(path, temp_dir) for path in value] - - # unlist if need be - if not is_multiple: - value = value[0] - - new_par[key] = value - return new_par - -def generate_cmd_arguments(par, arguments_info, step_filter=None, flatten=False): - """ - Generate command-line arguments by fetching the relevant args - - Parameters - ---------- - par: The par dictionary created by Viash - arguments_info: The arguments info Dictionary created by \`fetch_arguments_info\` - step_filter: If provided,\`par\` will be filtered to only contain arguments for which - argument.info.step == step_filter. - flatten: If \`False\`, the command for an argument with multiple values will be - \`["--key", "value1", "--key", "value2"]\`, otherwise \`["--key", "value1", "value2"]\`. 
- """ - cmd_args = [] - - for key, arg in arguments_info.items(): - arg_val = par.get(key) - # The info key is always present (changed in viash 0.7.4) - # in the parsed config (None if not specified in source config) - info = arg["info"] or {} - orig_arg = info.get("orig_arg") - step = info.get("step") - if arg_val and orig_arg and (not step_filter or step == step_filter): - if not arg.get("multiple", False): - arg_val = [arg_val] - - if arg["type"] in ["boolean_true", "boolean_false"]: - # if argument is a boolean_true or boolean_false, simply add the flag - arg_val = [orig_arg] - elif orig_arg.startswith("-"): - # if the orig arg flag is not a positional, - # add the flag in front of each element and flatten - if flatten: - arg_val = [str(x) for x in [orig_arg] + arg_val] - else: - arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] - - cmd_args.extend(arg_val) - - return cmd_args - -def is_gz_file(path: Path) -> bool: - """Check whether something is a gzip""" - with open(path, "rb") as file: - return file.read(2) == b"\\x1f\\x8b" - -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - """if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path""" - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f" Tar detected; extracting {par_value} to {unpacked_path}", flush=True) - - with tarfile.open(par_value, "r") as open_tar: - members = open_tar.getmembers() - root_dirs = [ - member - for member in members - if member.isdir() and member.name != "." 
and "/" not in member.name - ] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path(".")] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f" Gzip detected; extracting {par_value} to {unpacked_path}", flush=True) - - with gzip.open(par_value, "rb") as f_in: - with open(unpacked_path, "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -def load_star_reference(reference_index: str) -> None: - """Load star reference index into memory.""" - subprocess.run( - [ - "STAR", - "--genomeLoad", "LoadAndExit", - "--genomeDir", str(reference_index), - ], - check=True - ) - -def unload_star_reference(reference_index: str) -> None: - """Remove star reference index from memory.""" - subprocess.run( - [ - "STAR", - "--genomeLoad", "Remove", - "--genomeDir", str(reference_index), - ], - check=True - ) - -def star_and_htseq( - group_id: str, - r1_files: List[Path], - r2_files: List[Path], - temp_dir: Path, - par: Dict[str, Any], - arguments_info: Dict[str, Any], - num_threads: int -) -> Tuple[int, str] : - star_output = par["output"] / "per" / group_id - temp_dir_group = temp_dir / f"star_tmp_{group_id}" - unsorted_bam = star_output / "Aligned.out.bam" - sorted_bam = star_output / "Aligned.sorted.out.bam" - counts_file = star_output / "htseq-count.txt" - multiqc_path = star_output / "multiqc_data" - - print(f">> Running STAR for group '{group_id}' with command:", flush=True) - star_output.mkdir(parents=True, exist_ok=True) - temp_dir_group.parent.mkdir(parents=True, 
exist_ok=True) - run_star( - r1_files=r1_files, - r2_files=r2_files, - output_dir=star_output, - temp_dir=temp_dir / f"star_tmp_{group_id}", - par=par, - arguments_info=arguments_info, - num_threads=num_threads - ) - if not unsorted_bam.exists(): - return (1, f"Could not find unsorted bam at '{unsorted_bam}'") - - if par["run_htseq_count"]: - print(f">> Running samtools sort for group '{group_id}' with command:", flush=True) - run_samtools_sort(unsorted_bam, sorted_bam) - if not sorted_bam.exists(): - return (1, f"Could not find sorted bam at '{unsorted_bam}'") - - print(f">> Running htseq-count for group '{group_id}' with command:", flush=True) - run_htseq_count(sorted_bam, counts_file, par, arguments_info) - if not counts_file.exists(): - return (1, f"Could not find counts at '{counts_file}'") - - if par["run_multiqc"]: - run_multiqc(star_output) - if not multiqc_path.exists(): - return (1, f"Could not find MultiQC output at '{multiqc_path}'") - - return (0, "") - -def run_star( - r1_files: List[Path], - r2_files: List[Path], - output_dir: Path, - temp_dir: Path, - par: Dict[str, Any], - arguments_info: Dict[str, Any], - num_threads: int -) -> None: - """Run star""" - # process manual arguments - r1_pasted = [",".join([str(r1) for r1 in r1_files])] - r2_pasted = [",".join([str(r2) for r2 in r2_files])] if r2_files else [] - manual_par = { - "--genomeDir": [par["reference_index"]], - "--genomeLoad": ["LoadAndRemove"], - "--runThreadN": [str(num_threads)], - "--runMode": ["alignReads"], - "--readFilesIn": r1_pasted + r2_pasted, - # create a tempdir per group - "--outTmpDir": [temp_dir], - # make sure there is a trailing / - "--outFileNamePrefix": [f"{output_dir}/"], - # fix the outSAMtype to return unsorted BAM files - "--outSAMtype": ["BAM", "Unsorted"] - } - manual_cmd = [str(x) - for key, values in manual_par.items() - for x in [key] + values - ] - - # process all passthrough star arguments - par_cmd = generate_cmd_arguments(par, arguments_info, "star", 
flatten=True) - - # combine into one command and turn into strings - cmd_args = [str(val) for val in ["STAR"] + manual_cmd + par_cmd] - - # run star - subprocess.run(cmd_args, check=True) - -def run_samtools_sort( - unsorted_bam: Path, - sorted_bam: Path -) -> None: - "Run samtools sort" - cmd_args = [ - "samtools", - "sort", - "-o", - sorted_bam, - unsorted_bam, - ] - subprocess.run(cmd_args, check=True) - -def run_htseq_count( - sorted_bam: Path, - counts_file: Path, - par: Dict[str, Any], - arguments_info: Dict[str, Any] -) -> None: - """Run HTSeq count""" - # process manual arguments - manual_cmd = [ - sorted_bam, - par["reference_gtf"] - ] - - # process all passthrough htseq arguments - par_cmd = generate_cmd_arguments(par, arguments_info, "htseq") - - # combine into one command and turn into strings - cmd_args = [str(val) for val in ["htseq-count"] + manual_cmd + par_cmd] - - # run htseq - with open(counts_file, "w", encoding="utf-8") as file: - subprocess.run(cmd_args, check=True, stdout=file) - -def get_feature_info(reference_gtf) -> pd.DataFrame: - ref = gtfparse.read_gtf(reference_gtf) - ref_genes = ref.loc[(ref["feature"] == "gene") | (ref["source"] == "ERCC")] - return pd.DataFrame( - { - "feature_id": ref_genes["gene_id"], - "feature_type": "Gene Expression", - "feature_name": ref_genes["gene_name"] - } - ) - -def run_multiqc(input_dir: Path) -> None: - cmd_args = ["multiqc", str(input_dir), "--outdir", str(input_dir), "--no-report", "--force"] - - # run multiqc - subprocess.run(cmd_args, check=True) - - -######################## -### Main code ### -######################## - -def main(par, meta): - """Main function""" - - # check input arguments - assert len(par["input_id"]) == len(par["input_r1"]), "--input_r1 should have same length as --input_id" - if par["input_r2"]: - assert len(par["input_id"]) == len(par["input_r2"]), "--input_r2 should have same length as --input_id" - - # read config arguments - with open(meta["config"], "r", 
encoding="utf-8") as file: - config = yaml.safe_load(file) - - # fetch all arguments from the config and turn it into a Dict[str, Argument] - arguments_info = fetch_arguments_info(config) - - # temp_dir = "tmp/" - with tempfile.TemporaryDirectory( - prefix=f"{meta['functionality_name']}-", - dir=meta["temp_dir"], - ignore_cleanup_errors=True - ) as temp_dir: - temp_dir = Path(temp_dir) - temp_dir.mkdir(parents=True, exist_ok=True) - - # turn file strings into Paths and decompress gzip if need be - gz_args = ["input_r1", "input_r2", "reference_index", "reference_gtf"] - par = process_par(par, arguments_info, gz_args, temp_dir) - - # make sure input_r2 has same length as input_r1 - if not par["input_r2"]: - par["input_r2"] = [None for _ in par["input_r1"]] - - # group input_files by input_id - print(">> Group by --input_id", flush=True) - grouped_inputs = {} - for group_id, file_r1, file_r2 in zip(par["input_id"], par["input_r1"], par["input_r2"]): - if group_id not in grouped_inputs: - grouped_inputs[group_id] = ([], []) - grouped_inputs[group_id][0].append(file_r1) - if file_r2: - grouped_inputs[group_id][1].append(file_r2) - - # create output dir if need be - par["output"].mkdir(parents=True, exist_ok=True) - - # store features metadata - feature_info = get_feature_info(str(par["reference_gtf"])) - with open(par["output"] / "feature_info.tsv", "w", encoding="utf-8") as file: - feature_info.to_csv(file, sep="\\t", index=False) - - # try: - # print(">> Loading genome in memory", flush=True) - # load_star_reference(par["reference_index"]) - - cpus = meta.get("cpus", 1) - num_items = len(grouped_inputs) - pool_size = min(cpus, num_items) - num_threads_per_task = math.ceil(cpus / pool_size) - - with Pool(pool_size) as pool: - outs = pool.starmap( - lambda group_id, files: star_and_htseq( - group_id=group_id, - r1_files=files[0], - r2_files=files[1], - temp_dir=temp_dir, - par=par, - arguments_info=arguments_info, - num_threads=num_threads_per_task - ), - 
grouped_inputs.items() - ) - - num_errored = 0 - for exit, msg in outs: - if exit != 0: - print(f"Error: {msg}") - num_errored += 1 - - pct_succeeded = 1.0 - num_errored / len(outs) - print("------------------") - print(f"Success rate: {math.ceil(pct_succeeded * 100)}%") - - assert pct_succeeded >= par["min_success_rate"], f"Success rate should be at least {math.ceil(par['min_success_rate'] * 100)}%" - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then - unset VIASH_TEST_INPUT_R1 - IFS=';' - for var in $VIASH_PAR_INPUT_R1; do - unset IFS - if [ -z "$VIASH_TEST_INPUT_R1" ]; then - VIASH_TEST_INPUT_R1="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT_R1="$VIASH_TEST_INPUT_R1;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT_R1="$VIASH_TEST_INPUT_R1" -fi -if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then - unset VIASH_TEST_INPUT_R2 - IFS=';' - for var in $VIASH_PAR_INPUT_R2; do - unset IFS - if [ -z "$VIASH_TEST_INPUT_R2" ]; then - VIASH_TEST_INPUT_R2="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT_R2="$VIASH_TEST_INPUT_R2;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT_R2="$VIASH_TEST_INPUT_R2" -fi -if [ ! -z "$VIASH_PAR_REFERENCE_INDEX" ]; then - VIASH_PAR_REFERENCE_INDEX=$(ViashStripAutomount "$VIASH_PAR_REFERENCE_INDEX") -fi -if [ ! -z "$VIASH_PAR_REFERENCE_GTF" ]; then - VIASH_PAR_REFERENCE_GTF=$(ViashStripAutomount "$VIASH_PAR_REFERENCE_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - unset VIASH_TEST_GENOMEFASTAFILES - IFS=';' - for var in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - if [ -z "$VIASH_TEST_GENOMEFASTAFILES" ]; then - VIASH_TEST_GENOMEFASTAFILES="$(ViashStripAutomount "$var")" - else - VIASH_TEST_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES" -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then - VIASH_PAR_SJDBGTFFILE=$(ViashStripAutomount "$VIASH_PAR_SJDBGTFFILE") -fi -if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ]; then - VIASH_PAR_READFILESMANIFEST=$(ViashStripAutomount "$VIASH_PAR_READFILESMANIFEST") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml b/target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml deleted file mode 100644 index c0f10e359c8..00000000000 --- a/target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,179 +0,0 @@ -functionality: - name: "multi_star_to_h5mu" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "The directory created by `multi_star`" - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Convert the output of `multi_star` to a h5mu.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq/multi_star" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - 
cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu b/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu deleted file mode 100755 index d9c9fb15726..00000000000 --- a/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu +++ /dev/null @@ -1,1017 +0,0 @@ -#!/usr/bin/env bash - -# multi_star_to_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (author, maintainer) -# * Angela Oliveira Pisco (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events 
depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="multi_star_to_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "multi_star_to_h5mu 0.12.3" - echo "" - echo "Convert the output of \`multi_star\` to a h5mu." - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: /path/to/foo" - echo " The directory created by \`multi_star\`" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
- exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Angela Oliveira Pisco" -LABEL org.opencontainers.image.description="Companion container for running component mapping multi_star_to_h5mu" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-multi_star_to_h5mu-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "multi_star_to_h5mu 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-multi_star_to_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from pathlib import Path -import pandas as pd -import mudata as md -import anndata as ad -import numpy as np -import json - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -# convert to path -input_dir = Path(par["input"]) - -# read counts information -print("> Read counts data", flush=True) -per_obs_data = [] - -for input_counts in (input_dir / "per").glob("**/htseq-count.txt"): - per_obs_dir = input_counts.parent - input_id = per_obs_dir.name - input_multiqc = per_obs_dir / "multiqc_data" / "multiqc_data.json" - - data = pd.read_table( - input_counts, - index_col=0, - names=["cell_id", input_id], - dtype={"cell_id": "U", input_id: "i"} - ) - data2 = data[~data.index.str.startswith("__")] - - with open(input_multiqc, "r") as file: - qc = json.load(file) - - qc_star = qc.get("report_saved_raw_data", {}).get("multiqc_star", {}).get(input_id) - qc_htseq = qc.get("report_saved_raw_data", {}).get("multiqc_htseq", {}).get("htseq-count") - - per_obs_data.append({ - "counts": data2.transpose(), - "qc_star": pd.DataFrame(qc_star, index=[input_id]), - "qc_htseq": pd.DataFrame(qc_htseq, index=[input_id]) - }) - - -# combine all counts -counts = pd.concat([x["counts"] for x in per_obs_data], axis=0) -qc_star = pd.concat([x["qc_star"] for x in per_obs_data], axis=0) -qc_htseq = pd.concat([x["qc_htseq"] for x in per_obs_data], axis=0) - -# read feature info -feature_info = pd.read_csv(input_dir / "feature_info.tsv", sep="\\t", index_col=0) -feature_info_ord = feature_info.loc[counts.columns] - -var = pd.DataFrame( - data={ - "gene_ids": feature_info_ord.index, - "feature_types": "Gene Expression", - "gene_name": feature_info_ord["feature_name"], - } -).set_index("gene_ids") - -print("> construct anndata", flush=True) -adata = ad.AnnData( - X=counts, - obsm={"qc_star": qc_star, "qc_htseq": qc_htseq}, - var=var, - dtype=np.int32 -) - -print("> convert to mudata", flush=True) -mdata = md.MuData(adata) - -print("> write to file", flush=True) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) 
-VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/samtools_sort/.config.vsh.yaml b/target/docker/mapping/samtools_sort/.config.vsh.yaml deleted file mode 100644 index 3aec09b023a..00000000000 --- a/target/docker/mapping/samtools_sort/.config.vsh.yaml +++ /dev/null @@ -1,270 +0,0 @@ -functionality: - name: "samtools_sort" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: 
"https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input" - arguments: - - type: "file" - name: "--input" - description: "Path to the SAM/BAM/CRAM files containing the mapped reads." - info: - orig_arg: "in_sam" - example: - - "input.bam" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output" - arguments: - - type: "file" - name: "--output_bam" - description: "Filename to output the counts to." - info: - orig_arg: "-o" - example: - - "output.bam" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_bai" - description: "BAI-format index for BAM file." - info: null - example: - - "output.bam.bai" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_format" - description: "The output format. By default, samtools tries to select a format\ - \ based on the -o filename extension; if output is to standard output or no\ - \ format can be deduced, bam is selected." 
- info: - orig_arg: "-O" - example: - - "bam" - required: false - choices: - - "sam" - - "bam" - - "cram" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--compression" - description: "Compression level, from 0 (uncompressed) to 9 (best" - info: - orig_arg: "-l" - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "boolean_true" - name: "--minimizer_cluster" - description: "Sort unmapped reads (those in chromosome \"*\") by their sequence\ - \ minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse\ - \ complementing as appropriate. This has the effect of collating some similar\ - \ data together, improving the \ncompressibility of the unmapped sequence.\ - \ The minimiser kmer size is adjusted using the -K option. Note data compressed\ - \ \nin this manner may need to be name collated prior to conversion back to\ - \ fastq.\n\nMapped sequences are sorted by chromosome and position. \n" - info: - orig_arg: "-M" - direction: "input" - dest: "par" - - type: "integer" - name: "--minimizer_kmer" - description: "Sets the kmer size to be used in the -M option." - info: - orig_arg: "-K" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--sort_by_read_names" - description: "Sort by read names (i.e., the QNAME field) rather than by chromosomal\ - \ coordinates." - info: - orig_arg: "-n" - direction: "input" - dest: "par" - - type: "string" - name: "--sort_by" - description: "Sort first by this value in the alignment tag, then by position\ - \ or name (if also using -n)." - info: - orig_arg: "-t" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--no_pg" - description: "Do not add a @PG line to the header of the output file." 
- info: - orig_arg: "--no-PG" - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost\ - \ coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate\ - \ `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\ - \nNote that to generate an index file (by specifying `--output_bai`), the default\ - \ coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by\ - \ ` options are incompatible with `--output_bai`. \n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "samtools" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "pyyaml" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: 
"memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/samtools_sort" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/samtools_sort/samtools_sort" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/samtools_sort/samtools_sort b/target/docker/mapping/samtools_sort/samtools_sort deleted file mode 100755 index a85d26a34ed..00000000000 --- a/target/docker/mapping/samtools_sort/samtools_sort +++ /dev/null @@ -1,1185 +0,0 @@ -#!/usr/bin/env bash - -# samtools_sort 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (author, maintainer) -# * Angela Oliveira Pisco (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events 
depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="samtools_sort" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "samtools_sort 0.12.3" - echo "" - echo "Sort and (optionally) index alignments." - echo "" - echo "Reads are sorted by leftmost coordinates, or by read name when" - echo "\`--sort_by_read_names\` is used." - echo "" - echo "An appropriate \`@HD-SO\` sort order header tag will be added or an existing one" - echo "updated if necessary." - echo "" - echo "Note that to generate an index file (by specifying \`--output_bai\`), the default" - echo "coordinate sort must be used." - echo "Thus the \`--sort_by_read_names\` and \`--sort_by \` options are incompatible" - echo "with \`--output_bai\`." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.bam" - echo " Path to the SAM/BAM/CRAM files containing the mapped reads." - echo "" - echo "Output:" - echo " --output_bam" - echo " type: file, required parameter, output, file must exist" - echo " example: output.bam" - echo " Filename to output the counts to." - echo "" - echo " --output_bai" - echo " type: file, output, file must exist" - echo " example: output.bam.bai" - echo " BAI-format index for BAM file." 
- echo "" - echo " --output_format" - echo " type: string" - echo " example: bam" - echo " choices: [ sam, bam, cram ]" - echo " The output format. By default, samtools tries to select a format based" - echo " on the -o filename extension; if output is to standard output or no" - echo " format can be deduced, bam is selected." - echo "" - echo " --compression" - echo " type: integer" - echo " example: 5" - echo " Compression level, from 0 (uncompressed) to 9 (best" - echo "" - echo "Arguments:" - echo " --minimizer_cluster" - echo " type: boolean_true" - echo " Sort unmapped reads (those in chromosome \"*\") by their sequence" - echo " minimiser (Schleimer et al., 2003; Roberts et al., 2004)," - echo " also reverse complementing as appropriate. This has the effect of" - echo " collating some similar data together, improving the" - echo " compressibility of the unmapped sequence. The minimiser kmer size is" - echo " adjusted using the -K option. Note data compressed" - echo " in this manner may need to be name collated prior to conversion back to" - echo " fastq." - echo " Mapped sequences are sorted by chromosome and position." - echo "" - echo " --minimizer_kmer" - echo " type: integer" - echo " example: 20" - echo " Sets the kmer size to be used in the -M option." - echo "" - echo " --sort_by_read_names" - echo " type: boolean_true" - echo " Sort by read names (i.e., the QNAME field) rather than by chromosomal" - echo " coordinates." - echo "" - echo " --sort_by" - echo " type: string" - echo " Sort first by this value in the alignment tag, then by position or name" - echo " (if also using -n)." - echo "" - echo " --no_pg" - echo " type: boolean_true" - echo " Do not add a @PG line to the header of the output file." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y samtools procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "pyyaml" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Angela Oliveira Pisco" -LABEL org.opencontainers.image.description="Companion container for running component mapping samtools_sort" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-samtools_sort-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "samtools_sort 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_bam) - [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_BAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_bam=*) - [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam=*\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_BAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_bai) - [ -n "$VIASH_PAR_OUTPUT_BAI" ] && ViashError Bad arguments for option \'--output_bai\': \'$VIASH_PAR_OUTPUT_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_BAI="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bai. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_bai=*) - [ -n "$VIASH_PAR_OUTPUT_BAI" ] && ViashError Bad arguments for option \'--output_bai=*\': \'$VIASH_PAR_OUTPUT_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_BAI=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_format) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_format. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_format=*) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format=*\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --compression) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --compression=*) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --minimizer_cluster) - [ -n "$VIASH_PAR_MINIMIZER_CLUSTER" ] && ViashError Bad arguments for option \'--minimizer_cluster\': \'$VIASH_PAR_MINIMIZER_CLUSTER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MINIMIZER_CLUSTER=true - shift 1 - ;; - --minimizer_kmer) - [ -n "$VIASH_PAR_MINIMIZER_KMER" ] && ViashError Bad arguments for option \'--minimizer_kmer\': \'$VIASH_PAR_MINIMIZER_KMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMIZER_KMER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimizer_kmer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --minimizer_kmer=*) - [ -n "$VIASH_PAR_MINIMIZER_KMER" ] && ViashError Bad arguments for option \'--minimizer_kmer=*\': \'$VIASH_PAR_MINIMIZER_KMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MINIMIZER_KMER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sort_by_read_names) - [ -n "$VIASH_PAR_SORT_BY_READ_NAMES" ] && ViashError Bad arguments for option \'--sort_by_read_names\': \'$VIASH_PAR_SORT_BY_READ_NAMES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SORT_BY_READ_NAMES=true - shift 1 - ;; - --sort_by) - [ -n "$VIASH_PAR_SORT_BY" ] && ViashError Bad arguments for option \'--sort_by\': \'$VIASH_PAR_SORT_BY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SORT_BY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sort_by. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sort_by=*) - [ -n "$VIASH_PAR_SORT_BY" ] && ViashError Bad arguments for option \'--sort_by=*\': \'$VIASH_PAR_SORT_BY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SORT_BY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --no_pg) - [ -n "$VIASH_PAR_NO_PG" ] && ViashError Bad arguments for option \'--no_pg\': \'$VIASH_PAR_NO_PG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_NO_PG=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then - ViashError '--output_bam' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MINIMIZER_CLUSTER+x} ]; then - VIASH_PAR_MINIMIZER_CLUSTER="false" -fi -if [ -z ${VIASH_PAR_SORT_BY_READ_NAMES+x} ]; then - VIASH_PAR_SORT_BY_READ_NAMES="false" -fi -if [ -z ${VIASH_PAR_NO_PG+x} ]; then - VIASH_PAR_NO_PG="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_COMPRESSION" ]]; then - if ! [[ "$VIASH_PAR_COMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--compression' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MINIMIZER_CLUSTER" ]]; then - if ! [[ "$VIASH_PAR_MINIMIZER_CLUSTER" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--minimizer_cluster' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MINIMIZER_KMER" ]]; then - if ! [[ "$VIASH_PAR_MINIMIZER_KMER" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--minimizer_kmer' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SORT_BY_READ_NAMES" ]]; then - if ! [[ "$VIASH_PAR_SORT_BY_READ_NAMES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--sort_by_read_names' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NO_PG" ]]; then - if ! [[ "$VIASH_PAR_NO_PG" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--no_pg' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_FORMAT" ]; then - VIASH_PAR_OUTPUT_FORMAT_CHOICES=("sam:bam:cram") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_FORMAT_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_FORMAT:" ]]; then - ViashError '--output_format' specified value of \'$VIASH_PAR_OUTPUT_FORMAT\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT_BAM")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BAM")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_BAI")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BAI")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_BAM")" ) - VIASH_PAR_OUTPUT_BAM=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_BAM") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_BAM" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_BAI")" ) - VIASH_PAR_OUTPUT_BAI=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_BAI") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_BAI" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-samtools_sort-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import tempfile -import subprocess -from pathlib import Path -import yaml - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_bam': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAM//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_bai': $( if [ ! -z ${VIASH_PAR_OUTPUT_BAI+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAI//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FORMAT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'minimizer_cluster': $( if [ ! -z ${VIASH_PAR_MINIMIZER_CLUSTER+x} ]; then echo "r'${VIASH_PAR_MINIMIZER_CLUSTER//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'minimizer_kmer': $( if [ ! -z ${VIASH_PAR_MINIMIZER_KMER+x} ]; then echo "int(r'${VIASH_PAR_MINIMIZER_KMER//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sort_by_read_names': $( if [ ! -z ${VIASH_PAR_SORT_BY_READ_NAMES+x} ]; then echo "r'${VIASH_PAR_SORT_BY_READ_NAMES//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'sort_by': $( if [ ! -z ${VIASH_PAR_SORT_BY+x} ]; then echo "r'${VIASH_PAR_SORT_BY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'no_pg': $( if [ ! -z ${VIASH_PAR_NO_PG+x} ]; then echo "r'${VIASH_PAR_NO_PG//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -def generate_args(par, config): - # fetch arguments from config - arguments = [ - arg - for group in config["functionality"]["argument_groups"] - for arg in group["arguments"] - ] - - cmd_args = [] - - for arg in arguments: - arg_val = par.get(arg["name"].removeprefix("--")) - # The info key is always present (changed in viash 0.7.4) - # in the parsed config (None if not specified in source config) - info = arg["info"] or {} - orig_arg = info.get("orig_arg") - if arg_val and orig_arg: - if not arg.get("multiple", False): - arg_val = [arg_val] - - if arg["type"] in ["boolean_true", "boolean_false"]: - # if argument is a boolean_true or boolean_false, simply add the flag - arg_val = [orig_arg] - elif orig_arg.startswith("-"): - # if the orig arg flag is not a positional, - # add the flag in front of each element and flatten - arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] - - 
cmd_args.extend(arg_val) - - return cmd_args - -# read config arguments -config = yaml.safe_load(Path(meta["config"]).read_text()) - -print(">> Constructing command", flush=True) -cmd_args = [ "samtools", "sort" ] + generate_args(par, config) - -# manually process cpus parameter -if 'cpus' in meta and meta['cpus']: - cmd_args.extend(["--threads", str(meta["cpus"])]) -# add memory -if 'memory_mb' in meta and meta['memory_mb']: - import math - mem_per_thread = math.ceil(meta['memory_mb'] * .8 / meta['cpus']) - cmd_args.extend(["-m", f"{mem_per_thread}M"]) - -with tempfile.TemporaryDirectory(prefix="samtools-", dir=meta["temp_dir"]) as temp_dir: - # add tempdir - cmd_args.extend(["-T", str(temp_dir + "/")]) - - # run command - print(">> Running samtools sort with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - subprocess.run(cmd_args, check=True) - -if par.get("output_bai"): - print(">> Running samtools index with command:", flush=True) - cmd_index_args = ["samtools", "index", "-b", par["output_bam"], par["output_bai"]] - print("+ " + ' '.join([str(x) for x in cmd_index_args]), flush=True) - subprocess.run(cmd_index_args, check=True) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then - VIASH_PAR_OUTPUT_BAM=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_BAM") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ]; then - VIASH_PAR_OUTPUT_BAI=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_BAI") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! -e "$VIASH_PAR_OUTPUT_BAM" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_BAM' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ] && [ ! -e "$VIASH_PAR_OUTPUT_BAI" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_BAI' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/star_align/.config.vsh.yaml b/target/docker/mapping/star_align/.config.vsh.yaml deleted file mode 100644 index 1787020a5c0..00000000000 --- a/target/docker/mapping/star_align/.config.vsh.yaml +++ /dev/null @@ -1,2535 +0,0 @@ -functionality: - name: "star_align" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input/Output" - arguments: - - type: "file" - name: "--input" - alternatives: - - "--readFilesIn" - description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ - \ argument in the STAR command." 
- info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "--genomeDir" - description: "Path to the reference built by star_build_reference. Corresponds\ - \ to the --genomeDir argument in the STAR command." - info: null - example: - - "/path/to/reference" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--outFileNamePrefix" - description: "Path to output directory. Corresponds to the --outFileNamePrefix\ - \ argument in the STAR command." - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Run Parameters" - arguments: - - type: "integer" - name: "--runRNGseed" - description: "random number generator seed." - info: null - example: - - 777 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome Parameters" - arguments: - - type: "string" - name: "--genomeLoad" - description: "mode of shared memory usage for the genome files. Only used with\ - \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ - \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ - \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ - \ and exit, keeping the genome in memory for future runs\n- Remove \ - \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ - \ ... 
do not use shared memory, each job will have its own private copy of\ - \ the genome" - info: null - example: - - "NoSharedMemory" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--genomeFastaFiles" - description: "path(s) to the fasta files with the genome sequences, separated\ - \ by spaces. These files should be plain text FASTA files, they *cannot* be\ - \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ - \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ - \ sequences to the genome (e.g. spike-ins)." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--genomeFileSizes" - description: "genome files exact sizes in bytes. Typically, this should not\ - \ be defined by the user." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeTransformOutput" - description: "which output to transform back to original genome\n\n- SAM \ - \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ - \ None ... no transformation of the output" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeChrSetMitochondrial" - description: "names of the mitochondrial chromosomes. Presently only used for\ - \ STARsolo statistics output/" - info: null - example: - - "chrM" - - "M" - - "MT" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Splice Junctions Database" - arguments: - - type: "string" - name: "--sjdbFileChrStartEnd" - description: "path to the files with genomic coordinates (chr start \ - \ end strand) for the splice junction introns. 
Multiple files can be\ - \ supplied and will be concatenated." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--sjdbGTFfile" - description: "path to the GTF file with annotations" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFchrPrefix" - description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ - \ ENSMEBL annotations with UCSC genomes)" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFfeatureExon" - description: "feature type in GTF file to be used as exons for building transcripts" - info: null - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentTranscript" - description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ - \ works for GTF files)" - info: null - example: - - "transcript_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGene" - description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ - \ for GTF files)" - info: null - example: - - "gene_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneName" - description: "GTF attribute name for parent gene name" - info: null - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneType" - description: "GTF attribute name for parent gene type" - info: null - example: - - "gene_type" - - "gene_biotype" - required: false - direction: 
"input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--sjdbOverhang" - description: "length of the donor/acceptor sequence on each side of the junctions,\ - \ ideally = (mate_length - 1)" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--sjdbScore" - description: "extra alignment score for alignments that cross database junctions" - info: null - example: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbInsertSave" - description: "which files to save when sjdb junctions are inserted on the fly\ - \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ - - All ... all files including big Genome, SA and SAindex - this will create\ - \ a complete genome directory" - info: null - example: - - "Basic" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variation parameters" - arguments: - - type: "string" - name: "--varVCFfile" - description: "path to the VCF file that contains variation data. The 10th column\ - \ should contain the genotype information, e.g. 0/1" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Parameters" - arguments: - - type: "string" - name: "--readFilesType" - description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ - - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ - \ samtools view\n- SAM PE ... 
SAM or BAM paired-end reads; for BAM use\ - \ --readFilesCommand samtools view" - info: null - example: - - "Fastx" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesSAMattrKeep" - description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ - \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ - - None ... do not keep any tags" - info: null - example: - - "All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--readFilesManifest" - description: "path to the \"manifest\" file with the names of read files. The\ - \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ - \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ - \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ - \ but not tabs are allowed in file names.\nIf read_group_line does not start\ - \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ - If read_group_line starts with ID:, it can contain several fields separated\ - \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ - \ line." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesPrefix" - description: "prefix for the read files names, i.e. it will be added in front\ - \ of the strings in --readFilesIn" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesCommand" - description: "command line to execute for each of the input file. This command\ - \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ - \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." 
- info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readMapNumber" - description: "number of reads to map from the beginning of the file\n\n-1: map\ - \ all reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readMatesLengthsIn" - description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ - \ mates are the same / not the same. NotEqual is safe in all situations." - info: null - example: - - "NotEqual" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readNameSeparator" - description: "character(s) separating the part of the read names that will be\ - \ trimmed in output (read name after space is always trimmed)" - info: null - example: - - "/" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readQualityScoreBase" - description: "number to be subtracted from the ASCII code to get Phred quality\ - \ score" - info: null - example: - - 33 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Clipping" - arguments: - - type: "string" - name: "--clipAdapterType" - description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ - \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ - - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ - \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ - \ ... 
no adapter clipping, all other clip* parameters are disregarded" - info: null - example: - - "Hamming" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--clip3pNbases" - description: "number(s) of bases to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--clip3pAdapterSeq" - description: "adapter sequences to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ - \ sequence with the length equal to read length" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "double" - name: "--clip3pAdapterMMp" - description: "max proportion of mismatches for 3p adapter clipping for each\ - \ mate. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0.1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip3pAfterAdapterNbases" - description: "number of bases to clip from 3p of each mate after the adapter\ - \ clipping. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip5pNbases" - description: "number(s) of bases to clip from 5p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." 
- info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Limits" - arguments: - - type: "long" - name: "--limitGenomeGenerateRAM" - description: "maximum available RAM (bytes) for genome generation" - info: null - example: - - 31000000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitIObufferSize" - description: "max available buffers size (bytes) for input/output, per thread" - info: null - example: - - 30000000 - - 50000000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "long" - name: "--limitOutSAMoneReadBytes" - description: "max size of the SAM record (bytes) for one read. Recommended value:\ - \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - info: null - example: - - 100000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJoneRead" - description: "max number of junctions for one read (including all multi-mappers)" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJcollapsed" - description: "max number of collapsed junctions" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitBAMsortRAM" - description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ - \ be set to the genome index size. 0 value can only be used with --genomeLoad\ - \ NoSharedMemory option." 
- info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitSjdbInsertNsj" - description: "maximum number of junctions to be inserted to the genome on the\ - \ fly at the mapping stage, including those from annotations and those detected\ - \ in the 1st step of the 2-pass run" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitNreadsSoft" - description: "soft limit on the number of reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: general" - arguments: - - type: "string" - name: "--outTmpKeep" - description: "whether to keep the temporary files after STAR runs is finished\n\ - \n- None ... remove all temporary files\n- All ... keep all files" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outStd" - description: "which output will be directed to stdout (standard out)\n\n- Log\ - \ ... log messages\n- SAM ... alignments\ - \ in SAM format (which normally are output to Aligned.out.sam file), normal\ - \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ - \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ - \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ - \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ - \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" - info: null - example: - - "Log" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outReadsUnmapped" - description: "output of unmapped and partially mapped (i.e. 
mapped only one\ - \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ - \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outQSconversionAdd" - description: "add this number to the quality score (e.g. to convert from Illumina\ - \ to Sanger, use -31)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outMultimapperOrder" - description: "order of multimapping alignments in the output files\n\n- Old_2.4\ - \ ... quasi-random order used before 2.5.0\n- Random \ - \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ - \ are always adjacent, all alignment for each read stay together. This option\ - \ will become default in the future releases." - info: null - example: - - "Old_2.4" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: SAM and BAM" - arguments: - - type: "string" - name: "--outSAMtype" - description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ - \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ - 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ - \ ... sorted by coordinate. This option will allocate extra memory for sorting\ - \ which can be specified by --limitBAMsortRAM." - info: null - example: - - "SAM" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMmode" - description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ - \ SAM output\n- NoQS ... 
full SAM but without quality scores" - info: null - example: - - "Full" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMstrandField" - description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ - - intronMotif ... strand derived from the intron motif. This option changes\ - \ the output alignments: reads with inconsistent and/or non-canonical introns\ - \ are filtered out." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattributes" - description: "a string of desired SAM attributes, in the order desired for the\ - \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ - - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ - \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ - \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ - \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ - \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ - \ local alignment score, +1/-1 for matches/mismateches, score* penalties for\ - \ indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n\ - - nM ... number of mismatches. For PE reads, sum over two mates.\n\ - - NM ... edit distance to the reference (number of mismatched + inserted\ - \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ - \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ - \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ - \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ - \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ - \ annotated, 20 is added to its motif value.\n- jI ... start and\ - \ end of introns for all junctions (1-based).\n- XS ... 
alignment\ - \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ - \ string. Standard SAM tag.\n- ch ... marks all segment of all chimeric\ - \ alingments for --chimOutType WithinBAM output.\n- cN ... number\ - \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ - \ ... variant allele\n- vG ... genomic coordinate of the variant\ - \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ - \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ - \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ - \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ - \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ - \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ - \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ - \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ - \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ - \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ - \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ - \ genome generated with --genomeTransformType Diploid .\n- rB ...\ - \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ - \ of the variant." - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMattrIHstart" - description: "start value for the IH attribute. 0 may be required by some downstream\ - \ software, such as Cufflinks or StringTie." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMunmapped" - description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ - \ ... no output\n- Within ... 
output unmapped reads within the main SAM\ - \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ - \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ - \ to its mapped mate. Only affects multi-mapping reads." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMorder" - description: "type of sorting for the SAM output\n\nPaired: one mate after the\ - \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ - \ other for all paired alignments, the order is kept the same as in the input\ - \ FASTQ files" - info: null - example: - - "Paired" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMprimaryFlag" - description: "which alignments are considered primary - all others will be marked\ - \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ - \ the best score is primary\n- AllBestScore ... all alignments with the best\ - \ score are primary" - info: null - example: - - "OneBestScore" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMreadID" - description: "read ID record type\n\n- Standard ... 
first word (until space)\ - \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ - \ read number (index) in the FASTx file" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMmapqUnique" - description: "0 to 255: the MAPQ value for unique mappers" - info: null - example: - - 255 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagOR" - description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ - \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ - \ are not set otherwise." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagAND" - description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ - \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ - \ are not set otherwise." - info: null - example: - - 65535 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattrRGline" - description: "SAM/BAM read group line. The first word contains the read group\ - \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ - \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ - \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ - \ correspons to different (comma separated) input files in --readFilesIn.\ - \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ - \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderHD" - description: "@HD (header) line of the SAM header" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderPG" - description: "extra @PG (software) line of the SAM header (in addition to STAR)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderCommentFile" - description: "path to the file with @CO (comment) lines of the SAM header" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMfilter" - description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ - \ ... only keep the reads for which all alignments are to the extra reference\ - \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ - \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ - \ at the mapping stage." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMmultNmax" - description: "max number of multiple alignments for a read that will be output\ - \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ - \ scoring alignment will be output first\n\n- -1 ... 
all alignments (up to\ - \ --outFilterMultimapNmax) will be output" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMtlen" - description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ - - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ - \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ - \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ - \ from 1 for overlapping mates with protruding ends" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMcompression" - description: "-1 to 10 BAM compression level, -1=default compression (6?),\ - \ 0=no compression, 10=maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingThreadN" - description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingBinsN" - description: ">0: number of genome bins for coordinate-sorting" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "BAM processing" - arguments: - - type: "string" - name: "--bamRemoveDuplicatesType" - description: "mark duplicates in the BAM file, for now only works with (i) sorted\ - \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ - \ - ... no duplicate removal/marking\n- UniqueIdentical\ - \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ - \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ - \ unique mappers but not multimappers." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--bamRemoveDuplicatesMate2basesN" - description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ - \ for RAMPAGE)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Wiggle" - arguments: - - type: "string" - name: "--outWigType" - description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ - . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ - - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ - \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ - \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ - \ only 2nd read" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outWigStrand" - description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ - \ strands, str1 and str2\n- Unstranded ... collapsed strands" - info: null - example: - - "Stranded" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigReferencesPrefix" - description: "prefix matching reference names to include in the output wiggle\ - \ file, e.g. \"chr\", default \"-\" - include all references" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigNorm" - description: "type of normalization for the signal\n\n- RPM ... reads per\ - \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" - info: null - example: - - "RPM" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering" - arguments: - - type: "string" - name: "--outFilterType" - description: "type of filtering\n\n- Normal ... standard filtering using only\ - \ current alignment\n- BySJout ... keep only those reads that contain junctions\ - \ that passed filtering into SJ.out.tab" - info: null - example: - - "Normal" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapScoreRange" - description: "the score range below the maximum score for multimapping alignments" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapNmax" - description: "maximum number of loci the read is allowed to map to. Alignments\ - \ (all of them) will be output only if the read maps to no more loci than\ - \ this value.\n\nOtherwise no alignments will be output, and the read will\ - \ be counted as \"mapped to too many loci\" in the Log.final.out ." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMismatchNmax" - description: "alignment will be output only if it has no more mismatches than\ - \ this value." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverLmax" - description: "alignment will be output only if its ratio of mismatches to *mapped*\ - \ length is less than or equal to this value." 
- info: null - example: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverReadLmax" - description: "alignment will be output only if its ratio of mismatches to *read*\ - \ length is less than or equal to this value." - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterScoreMin" - description: "alignment will be output only if its score is higher than or equal\ - \ to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterScoreMinOverLread" - description: "same as outFilterScoreMin, but normalized to read length (sum\ - \ of mates' lengths for paired-end reads)" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMatchNmin" - description: "alignment will be output only if the number of matched bases is\ - \ higher than or equal to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMatchNminOverLread" - description: "sam as outFilterMatchNmin, but normalized to the read length (sum\ - \ of mates' lengths for paired-end reads)." - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronMotifs" - description: "filter alignment using their motifs\n\n- None \ - \ ... no filtering\n- RemoveNoncanonical ... filter\ - \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ - \ ... 
filter out alignments that contain non-canonical unannotated junctions\ - \ when using annotated splice junctions database. The annotated non-canonical\ - \ junctions will be kept." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronStrands" - description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ - \ alignments that have junctions with inconsistent strands\n- None \ - \ ... no filtering" - info: null - example: - - "RemoveInconsistentStrands" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output splice junctions (SJ.out.tab)" - arguments: - - type: "string" - name: "--outSJtype" - description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ - \ output\n- None ... no splice junction output" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering: Splice Junctions" - arguments: - - type: "string" - name: "--outSJfilterReads" - description: "which reads to consider for collapsed splice junctions output\n\ - \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ - \ mapping reads only" - info: null - example: - - "All" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSJfilterOverhangMin" - description: "minimum overhang length for splice junctions on both sides for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - does not apply to annotated junctions" - info: null - example: - - 30 - - 12 - - 12 - - 12 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountUniqueMin" - description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ - \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ - \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ - \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ - \ are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountTotalMin" - description: "minimum total (multi-mapping+unique) read count per junction for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ - Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ - \ conditions are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterDistToOtherSJmin" - description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ - does not apply to annotated junctions" - info: null - example: - - 10 - - 0 - - 5 - - 10 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterIntronMaxVsReadN" - description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ - \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ - \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ - does not apply to annotated junctions" - info: null - example: - - 50000 - - 100000 - - 200000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Scoring" - arguments: - - type: "integer" - name: "--scoreGap" - description: "splice junction penalty (independent on intron motif)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapNoncan" - description: "non-canonical junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapGCAG" - description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" - info: null - example: - - -4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapATAC" - description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGenomicLengthLog2scale" - description: "extra score logarithmically scaled with genomic length of the\ - \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelOpen" - description: "deletion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelBase" - description: "deletion extension penalty per base (in addition to scoreDelOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: 
"par" - - type: "integer" - name: "--scoreInsOpen" - description: "insertion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsBase" - description: "insertion extension penalty per base (in addition to scoreInsOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreStitchSJshift" - description: "maximum score reduction while searching for SJ boundaries in the\ - \ stitching step" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Alignments and Seeding" - arguments: - - type: "integer" - name: "--seedSearchStartLmax" - description: "defines the search start point through the read - the read is\ - \ split into pieces no longer than this value" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--seedSearchStartLmaxOverLread" - description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ - \ for paired-end reads)" - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSearchLmax" - description: "defines the maximum length of the seeds, if =0 seed length is\ - \ not limited" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMultimapNmax" - description: "only pieces that map fewer than this value are utilized in the\ - \ stitching procedure" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerReadNmax" - description: "max 
number of seeds per read" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerWindowNmax" - description: "max number of seeds per window" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedNoneLociPerWindow" - description: "max number of one seed loci per window" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSplitMin" - description: "min length of the seed sequences split by Ns or mate gap" - info: null - example: - - 12 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMapMin" - description: "min length of seeds to be mapped" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMin" - description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ - \ otherwise it is considered Deletion" - info: null - example: - - 21 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMax" - description: "maximum intron size, if 0, max intron size will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignMatesGapMax" - description: "maximum gap between two mates, if 0, max intron gap will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: 
"--alignSJoverhangMin" - description: "minimum overhang (i.e. block size) for spliced alignments" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJstitchMismatchNmax" - description: "maximum number of mismatches for stitching of the splice junctions\ - \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ - \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." - info: null - example: - - 0 - - -1 - - 0 - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--alignSJDBoverhangMin" - description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ - \ alignments" - info: null - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSplicedMateMapLmin" - description: "minimum mapped length for a read mate that is spliced" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alignSplicedMateMapLminOverLmate" - description: "alignSplicedMateMapLmin normalized to mate length" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignWindowsPerReadNmax" - description: "max number of windows per read" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerWindowNmax" - description: "max number of transcripts per window" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerReadNmax" - description: "max number of different alignments per read 
to consider" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsType" - description: "type of read ends alignment\n\n- Local ... standard\ - \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ - \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ - \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ - \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ - \ local alignment" - info: null - example: - - "Local" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsProtrude" - description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ - \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ - \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ - \ ConcordantPair ... report alignments with non-zero protrusion\ - \ as concordant pairs\n- DiscordantPair ... report alignments\ - \ with non-zero protrusion as discordant pairs" - info: null - example: - - "0 ConcordantPair" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignSoftClipAtReferenceEnds" - description: "allow the soft-clipping of the alignments past the end of the\ - \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ - \ with Cufflinks" - info: null - example: - - "Yes" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignInsertionFlush" - description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ - \ are not flushed\n- Right ... 
insertions are flushed to the right" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Paired-End reads" - arguments: - - type: "integer" - name: "--peOverlapNbasesMin" - description: "minimum number of overlapping bases to trigger mates merging and\ - \ realignment. Specify >0 value to switch on the \"merginf of overlapping\ - \ mates\" algorithm." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--peOverlapMMp" - description: "maximum proportion of mismatched bases in the overlap area" - info: null - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Windows, Anchors, Binning" - arguments: - - type: "integer" - name: "--winAnchorMultimapNmax" - description: "max number of loci anchors are allowed to map to" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winBinNbits" - description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ - \ each window will occupy an integer number of bins." 
- info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winAnchorDistNbins" - description: "max number of bins between two anchors that allows aggregation\ - \ of anchors into one window" - info: null - example: - - 9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winFlankNbins" - description: "log2(winFlank), where win Flank is the size of the left and right\ - \ flanking regions for each window" - info: null - example: - - 4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--winReadCoverageRelativeMin" - description: "minimum relative coverage of the read sequence by the seeds in\ - \ a window, for STARlong algorithm only." - info: null - example: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winReadCoverageBasesMin" - description: "minimum number of bases covered by the seeds in a window , for\ - \ STARlong algorithm only." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Chimeric Alignments" - arguments: - - type: "string" - name: "--chimOutType" - description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ - - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ - - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ - - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ - \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ - \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" - info: null - example: - - "Junctions" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimSegmentMin" - description: "minimum length of chimeric segment length, if ==0, no chimeric\ - \ output" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreMin" - description: "minimum total (summed) score of the chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreDropMax" - description: "max drop (difference) of chimeric score (the sum of scores of\ - \ all chimeric segments) from the read length" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreSeparation" - description: "minimum difference (separation) between the best chimeric score\ - \ and the next one" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreJunctionNonGTAG" - description: "penalty for a non-GT/AG chimeric junction" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimJunctionOverhangMin" - description: "minimum overhang for a chimeric junction" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimSegmentReadGapMax" - description: "maximum gap in the read sequence between chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - 
type: "string" - name: "--chimFilter" - description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ - - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ - \ junction" - info: null - example: - - "banGenomicN" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimMainSegmentMultNmax" - description: "maximum number of multi-alignments for the main chimeric segment.\ - \ =1 will prohibit multimapping main segments." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapNmax" - description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ - \ old scheme for chimeric detection which only considered unique alignments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapScoreRange" - description: "the score range for multi-mapping chimeras below the best chimeric\ - \ score. Only works with --chimMultimapNmax > 1" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimNonchimScoreDropMin" - description: "to trigger chimeric detection, the drop in the best non-chimeric\ - \ alignment score with respect to the read length has to be greater than this\ - \ value" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimOutJunctionFormat" - description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ - \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ - \ command line and Nreads: total, unique/multi-mapping" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Quantification of Annotations" - arguments: - - type: "string" - name: "--quantMode" - description: "types of quantification requested\n\n- - ... none\n\ - - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ - \ file\n- GeneCounts ... count reads per gene" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--quantTranscriptomeBAMcompression" - description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ - \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ - - 10 ... maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--quantTranscriptomeBan" - description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ - \ prohibit indels, soft clipping and single-end alignments - compatible with\ - \ RSEM\n- Singleend ... prohibit single-end alignments" - info: null - example: - - "IndelSoftclipSingleend" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "2-pass Mapping" - arguments: - - type: "string" - name: "--twopassMode" - description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ - \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ - \ the genome indices on the fly" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--twopass1readsN" - description: "number of reads to process for the 1st step. Use very large number\ - \ (or default -1) to map all reads in the first step." 
- info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "WASP parameters" - arguments: - - type: "string" - name: "--waspOutputMode" - description: "WASP allele-specific output type. This is re-implementation of\ - \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ - \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ - \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ - \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "STARsolo (single cell RNA-seq) parameters" - arguments: - - type: "string" - name: "--soloType" - description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ - \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ - \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ - \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ - \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ - \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ - \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ - \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ - \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ - \ UMI sequences, alignments deduplicated according to alignment start and\ - \ end (after extending soft-clipped bases)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCBwhitelist" - description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ - \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ - \ all cell barcodes are allowed" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--soloCBstart" - description: "cell barcode start base" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloCBlen" - description: "cell barcode length" - info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIstart" - description: "UMI start base" - info: null - example: - - 17 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIlen" - description: "UMI length" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeReadLength" - description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ - - 0 ... not defined, do not check" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeMate" - description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ - \n- 0 ... barcode sequence is on separate read, which should always be the\ - \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ - \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBposition" - description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ - \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ - \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ - start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ - \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ - \ position with of the CB start(end) with respect to the Anchor Base\nString\ - \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ - \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIposition" - description: "position of the UMI on the barcode read, same as soloCBposition\n\ - \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ - \ 3_9_3_14" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloAdapterSequence" - description: "adapter sequence to anchor barcodes. Only one adapter sequence\ - \ is allowed." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloAdapterMismatchesNmax" - description: "maximum number of mismatches allowed in adapter sequence." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBmatchWLtype" - description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ - \ ... only exact matches allowed\n- 1MM \ - \ ... 
only one match in whitelist with 1 mismatched base allowed.\ - \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ - \ ... multiple matches in whitelist with 1 mismatched\ - \ base allowed, posterior probability calculation is used choose one of the\ - \ matches.\nAllowed CBs have to have at least one read with exact match. This\ - \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ - \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ - \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ - \ multimatching to WL is allowed for CBs with N-bases. This option matches\ - \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ - \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ - \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ - \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - info: null - example: - - "1MM_multi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeSeq" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ - \ CR UR .\nThis parameter is required when running STARsolo with input from\ - \ SAM." 
- info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeQual" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ - \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ - \ to all bases." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloStrand" - description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ - \ information\n- Forward ... read strand same as the original RNA molecule\n\ - - Reverse ... read strand opposite to the original RNA molecule" - info: null - example: - - "Forward" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloFeatures" - description: "genomic features for which the UMI counts per Cell Barcode are\ - \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ - - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ - \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ - \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ - \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ - \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ - \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ - \ Do not count reads with 100% exonic overlap in the antisense direction." - info: null - example: - - "Gene" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloMultiMappers" - description: "counting method for reads mapping to multiple genes\n\n- Unique\ - \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ - \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ - \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ - \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ - \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ - \ Maximization algorithm" - info: null - example: - - "Unique" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIdedup" - description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ - \ ... all UMIs with 1 mismatch distance to each other\ - \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ - \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ - \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ - \ but with more stringent criteria for duplicate UMIs\n- Exact \ - \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ - \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ - \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." - info: null - example: - - "1MM_All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIfiltering" - description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ - - - ... basic filtering: remove UMIs with N and homopolymers\ - \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ - \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ - \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ - \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ - \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFileNames" - description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ - \ barcode_sequences cell_feature_count_matrix" - info: null - example: - - "Solo.out/" - - "features.tsv" - - "barcodes.tsv" - - "matrix.mtx" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellFilter" - description: "cell filtering type and parameters\n\n- None ... do\ - \ not output filtered cells\n- TopCells ... only report top cells by\ - \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ - \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ - \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ - \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ - \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ - \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ - \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ - Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ - \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ - \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ - \ 0.99 10 45000 90000 500 0.01\ - \ 20000 0.01 10000" - info: null - example: - - "CellRanger2.2" - - "3000" - - "0.99" - - "10" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFormatFeaturesGeneField3" - description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ - \ is output." 
- info: null - example: - - "Gene Expression" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellReadStats" - description: "Output reads statistics for each CB\n\n- Standard ... standard\ - \ output" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Align fastq files using STAR." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "docker" - env: - - "STAR_VERSION 2.7.10b" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - 
label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_align" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_align/star_align" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/star_align/setup_logger.py b/target/docker/mapping/star_align/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/mapping/star_align/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - 
console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/mapping/star_align/star_align b/target/docker/mapping/star_align/star_align deleted file mode 100755 index b05bbaaa1e4..00000000000 --- a/target/docker/mapping/star_align/star_align +++ /dev/null @@ -1,5713 +0,0 @@ -#!/usr/bin/env bash - -# star_align 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 
's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="star_align" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "star_align 0.12.3" - echo "" - echo "Align fastq files using STAR." - echo "" - echo "Input/Output:" - echo " --readFilesIn, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example:" - echo "mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz" - echo " The FASTQ files to be analyzed. 
Corresponds to the --readFilesIn" - echo " argument in the STAR command." - echo "" - echo " --genomeDir, --reference" - echo " type: file, required parameter, file must exist" - echo " example: /path/to/reference" - echo " Path to the reference built by star_build_reference. Corresponds to the" - echo " --genomeDir argument in the STAR command." - echo "" - echo " --outFileNamePrefix, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/foo" - echo " Path to output directory. Corresponds to the --outFileNamePrefix" - echo " argument in the STAR command." - echo "" - echo "Run Parameters:" - echo " --runRNGseed" - echo " type: integer" - echo " example: 777" - echo " random number generator seed." - echo "" - echo "Genome Parameters:" - echo " --genomeLoad" - echo " type: string" - echo " example: NoSharedMemory" - echo " mode of shared memory usage for the genome files. Only used with" - echo " --runMode alignReads." - echo " - LoadAndKeep ... load genome into shared and keep it in memory" - echo " after run" - echo " - LoadAndRemove ... load genome into shared but remove it after run" - echo " - LoadAndExit ... load genome into shared memory and exit, keeping" - echo " the genome in memory for future runs" - echo " - Remove ... do not map anything, just remove loaded genome" - echo " from memory" - echo " - NoSharedMemory ... do not use shared memory, each job will have its" - echo " own private copy of the genome" - echo "" - echo " --genomeFastaFiles" - echo " type: file, multiple values allowed, file must exist" - echo " path(s) to the fasta files with the genome sequences, separated by" - echo " spaces. These files should be plain text FASTA files, they *cannot* be" - echo " zipped." - echo " Required for the genome generation (--runMode genomeGenerate). Can also" - echo " be used in the mapping (--runMode alignReads) to add extra (new)" - echo " sequences to the genome (e.g. spike-ins)." 
- echo "" - echo " --genomeFileSizes" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " genome files exact sizes in bytes. Typically, this should not be defined" - echo " by the user." - echo "" - echo " --genomeTransformOutput" - echo " type: string, multiple values allowed" - echo " which output to transform back to original genome" - echo " - SAM ... SAM/BAM alignments" - echo " - SJ ... splice junctions (SJ.out.tab)" - echo " - None ... no transformation of the output" - echo "" - echo " --genomeChrSetMitochondrial" - echo " type: string, multiple values allowed" - echo " example: chrM;M;MT" - echo " names of the mitochondrial chromosomes. Presently only used for STARsolo" - echo " statistics output/" - echo "" - echo "Splice Junctions Database:" - echo " --sjdbFileChrStartEnd" - echo " type: string, multiple values allowed" - echo " path to the files with genomic coordinates (chr start end" - echo " strand) for the splice junction introns. Multiple files can be" - echo " supplied and will be concatenated." - echo "" - echo " --sjdbGTFfile" - echo " type: file, file must exist" - echo " path to the GTF file with annotations" - echo "" - echo " --sjdbGTFchrPrefix" - echo " type: string" - echo " prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL" - echo " annotations with UCSC genomes)" - echo "" - echo " --sjdbGTFfeatureExon" - echo " type: string" - echo " example: exon" - echo " feature type in GTF file to be used as exons for building transcripts" - echo "" - echo " --sjdbGTFtagExonParentTranscript" - echo " type: string" - echo " example: transcript_id" - echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" - echo " works for GTF files)" - echo "" - echo " --sjdbGTFtagExonParentGene" - echo " type: string" - echo " example: gene_id" - echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" - echo " files)" - echo "" - echo " --sjdbGTFtagExonParentGeneName" - echo " type: string, multiple values allowed" - echo " example: gene_name" - echo " GTF attribute name for parent gene name" - echo "" - echo " --sjdbGTFtagExonParentGeneType" - echo " type: string, multiple values allowed" - echo " example: gene_type;gene_biotype" - echo " GTF attribute name for parent gene type" - echo "" - echo " --sjdbOverhang" - echo " type: integer" - echo " example: 100" - echo " length of the donor/acceptor sequence on each side of the junctions," - echo " ideally = (mate_length - 1)" - echo "" - echo " --sjdbScore" - echo " type: integer" - echo " example: 2" - echo " extra alignment score for alignments that cross database junctions" - echo "" - echo " --sjdbInsertSave" - echo " type: string" - echo " example: Basic" - echo " which files to save when sjdb junctions are inserted on the fly at the" - echo " mapping step" - echo " - Basic ... only small junction / transcript files" - echo " - All ... all files including big Genome, SA and SAindex - this will" - echo " create a complete genome directory" - echo "" - echo "Variation parameters:" - echo " --varVCFfile" - echo " type: string" - echo " path to the VCF file that contains variation data. The 10th column" - echo " should contain the genotype information, e.g. 
0/1" - echo "" - echo "Read Parameters:" - echo " --readFilesType" - echo " type: string" - echo " example: Fastx" - echo " format of input read files" - echo " - Fastx ... FASTA or FASTQ" - echo " - SAM SE ... SAM or BAM single-end reads; for BAM use" - echo " --readFilesCommand samtools view" - echo " - SAM PE ... SAM or BAM paired-end reads; for BAM use" - echo " --readFilesCommand samtools view" - echo "" - echo " --readFilesSAMattrKeep" - echo " type: string, multiple values allowed" - echo " example: All" - echo " for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM," - echo " e.g.: --readFilesSAMtagsKeep RG PL" - echo " - All ... keep all tags" - echo " - None ... do not keep any tags" - echo "" - echo " --readFilesManifest" - echo " type: file, file must exist" - echo " path to the \"manifest\" file with the names of read files. The manifest" - echo " file should contain 3 tab-separated columns:" - echo " paired-end reads: read1_file_name \$tab\$ read2_file_name \$tab\$" - echo " read_group_line." - echo " single-end reads: read1_file_name \$tab\$ - \$tab\$" - echo " read_group_line." - echo " Spaces, but not tabs are allowed in file names." - echo " If read_group_line does not start with ID:, it can only contain one ID" - echo " field, and ID: will be added to it." - echo " If read_group_line starts with ID:, it can contain several fields" - echo " separated by \$tab\$, and all fields will be be copied verbatim into SAM" - echo " @RG header line." - echo "" - echo " --readFilesPrefix" - echo " type: string" - echo " prefix for the read files names, i.e. it will be added in front of the" - echo " strings in --readFilesIn" - echo "" - echo " --readFilesCommand" - echo " type: string, multiple values allowed" - echo " command line to execute for each of the input file. 
This command should" - echo " generate FASTA or FASTQ text and send it to stdout" - echo " For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2" - echo " files, etc." - echo "" - echo " --readMapNumber" - echo " type: integer" - echo " example: -1" - echo " number of reads to map from the beginning of the file" - echo " -1: map all reads" - echo "" - echo " --readMatesLengthsIn" - echo " type: string" - echo " example: NotEqual" - echo " Equal/NotEqual - lengths of names,sequences,qualities for both mates are" - echo " the same / not the same. NotEqual is safe in all situations." - echo "" - echo " --readNameSeparator" - echo " type: string, multiple values allowed" - echo " example: /" - echo " character(s) separating the part of the read names that will be trimmed" - echo " in output (read name after space is always trimmed)" - echo "" - echo " --readQualityScoreBase" - echo " type: integer" - echo " example: 33" - echo " number to be subtracted from the ASCII code to get Phred quality score" - echo "" - echo "Read Clipping:" - echo " --clipAdapterType" - echo " type: string" - echo " example: Hamming" - echo " adapter clipping type" - echo " - Hamming ... adapter clipping based on Hamming distance, with the" - echo " number of mismatches controlled by --clip5pAdapterMMp" - echo " - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4." - echo " Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal" - echo " - None ... no adapter clipping, all other clip* parameters are" - echo " disregarded" - echo "" - echo " --clip3pNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number(s) of bases to clip from 3p of each mate. If one value is given," - echo " it will be assumed the same for both mates." - echo "" - echo " --clip3pAdapterSeq" - echo " type: string, multiple values allowed" - echo " adapter sequences to clip from 3p of each mate. 
If one value is given," - echo " it will be assumed the same for both mates." - echo " - polyA ... polyA sequence with the length equal to read length" - echo "" - echo " --clip3pAdapterMMp" - echo " type: double, multiple values allowed" - echo " example: 0.1" - echo " max proportion of mismatches for 3p adapter clipping for each mate. If" - echo " one value is given, it will be assumed the same for both mates." - echo "" - echo " --clip3pAfterAdapterNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number of bases to clip from 3p of each mate after the adapter clipping." - echo " If one value is given, it will be assumed the same for both mates." - echo "" - echo " --clip5pNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number(s) of bases to clip from 5p of each mate. If one value is given," - echo " it will be assumed the same for both mates." - echo "" - echo "Limits:" - echo " --limitGenomeGenerateRAM" - echo " type: long" - echo " example: 31000000000" - echo " maximum available RAM (bytes) for genome generation" - echo "" - echo " --limitIObufferSize" - echo " type: long, multiple values allowed" - echo " example: 30000000;50000000" - echo " max available buffers size (bytes) for input/output, per thread" - echo "" - echo " --limitOutSAMoneReadBytes" - echo " type: long" - echo " example: 100000" - echo " max size of the SAM record (bytes) for one read. Recommended value:" - echo " >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - echo "" - echo " --limitOutSJoneRead" - echo " type: integer" - echo " example: 1000" - echo " max number of junctions for one read (including all multi-mappers)" - echo "" - echo " --limitOutSJcollapsed" - echo " type: integer" - echo " example: 1000000" - echo " max number of collapsed junctions" - echo "" - echo " --limitBAMsortRAM" - echo " type: long" - echo " example: 0" - echo " maximum available RAM (bytes) for sorting BAM. 
If =0, it will be set to" - echo " the genome index size. 0 value can only be used with --genomeLoad" - echo " NoSharedMemory option." - echo "" - echo " --limitSjdbInsertNsj" - echo " type: integer" - echo " example: 1000000" - echo " maximum number of junctions to be inserted to the genome on the fly at" - echo " the mapping stage, including those from annotations and those detected" - echo " in the 1st step of the 2-pass run" - echo "" - echo " --limitNreadsSoft" - echo " type: integer" - echo " example: -1" - echo " soft limit on the number of reads" - echo "" - echo "Output: general:" - echo " --outTmpKeep" - echo " type: string" - echo " whether to keep the temporary files after STAR runs is finished" - echo " - None ... remove all temporary files" - echo " - All ... keep all files" - echo "" - echo " --outStd" - echo " type: string" - echo " example: Log" - echo " which output will be directed to stdout (standard out)" - echo " - Log ... log messages" - echo " - SAM ... alignments in SAM format (which normally" - echo " are output to Aligned.out.sam file), normal standard output will go into" - echo " Log.std.out" - echo " - BAM_Unsorted ... alignments in BAM format, unsorted." - echo " Requires --outSAMtype BAM Unsorted" - echo " - BAM_SortedByCoordinate ... alignments in BAM format, sorted by" - echo " coordinate. Requires --outSAMtype BAM SortedByCoordinate" - echo " - BAM_Quant ... alignments to transcriptome in BAM format," - echo " unsorted. Requires --quantMode TranscriptomeSAM" - echo "" - echo " --outReadsUnmapped" - echo " type: string" - echo " output of unmapped and partially mapped (i.e. mapped only one mate of a" - echo " paired end read) reads in separate file(s)." - echo " - None ... no output" - echo " - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - echo "" - echo " --outQSconversionAdd" - echo " type: integer" - echo " example: 0" - echo " add this number to the quality score (e.g. 
to convert from Illumina to" - echo " Sanger, use -31)" - echo "" - echo " --outMultimapperOrder" - echo " type: string" - echo " example: Old_2.4" - echo " order of multimapping alignments in the output files" - echo " - Old_2.4 ... quasi-random order used before 2.5.0" - echo " - Random ... random order of alignments for each" - echo " multi-mapper. Read mates (pairs) are always adjacent, all alignment for" - echo " each read stay together. This option will become default in the future" - echo " releases." - echo "" - echo "Output: SAM and BAM:" - echo " --outSAMtype" - echo " type: string, multiple values allowed" - echo " example: SAM" - echo " type of SAM/BAM output" - echo " 1st word:" - echo " - BAM ... output BAM without sorting" - echo " - SAM ... output SAM without sorting" - echo " - None ... no SAM/BAM output" - echo " 2nd, 3rd:" - echo " - Unsorted ... standard unsorted" - echo " - SortedByCoordinate ... sorted by coordinate. This option will allocate" - echo " extra memory for sorting which can be specified by --limitBAMsortRAM." - echo "" - echo " --outSAMmode" - echo " type: string" - echo " example: Full" - echo " mode of SAM output" - echo " - None ... no SAM output" - echo " - Full ... full SAM output" - echo " - NoQS ... full SAM but without quality scores" - echo "" - echo " --outSAMstrandField" - echo " type: string" - echo " Cufflinks-like strand field flag" - echo " - None ... not used" - echo " - intronMotif ... strand derived from the intron motif. This option" - echo " changes the output alignments: reads with inconsistent and/or" - echo " non-canonical introns are filtered out." - echo "" - echo " --outSAMattributes" - echo " type: string, multiple values allowed" - echo " example: Standard" - echo " a string of desired SAM attributes, in the order desired for the output" - echo " SAM. Tags can be listed in any combination/order." - echo " ***Presets:" - echo " - None ... no attributes" - echo " - Standard ... 
NH HI AS nM" - echo " - All ... NH HI AS nM NM MD jM jI MC ch" - echo " ***Alignment:" - echo " - NH ... number of loci the reads maps to: =1 for unique" - echo " mappers, >1 for multimappers. Standard SAM tag." - echo " - HI ... multiple alignment index, starts with" - echo " --outSAMattrIHstart (=1 by default). Standard SAM tag." - echo " - AS ... local alignment score, +1/-1 for matches/mismateches," - echo " score* penalties for indels and gaps. For PE reads, total score for two" - echo " mates. Stadnard SAM tag." - echo " - nM ... number of mismatches. For PE reads, sum over two" - echo " mates." - echo " - NM ... edit distance to the reference (number of mismatched +" - echo " inserted + deleted bases) for each mate. Standard SAM tag." - echo " - MD ... string encoding mismatched and deleted reference bases" - echo " (see standard SAM specifications). Standard SAM tag." - echo " - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0:" - echo " non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6:" - echo " GT/AT. If splice junctions database is used, and a junction is" - echo " annotated, 20 is added to its motif value." - echo " - jI ... start and end of introns for all junctions (1-based)." - echo " - XS ... alignment strand according to --outSAMstrandField." - echo " - MC ... mate's CIGAR string. Standard SAM tag." - echo " - ch ... marks all segment of all chimeric alingments for" - echo " --chimOutType WithinBAM output." - echo " - cN ... number of bases clipped from the read ends: 5' and 3'" - echo " ***Variation:" - echo " - vA ... variant allele" - echo " - vG ... genomic coordinate of the variant overlapped by the" - echo " read." - echo " - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 -" - echo " alignment does not pass WASP filtering. Requires --waspOutputMode" - echo " SAMtag." - echo " ***STARsolo:" - echo " - CR CY UR UY ... 
sequences and quality scores of cell barcodes and UMIs" - echo " for the solo* demultiplexing." - echo " - GX GN ... gene ID and gene name for unique-gene reads." - echo " - gx gn ... gene IDs and gene names for unique- and multi-gene" - echo " reads." - echo " - CB UB ... error-corrected cell barcodes and UMIs for solo*" - echo " demultiplexing. Requires --outSAMtype BAM SortedByCoordinate." - echo " - sM ... assessment of CB and UMI." - echo " - sS ... sequence of the entire barcode (CB,UMI,adapter)." - echo " - sQ ... quality of the entire barcode." - echo " ***Unsupported/undocumented:" - echo " - ha ... haplotype (1/2) when mapping to the diploid genome." - echo " Requires genome generated with --genomeTransformType Diploid ." - echo " - rB ... alignment block read/genomic coordinates." - echo " - vR ... read coordinate of the variant." - echo "" - echo " --outSAMattrIHstart" - echo " type: integer" - echo " example: 1" - echo " start value for the IH attribute. 0 may be required by some downstream" - echo " software, such as Cufflinks or StringTie." - echo "" - echo " --outSAMunmapped" - echo " type: string, multiple values allowed" - echo " output of unmapped reads in the SAM format" - echo " 1st word:" - echo " - None ... no output" - echo " - Within ... output unmapped reads within the main SAM file (i.e." - echo " Aligned.out.sam)" - echo " 2nd word:" - echo " - KeepPairs ... record unmapped mate for each alignment, and, in case of" - echo " unsorted output, keep it adjacent to its mapped mate. Only affects" - echo " multi-mapping reads." 
- echo "" - echo " --outSAMorder" - echo " type: string" - echo " example: Paired" - echo " type of sorting for the SAM output" - echo " Paired: one mate after the other for all paired alignments" - echo " PairedKeepInputOrder: one mate after the other for all paired" - echo " alignments, the order is kept the same as in the input FASTQ files" - echo "" - echo " --outSAMprimaryFlag" - echo " type: string" - echo " example: OneBestScore" - echo " which alignments are considered primary - all others will be marked with" - echo " 0x100 bit in the FLAG" - echo " - OneBestScore ... only one alignment with the best score is primary" - echo " - AllBestScore ... all alignments with the best score are primary" - echo "" - echo " --outSAMreadID" - echo " type: string" - echo " example: Standard" - echo " read ID record type" - echo " - Standard ... first word (until space) from the FASTx read ID line," - echo " removing /1,/2 from the end" - echo " - Number ... read number (index) in the FASTx file" - echo "" - echo " --outSAMmapqUnique" - echo " type: integer" - echo " example: 255" - echo " 0 to 255: the MAPQ value for unique mappers" - echo "" - echo " --outSAMflagOR" - echo " type: integer" - echo " example: 0" - echo " 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e." - echo " FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set" - echo " by STAR, and after outSAMflagAND. Can be used to set specific bits that" - echo " are not set otherwise." - echo "" - echo " --outSAMflagAND" - echo " type: integer" - echo " example: 65535" - echo " 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e." - echo " FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set" - echo " by STAR, but before outSAMflagOR. Can be used to unset specific bits" - echo " that are not set otherwise." - echo "" - echo " --outSAMattrRGline" - echo " type: string, multiple values allowed" - echo " SAM/BAM read group line. 
The first word contains the read group" - echo " identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx" - echo " CN:yy \"DS:z z z\"." - echo " xxx will be added as RG tag to each output alignment. Any spaces in the" - echo " tag values have to be double quoted." - echo " Comma separated RG lines correspons to different (comma separated) input" - echo " files in --readFilesIn. Commas have to be surrounded by spaces, e.g." - echo " --outSAMattrRGline ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - echo "" - echo " --outSAMheaderHD" - echo " type: string, multiple values allowed" - echo " @HD (header) line of the SAM header" - echo "" - echo " --outSAMheaderPG" - echo " type: string, multiple values allowed" - echo " extra @PG (software) line of the SAM header (in addition to STAR)" - echo "" - echo " --outSAMheaderCommentFile" - echo " type: string" - echo " path to the file with @CO (comment) lines of the SAM header" - echo "" - echo " --outSAMfilter" - echo " type: string, multiple values allowed" - echo " filter the output into main SAM/BAM files" - echo " - KeepOnlyAddedReferences ... only keep the reads for which all" - echo " alignments are to the extra reference sequences added with" - echo " --genomeFastaFiles at the mapping stage." - echo " - KeepAllAddedReferences ... keep all alignments to the extra reference" - echo " sequences added with --genomeFastaFiles at the mapping stage." - echo "" - echo " --outSAMmultNmax" - echo " type: integer" - echo " example: -1" - echo " max number of multiple alignments for a read that will be output to the" - echo " SAM/BAM files. Note that if this value is not equal to -1, the top" - echo " scoring alignment will be output first" - echo " - -1 ... all alignments (up to --outFilterMultimapNmax) will be output" - echo "" - echo " --outSAMtlen" - echo " type: integer" - echo " example: 1" - echo " calculation method for the TLEN field in the SAM/BAM files" - echo " - 1 ... 
leftmost base of the (+)strand mate to rightmost base of the" - echo " (-)mate. (+)sign for the (+)strand mate" - echo " - 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign" - echo " for the mate with the leftmost base. This is different from 1 for" - echo " overlapping mates with protruding ends" - echo "" - echo " --outBAMcompression" - echo " type: integer" - echo " example: 1" - echo " -1 to 10 BAM compression level, -1=default compression (6?), 0=no" - echo " compression, 10=maximum compression" - echo "" - echo " --outBAMsortingThreadN" - echo " type: integer" - echo " example: 0" - echo " >=0: number of threads for BAM sorting. 0 will default to" - echo " min(6,--runThreadN)." - echo "" - echo " --outBAMsortingBinsN" - echo " type: integer" - echo " example: 50" - echo " >0: number of genome bins for coordinate-sorting" - echo "" - echo "BAM processing:" - echo " --bamRemoveDuplicatesType" - echo " type: string" - echo " mark duplicates in the BAM file, for now only works with (i) sorted BAM" - echo " fed with inputBAMfile, and (ii) for paired-end alignments only" - echo " - - ... no duplicate removal/marking" - echo " - UniqueIdentical ... mark all multimappers, and duplicate" - echo " unique mappers. The coordinates, FLAG, CIGAR must be identical" - echo " - UniqueIdenticalNotMulti ... mark duplicate unique mappers but not" - echo " multimappers." - echo "" - echo " --bamRemoveDuplicatesMate2basesN" - echo " type: integer" - echo " example: 0" - echo " number of bases from the 5' of mate 2 to use in collapsing (e.g. for" - echo " RAMPAGE)" - echo "" - echo "Output Wiggle:" - echo " --outWigType" - echo " type: string, multiple values allowed" - echo " type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires" - echo " sorted BAM: --outSAMtype BAM SortedByCoordinate ." - echo " 1st word:" - echo " - None ... no signal output" - echo " - bedGraph ... bedGraph format" - echo " - wiggle ... 
wiggle format" - echo " 2nd word:" - echo " - read1_5p ... signal from only 5' of the 1st read, useful for" - echo " CAGE/RAMPAGE etc" - echo " - read2 ... signal from only 2nd read" - echo "" - echo " --outWigStrand" - echo " type: string" - echo " example: Stranded" - echo " strandedness of wiggle/bedGraph output" - echo " - Stranded ... separate strands, str1 and str2" - echo " - Unstranded ... collapsed strands" - echo "" - echo " --outWigReferencesPrefix" - echo " type: string" - echo " prefix matching reference names to include in the output wiggle file," - echo " e.g. \"chr\", default \"-\" - include all references" - echo "" - echo " --outWigNorm" - echo " type: string" - echo " example: RPM" - echo " type of normalization for the signal" - echo " - RPM ... reads per million of mapped reads" - echo " - None ... no normalization, \"raw\" counts" - echo "" - echo "Output Filtering:" - echo " --outFilterType" - echo " type: string" - echo " example: Normal" - echo " type of filtering" - echo " - Normal ... standard filtering using only current alignment" - echo " - BySJout ... keep only those reads that contain junctions that passed" - echo " filtering into SJ.out.tab" - echo "" - echo " --outFilterMultimapScoreRange" - echo " type: integer" - echo " example: 1" - echo " the score range below the maximum score for multimapping alignments" - echo "" - echo " --outFilterMultimapNmax" - echo " type: integer" - echo " example: 10" - echo " maximum number of loci the read is allowed to map to. Alignments (all of" - echo " them) will be output only if the read maps to no more loci than this" - echo " value." - echo " Otherwise no alignments will be output, and the read will be counted as" - echo " \"mapped to too many loci\" in the Log.final.out ." - echo "" - echo " --outFilterMismatchNmax" - echo " type: integer" - echo " example: 10" - echo " alignment will be output only if it has no more mismatches than this" - echo " value." 
- echo "" - echo " --outFilterMismatchNoverLmax" - echo " type: double" - echo " example: 0.3" - echo " alignment will be output only if its ratio of mismatches to *mapped*" - echo " length is less than or equal to this value." - echo "" - echo " --outFilterMismatchNoverReadLmax" - echo " type: double" - echo " example: 1.0" - echo " alignment will be output only if its ratio of mismatches to *read*" - echo " length is less than or equal to this value." - echo "" - echo " --outFilterScoreMin" - echo " type: integer" - echo " example: 0" - echo " alignment will be output only if its score is higher than or equal to" - echo " this value." - echo "" - echo " --outFilterScoreMinOverLread" - echo " type: double" - echo " example: 0.66" - echo " same as outFilterScoreMin, but normalized to read length (sum of mates'" - echo " lengths for paired-end reads)" - echo "" - echo " --outFilterMatchNmin" - echo " type: integer" - echo " example: 0" - echo " alignment will be output only if the number of matched bases is higher" - echo " than or equal to this value." - echo "" - echo " --outFilterMatchNminOverLread" - echo " type: double" - echo " example: 0.66" - echo " sam as outFilterMatchNmin, but normalized to the read length (sum of" - echo " mates' lengths for paired-end reads)." - echo "" - echo " --outFilterIntronMotifs" - echo " type: string" - echo " filter alignment using their motifs" - echo " - None ... no filtering" - echo " - RemoveNoncanonical ... filter out alignments that contain" - echo " non-canonical junctions" - echo " - RemoveNoncanonicalUnannotated ... filter out alignments that contain" - echo " non-canonical unannotated junctions when using annotated splice" - echo " junctions database. The annotated non-canonical junctions will be kept." - echo "" - echo " --outFilterIntronStrands" - echo " type: string" - echo " example: RemoveInconsistentStrands" - echo " filter alignments" - echo " - RemoveInconsistentStrands ... 
remove alignments that have" - echo " junctions with inconsistent strands" - echo " - None ... no filtering" - echo "" - echo "Output splice junctions (SJ.out.tab):" - echo " --outSJtype" - echo " type: string" - echo " example: Standard" - echo " type of splice junction output" - echo " - Standard ... standard SJ.out.tab output" - echo " - None ... no splice junction output" - echo "" - echo "Output Filtering: Splice Junctions:" - echo " --outSJfilterReads" - echo " type: string" - echo " example: All" - echo " which reads to consider for collapsed splice junctions output" - echo " - All ... all reads, unique- and multi-mappers" - echo " - Unique ... uniquely mapping reads only" - echo "" - echo " --outSJfilterOverhangMin" - echo " type: integer, multiple values allowed" - echo " example: 30;12;12;12" - echo " minimum overhang length for splice junctions on both sides for: (1)" - echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterCountUniqueMin" - echo " type: integer, multiple values allowed" - echo " example: 3;1;1;1" - echo " minimum uniquely mapping read count per junction for: (1) non-canonical" - echo " motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC" - echo " and GT/AT motif. -1 means no output for that motif" - echo " Junctions are output if one of outSJfilterCountUniqueMin OR" - echo " outSJfilterCountTotalMin conditions are satisfied" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterCountTotalMin" - echo " type: integer, multiple values allowed" - echo " example: 3;1;1;1" - echo " minimum total (multi-mapping+unique) read count per junction for: (1)" - echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif" - echo " Junctions are output if one of outSJfilterCountUniqueMin OR" - echo " outSJfilterCountTotalMin conditions are satisfied" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterDistToOtherSJmin" - echo " type: integer, multiple values allowed" - echo " example: 10;0;5;10" - echo " minimum allowed distance to other junctions' donor/acceptor" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterIntronMaxVsReadN" - echo " type: integer, multiple values allowed" - echo " example: 50000;100000;200000" - echo " maximum gap allowed for junctions supported by 1,2,3,,,N reads" - echo " i.e. by default junctions supported by 1 read can have gaps <=50000b, by" - echo " 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap" - echo " <=alignIntronMax" - echo " does not apply to annotated junctions" - echo "" - echo "Scoring:" - echo " --scoreGap" - echo " type: integer" - echo " example: 0" - echo " splice junction penalty (independent on intron motif)" - echo "" - echo " --scoreGapNoncan" - echo " type: integer" - echo " example: -8" - echo " non-canonical junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGapGCAG" - echo " type: integer" - echo " example: -4" - echo " GC/AG and CT/GC junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGapATAC" - echo " type: integer" - echo " example: -8" - echo " AT/AC and GT/AT junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGenomicLengthLog2scale" - echo " type: integer" - echo " example: 0" - echo " extra score logarithmically scaled with genomic length of the alignment:" - echo " scoreGenomicLengthLog2scale*log2(genomicLength)" - echo "" - echo " --scoreDelOpen" - echo " type: integer" - echo " example: -2" - echo " deletion open penalty" - echo "" - echo " --scoreDelBase" - echo " type: integer" - echo " example: -2" - echo " deletion extension penalty per base (in addition to 
scoreDelOpen)" - echo "" - echo " --scoreInsOpen" - echo " type: integer" - echo " example: -2" - echo " insertion open penalty" - echo "" - echo " --scoreInsBase" - echo " type: integer" - echo " example: -2" - echo " insertion extension penalty per base (in addition to scoreInsOpen)" - echo "" - echo " --scoreStitchSJshift" - echo " type: integer" - echo " example: 1" - echo " maximum score reduction while searching for SJ boundaries in the" - echo " stitching step" - echo "" - echo "Alignments and Seeding:" - echo " --seedSearchStartLmax" - echo " type: integer" - echo " example: 50" - echo " defines the search start point through the read - the read is split into" - echo " pieces no longer than this value" - echo "" - echo " --seedSearchStartLmaxOverLread" - echo " type: double" - echo " example: 1.0" - echo " seedSearchStartLmax normalized to read length (sum of mates' lengths for" - echo " paired-end reads)" - echo "" - echo " --seedSearchLmax" - echo " type: integer" - echo " example: 0" - echo " defines the maximum length of the seeds, if =0 seed length is not" - echo " limited" - echo "" - echo " --seedMultimapNmax" - echo " type: integer" - echo " example: 10000" - echo " only pieces that map fewer than this value are utilized in the stitching" - echo " procedure" - echo "" - echo " --seedPerReadNmax" - echo " type: integer" - echo " example: 1000" - echo " max number of seeds per read" - echo "" - echo " --seedPerWindowNmax" - echo " type: integer" - echo " example: 50" - echo " max number of seeds per window" - echo "" - echo " --seedNoneLociPerWindow" - echo " type: integer" - echo " example: 10" - echo " max number of one seed loci per window" - echo "" - echo " --seedSplitMin" - echo " type: integer" - echo " example: 12" - echo " min length of the seed sequences split by Ns or mate gap" - echo "" - echo " --seedMapMin" - echo " type: integer" - echo " example: 5" - echo " min length of seeds to be mapped" - echo "" - echo " --alignIntronMin" - echo 
" type: integer" - echo " example: 21" - echo " minimum intron size, genomic gap is considered intron if its" - echo " length>=alignIntronMin, otherwise it is considered Deletion" - echo "" - echo " --alignIntronMax" - echo " type: integer" - echo " example: 0" - echo " maximum intron size, if 0, max intron size will be determined by" - echo " (2^winBinNbits)*winAnchorDistNbins" - echo "" - echo " --alignMatesGapMax" - echo " type: integer" - echo " example: 0" - echo " maximum gap between two mates, if 0, max intron gap will be determined" - echo " by (2^winBinNbits)*winAnchorDistNbins" - echo "" - echo " --alignSJoverhangMin" - echo " type: integer" - echo " example: 5" - echo " minimum overhang (i.e. block size) for spliced alignments" - echo "" - echo " --alignSJstitchMismatchNmax" - echo " type: integer, multiple values allowed" - echo " example: 0;-1;0;0" - echo " maximum number of mismatches for stitching of the splice junctions (-1:" - echo " no limit)." - echo " (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif." - echo "" - echo " --alignSJDBoverhangMin" - echo " type: integer" - echo " example: 3" - echo " minimum overhang (i.e. 
block size) for annotated (sjdb) spliced" - echo " alignments" - echo "" - echo " --alignSplicedMateMapLmin" - echo " type: integer" - echo " example: 0" - echo " minimum mapped length for a read mate that is spliced" - echo "" - echo " --alignSplicedMateMapLminOverLmate" - echo " type: double" - echo " example: 0.66" - echo " alignSplicedMateMapLmin normalized to mate length" - echo "" - echo " --alignWindowsPerReadNmax" - echo " type: integer" - echo " example: 10000" - echo " max number of windows per read" - echo "" - echo " --alignTranscriptsPerWindowNmax" - echo " type: integer" - echo " example: 100" - echo " max number of transcripts per window" - echo "" - echo " --alignTranscriptsPerReadNmax" - echo " type: integer" - echo " example: 10000" - echo " max number of different alignments per read to consider" - echo "" - echo " --alignEndsType" - echo " type: string" - echo " example: Local" - echo " type of read ends alignment" - echo " - Local ... standard local alignment with soft-clipping" - echo " allowed" - echo " - EndToEnd ... force end-to-end read alignment, do not" - echo " soft-clip" - echo " - Extend5pOfRead1 ... fully extend only the 5p of the read1, all other" - echo " ends: local alignment" - echo " - Extend5pOfReads12 ... fully extend only the 5p of the both read1 and" - echo " read2, all other ends: local alignment" - echo "" - echo " --alignEndsProtrude" - echo " type: string" - echo " example: 0 ConcordantPair" - echo " allow protrusion of alignment ends, i.e. start (end) of the +strand mate" - echo " downstream of the start (end) of the -strand mate" - echo " 1st word: int: maximum number of protrusion bases allowed" - echo " 2nd word: string:" - echo " - ConcordantPair ... report alignments with non-zero" - echo " protrusion as concordant pairs" - echo " - DiscordantPair ... 
report alignments with non-zero" - echo " protrusion as discordant pairs" - echo "" - echo " --alignSoftClipAtReferenceEnds" - echo " type: string" - echo " example: Yes" - echo " allow the soft-clipping of the alignments past the end of the" - echo " chromosomes" - echo " - Yes ... allow" - echo " - No ... prohibit, useful for compatibility with Cufflinks" - echo "" - echo " --alignInsertionFlush" - echo " type: string" - echo " how to flush ambiguous insertion positions" - echo " - None ... insertions are not flushed" - echo " - Right ... insertions are flushed to the right" - echo "" - echo "Paired-End reads:" - echo " --peOverlapNbasesMin" - echo " type: integer" - echo " example: 0" - echo " minimum number of overlapping bases to trigger mates merging and" - echo " realignment. Specify >0 value to switch on the \"merginf of overlapping" - echo " mates\" algorithm." - echo "" - echo " --peOverlapMMp" - echo " type: double" - echo " example: 0.01" - echo " maximum proportion of mismatched bases in the overlap area" - echo "" - echo "Windows, Anchors, Binning:" - echo " --winAnchorMultimapNmax" - echo " type: integer" - echo " example: 50" - echo " max number of loci anchors are allowed to map to" - echo "" - echo " --winBinNbits" - echo " type: integer" - echo " example: 16" - echo " =log2(winBin), where winBin is the size of the bin for the" - echo " windows/clustering, each window will occupy an integer number of bins." 
- echo "" - echo " --winAnchorDistNbins" - echo " type: integer" - echo " example: 9" - echo " max number of bins between two anchors that allows aggregation of" - echo " anchors into one window" - echo "" - echo " --winFlankNbins" - echo " type: integer" - echo " example: 4" - echo " log2(winFlank), where win Flank is the size of the left and right" - echo " flanking regions for each window" - echo "" - echo " --winReadCoverageRelativeMin" - echo " type: double" - echo " example: 0.5" - echo " minimum relative coverage of the read sequence by the seeds in a window," - echo " for STARlong algorithm only." - echo "" - echo " --winReadCoverageBasesMin" - echo " type: integer" - echo " example: 0" - echo " minimum number of bases covered by the seeds in a window , for STARlong" - echo " algorithm only." - echo "" - echo "Chimeric Alignments:" - echo " --chimOutType" - echo " type: string, multiple values allowed" - echo " example: Junctions" - echo " type of chimeric output" - echo " - Junctions ... Chimeric.out.junction" - echo " - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file" - echo " - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)" - echo " - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for" - echo " supplemental chimeric alignments (default if no 2nd word is present)" - echo " - WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental" - echo " chimeric alignments" - echo "" - echo " --chimSegmentMin" - echo " type: integer" - echo " example: 0" - echo " minimum length of chimeric segment length, if ==0, no chimeric output" - echo "" - echo " --chimScoreMin" - echo " type: integer" - echo " example: 0" - echo " minimum total (summed) score of the chimeric segments" - echo "" - echo " --chimScoreDropMax" - echo " type: integer" - echo " example: 20" - echo " max drop (difference) of chimeric score (the sum of scores of all" - echo " chimeric segments) from the read length" - echo "" - echo " --chimScoreSeparation" - echo " type: integer" - echo " example: 10" - echo " minimum difference (separation) between the best chimeric score and the" - echo " next one" - echo "" - echo " --chimScoreJunctionNonGTAG" - echo " type: integer" - echo " example: -1" - echo " penalty for a non-GT/AG chimeric junction" - echo "" - echo " --chimJunctionOverhangMin" - echo " type: integer" - echo " example: 20" - echo " minimum overhang for a chimeric junction" - echo "" - echo " --chimSegmentReadGapMax" - echo " type: integer" - echo " example: 0" - echo " maximum gap in the read sequence between chimeric segments" - echo "" - echo " --chimFilter" - echo " type: string, multiple values allowed" - echo " example: banGenomicN" - echo " different filters for chimeric alignments" - echo " - None ... no filtering" - echo " - banGenomicN ... Ns are not allowed in the genome sequence around the" - echo " chimeric junction" - echo "" - echo " --chimMainSegmentMultNmax" - echo " type: integer" - echo " example: 10" - echo " maximum number of multi-alignments for the main chimeric segment. =1" - echo " will prohibit multimapping main segments." - echo "" - echo " --chimMultimapNmax" - echo " type: integer" - echo " example: 0" - echo " maximum number of chimeric multi-alignments" - echo " - 0 ... 
use the old scheme for chimeric detection which only considered" - echo " unique alignments" - echo "" - echo " --chimMultimapScoreRange" - echo " type: integer" - echo " example: 1" - echo " the score range for multi-mapping chimeras below the best chimeric" - echo " score. Only works with --chimMultimapNmax > 1" - echo "" - echo " --chimNonchimScoreDropMin" - echo " type: integer" - echo " example: 20" - echo " to trigger chimeric detection, the drop in the best non-chimeric" - echo " alignment score with respect to the read length has to be greater than" - echo " this value" - echo "" - echo " --chimOutJunctionFormat" - echo " type: integer" - echo " example: 0" - echo " formatting type for the Chimeric.out.junction file" - echo " - 0 ... no comment lines/headers" - echo " - 1 ... comment lines at the end of the file: command line and Nreads:" - echo " total, unique/multi-mapping" - echo "" - echo "Quantification of Annotations:" - echo " --quantMode" - echo " type: string, multiple values allowed" - echo " types of quantification requested" - echo " - - ... none" - echo " - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a" - echo " separate file" - echo " - GeneCounts ... count reads per gene" - echo "" - echo " --quantTranscriptomeBAMcompression" - echo " type: integer" - echo " example: 1" - echo " -2 to 10 transcriptome BAM compression level" - echo " - -2 ... no BAM output" - echo " - -1 ... default compression (6?)" - echo " - 0 ... no compression" - echo " - 10 ... maximum compression" - echo "" - echo " --quantTranscriptomeBan" - echo " type: string" - echo " example: IndelSoftclipSingleend" - echo " prohibit various alignment type" - echo " - IndelSoftclipSingleend ... prohibit indels, soft clipping and" - echo " single-end alignments - compatible with RSEM" - echo " - Singleend ... prohibit single-end alignments" - echo "" - echo "2-pass Mapping:" - echo " --twopassMode" - echo " type: string" - echo " 2-pass mapping mode." 
- echo " - None ... 1-pass mapping" - echo " - Basic ... basic 2-pass mapping, with all 1st pass junctions" - echo " inserted into the genome indices on the fly" - echo "" - echo " --twopass1readsN" - echo " type: integer" - echo " example: -1" - echo " number of reads to process for the 1st step. Use very large number (or" - echo " default -1) to map all reads in the first step." - echo "" - echo "WASP parameters:" - echo " --waspOutputMode" - echo " type: string" - echo " WASP allele-specific output type. This is re-implementation of the" - echo " original WASP mappability filtering by Bryce van de Geijn, Graham" - echo " McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original" - echo " WASP paper: Nature Methods 12, 1061-1063 (2015)," - echo " https://www.nature.com/articles/nmeth.3582 ." - echo " - SAMtag ... add WASP tags to the alignments that pass WASP" - echo " filtering" - echo "" - echo "STARsolo (single cell RNA-seq) parameters:" - echo " --soloType" - echo " type: string, multiple values allowed" - echo " type of single-cell RNA-seq" - echo " - CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of" - echo " fixed length in read2, e.g. Drop-seq and 10X Chromium." - echo " - CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI" - echo " of fixed length and one adapter sequence of fixed length are allowed in" - echo " read2 only (e.g. inDrop, ddSeq)." - echo " - CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No" - echo " UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end]" - echo " CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or" - echo " SortedByCoordinate]" - echo " - SmartSeq ... 
Smart-seq: each cell in a separate FASTQ (paired-" - echo " or single-end), barcodes are corresponding read-groups, no UMI" - echo " sequences, alignments deduplicated according to alignment start and end" - echo " (after extending soft-clipped bases)" - echo "" - echo " --soloCBwhitelist" - echo " type: string, multiple values allowed" - echo " file(s) with whitelist(s) of cell barcodes. Only --soloType" - echo " CB_UMI_Complex allows more than one whitelist file." - echo " - None ... no whitelist: all cell barcodes are allowed" - echo "" - echo " --soloCBstart" - echo " type: integer" - echo " example: 1" - echo " cell barcode start base" - echo "" - echo " --soloCBlen" - echo " type: integer" - echo " example: 16" - echo " cell barcode length" - echo "" - echo " --soloUMIstart" - echo " type: integer" - echo " example: 17" - echo " UMI start base" - echo "" - echo " --soloUMIlen" - echo " type: integer" - echo " example: 10" - echo " UMI length" - echo "" - echo " --soloBarcodeReadLength" - echo " type: integer" - echo " example: 1" - echo " length of the barcode read" - echo " - 1 ... equal to sum of soloCBlen+soloUMIlen" - echo " - 0 ... not defined, do not check" - echo "" - echo " --soloBarcodeMate" - echo " type: integer" - echo " example: 0" - echo " identifies which read mate contains the barcode (CB+UMI) sequence" - echo " - 0 ... barcode sequence is on separate read, which should always be" - echo " the last file in the --readFilesIn listed" - echo " - 1 ... barcode sequence is a part of mate 1" - echo " - 2 ... barcode sequence is a part of mate 2" - echo "" - echo " --soloCBposition" - echo " type: string, multiple values allowed" - echo " position of Cell Barcode(s) on the barcode read." - echo " Presently only works with --soloType CB_UMI_Complex, and barcodes are" - echo " assumed to be on Read2." 
- echo " Format for each barcode: startAnchor_startPosition_endAnchor_endPosition" - echo " start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1:" - echo " read end; 2: adapter start; 3: adapter end" - echo " start(end)Position is the 0-based position with of the CB start(end)" - echo " with respect to the Anchor Base" - echo " String for different barcodes are separated by space." - echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" - echo " --soloCBposition 0_0_2_-1 3_1_3_8" - echo "" - echo " --soloUMIposition" - echo " type: string" - echo " position of the UMI on the barcode read, same as soloCBposition" - echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" - echo " --soloCBposition 3_9_3_14" - echo "" - echo " --soloAdapterSequence" - echo " type: string" - echo " adapter sequence to anchor barcodes. Only one adapter sequence is" - echo " allowed." - echo "" - echo " --soloAdapterMismatchesNmax" - echo " type: integer" - echo " example: 1" - echo " maximum number of mismatches allowed in adapter sequence." - echo "" - echo " --soloCBmatchWLtype" - echo " type: string" - echo " example: 1MM_multi" - echo " matching the Cell Barcodes to the WhiteList" - echo " - Exact ... only exact matches allowed" - echo " - 1MM ... only one match in whitelist with 1" - echo " mismatched base allowed. Allowed CBs have to have at least one read with" - echo " exact match." - echo " - 1MM_multi ... multiple matches in whitelist with" - echo " 1 mismatched base allowed, posterior probability calculation is used" - echo " choose one of the matches." - echo " Allowed CBs have to have at least one read with exact match. This option" - echo " matches best with CellRanger 2.2.0" - echo " - 1MM_multi_pseudocounts ... same as 1MM_Multi, but" - echo " pseudocounts of 1 are added to all whitelist barcodes." - echo " - 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts," - echo " multimatching to WL is allowed for CBs with N-bases. 
This option matches" - echo " best with CellRanger >= 3.0.0" - echo " - EditDist_2 ... allow up to edit distance of 3 fpr" - echo " each of the barcodes. May include one deletion + one insertion. Only" - echo " works with --soloType CB_UMI_Complex. Matches to multiple passlist" - echo " barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - echo "" - echo " --soloInputSAMattrBarcodeSeq" - echo " type: string, multiple values allowed" - echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" - echo " SAM attributes mark the barcode sequence (in proper order)." - echo " For instance, for 10X CellRanger or STARsolo BAMs, use" - echo " --soloInputSAMattrBarcodeSeq CR UR ." - echo " This parameter is required when running STARsolo with input from SAM." - echo "" - echo " --soloInputSAMattrBarcodeQual" - echo " type: string, multiple values allowed" - echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" - echo " SAM attributes mark the barcode qualities (in proper order)." - echo " For instance, for 10X CellRanger or STARsolo BAMs, use" - echo " --soloInputSAMattrBarcodeQual CY UY ." - echo " If this parameter is '-' (default), the quality 'H' will be assigned to" - echo " all bases." - echo "" - echo " --soloStrand" - echo " type: string" - echo " example: Forward" - echo " strandedness of the solo libraries:" - echo " - Unstranded ... no strand information" - echo " - Forward ... read strand same as the original RNA molecule" - echo " - Reverse ... read strand opposite to the original RNA molecule" - echo "" - echo " --soloFeatures" - echo " type: string, multiple values allowed" - echo " example: Gene" - echo " genomic features for which the UMI counts per Cell Barcode are collected" - echo " - Gene ... genes: reads match the gene transcript" - echo " - SJ ... splice junctions: reported in SJ.out.tab" - echo " - GeneFull ... 
full gene (pre-mRNA): count all reads overlapping" - echo " genes' exons and introns" - echo " - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads" - echo " overlapping genes' exons and introns: prioritize 100% overlap with exons" - echo " - GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads" - echo " overlapping genes' exons and introns: prioritize >50% overlap with" - echo " exons. Do not count reads with 100% exonic overlap in the antisense" - echo " direction." - echo "" - echo " --soloMultiMappers" - echo " type: string, multiple values allowed" - echo " example: Unique" - echo " counting method for reads mapping to multiple genes" - echo " - Unique ... count only reads that map to unique genes" - echo " - Uniform ... uniformly distribute multi-genic UMIs to all genes" - echo " - Rescue ... distribute UMIs proportionally to unique+uniform counts" - echo " (~ first iteration of EM)" - echo " - PropUnique ... distribute UMIs proportionally to unique mappers, if" - echo " present, and uniformly if not." - echo " - EM ... multi-gene UMIs are distributed using Expectation" - echo " Maximization algorithm" - echo "" - echo " --soloUMIdedup" - echo " type: string, multiple values allowed" - echo " example: 1MM_All" - echo " type of UMI deduplication (collapsing) algorithm" - echo " - 1MM_All ... all UMIs with 1 mismatch distance to" - echo " each other are collapsed (i.e. counted once)." - echo " - 1MM_Directional_UMItools ... follows the \"directional\" method from" - echo " the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017)." - echo " - 1MM_Directional ... same as 1MM_Directional_UMItools, but" - echo " with more stringent criteria for duplicate UMIs" - echo " - Exact ... only exactly matching UMIs are" - echo " collapsed." - echo " - NoDedup ... no deduplication of UMIs, count all" - echo " reads." - echo " - 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI" - echo " collapsing." 
- echo "" - echo " --soloUMIfiltering" - echo " type: string, multiple values allowed" - echo " type of UMI filtering (for reads uniquely mapping to genes)" - echo " - - ... basic filtering: remove UMIs with N and" - echo " homopolymers (similar to CellRanger 2.2.0)." - echo " - MultiGeneUMI ... basic + remove lower-count UMIs that map to" - echo " more than one gene." - echo " - MultiGeneUMI_All ... basic + remove all UMIs that map to more than" - echo " one gene." - echo " - MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to" - echo " more than one gene, matching CellRanger > 3.0.0 ." - echo " Only works with --soloUMIdedup 1MM_CR" - echo "" - echo " --soloOutFileNames" - echo " type: string, multiple values allowed" - echo " example: Solo.out/;features.tsv;barcodes.tsv;matrix.mtx" - echo " file names for STARsolo output:" - echo " file_name_prefix gene_names barcode_sequences" - echo " cell_feature_count_matrix" - echo "" - echo " --soloCellFilter" - echo " type: string, multiple values allowed" - echo " example: CellRanger2.2;3000;0.99;10" - echo " cell filtering type and parameters" - echo " - None ... do not output filtered cells" - echo " - TopCells ... only report top cells by UMI count, followed by" - echo " the exact number of cells" - echo " - CellRanger2.2 ... simple filtering of CellRanger 2.2." - echo " Can be followed by numbers: number of expected cells, robust maximum" - echo " percentile for UMI count, maximum to minimum ratio for UMI count" - echo " The harcoded values are from CellRanger: nExpectedCells=3000;" - echo " maxPercentile=0.99; maxMinRatio=10" - echo " - EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please" - echo " cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20," - echo " 63 (2019):" - echo " " - echo "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y" - echo " Can be followed by 10 numeric parameters: nExpectedCells" - echo " maxPercentile maxMinRatio indMin indMax umiMin" - echo " umiMinFracMedian candMaxN FDR simN" - echo " The harcoded values are from CellRanger: 3000" - echo " 0.99 10 45000 90000 500 0.01" - echo " 20000 0.01 10000" - echo "" - echo " --soloOutFormatFeaturesGeneField3" - echo " type: string, multiple values allowed" - echo " example: Gene Expression" - echo " field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is" - echo " output." - echo "" - echo " --soloCellReadStats" - echo " type: string" - echo " Output reads statistics for each CB" - echo " - Standard ... standard output" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. 
-# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? 
- else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -ENV STAR_VERSION 2.7.10b -ENV PACKAGES gcc g++ make wget zlib1g-dev unzip -RUN apt-get update && \ - apt-get install -y --no-install-recommends ${PACKAGES} && \ - cd /tmp && \ - wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ - unzip ${STAR_VERSION}.zip && \ - cd STAR-${STAR_VERSION}/source && \ - make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ - cp STAR /usr/local/bin && \ - cd / && \ - rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ - apt-get --purge autoremove -y ${PACKAGES} && \ - apt-get clean - 
-LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component mapping star_align" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-star_align-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "star_align 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readFilesIn) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesIn. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeDir) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFileNamePrefix) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--outFileNamePrefix\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFileNamePrefix. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --runRNGseed) - [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUNRNGSEED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --runRNGseed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --runRNGseed=*) - [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed=*\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUNRNGSEED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeLoad) - [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOMELOAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeLoad. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeLoad=*) - [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad=*\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOMELOAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeFastaFiles) - if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_PAR_GENOMEFASTAFILES="$2" - else - VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --genomeFastaFiles=*) - if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_PAR_GENOMEFASTAFILES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --genomeFileSizes) - if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then - VIASH_PAR_GENOMEFILESIZES="$2" - else - VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFileSizes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeFileSizes=*) - if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then - VIASH_PAR_GENOMEFILESIZES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --genomeTransformOutput) - if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then - VIASH_PAR_GENOMETRANSFORMOUTPUT="$2" - else - VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeTransformOutput. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeTransformOutput=*) - if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then - VIASH_PAR_GENOMETRANSFORMOUTPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --genomeChrSetMitochondrial) - if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$2" - else - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeChrSetMitochondrial. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --genomeChrSetMitochondrial=*) - if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbFileChrStartEnd) - if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then - VIASH_PAR_SJDBFILECHRSTARTEND="$2" - else - VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbFileChrStartEnd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbFileChrStartEnd=*) - if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then - VIASH_PAR_SJDBFILECHRSTARTEND=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbGTFfile) - [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFfile=*) - [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile=*\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFchrPrefix) - [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFCHRPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFchrPrefix. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFchrPrefix=*) - [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix=*\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFCHRPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFfeatureExon) - [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFEATUREEXON="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfeatureExon. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFfeatureExon=*) - [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon=*\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFEATUREEXON=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentTranscript) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentTranscript. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentTranscript=*) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentGene) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTGENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGene=*) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTGENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentGeneName) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$2" - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneName. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGeneName=*) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbGTFtagExonParentGeneType) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$2" - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneType. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGeneType=*) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbOverhang) - [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBOVERHANG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbOverhang. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbOverhang=*) - [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang=*\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBOVERHANG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbScore) - [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBSCORE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbScore. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbScore=*) - [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore=*\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBSCORE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbInsertSave) - [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SJDBINSERTSAVE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbInsertSave. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbInsertSave=*) - [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave=*\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBINSERTSAVE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --varVCFfile) - [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARVCFFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --varVCFfile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --varVCFfile=*) - [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile=*\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARVCFFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesType) - [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesType=*) - [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType=*\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_READFILESTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesSAMattrKeep) - if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then - VIASH_PAR_READFILESSAMATTRKEEP="$2" - else - VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesSAMattrKeep. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesSAMattrKeep=*) - if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then - VIASH_PAR_READFILESSAMATTRKEEP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readFilesManifest) - [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESMANIFEST="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesManifest. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesManifest=*) - [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest=*\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESMANIFEST=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesPrefix) - [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --readFilesPrefix=*) - [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix=*\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesCommand) - if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then - VIASH_PAR_READFILESCOMMAND="$2" - else - VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesCommand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesCommand=*) - if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then - VIASH_PAR_READFILESCOMMAND=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readMapNumber) - [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMAPNUMBER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMapNumber. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readMapNumber=*) - [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber=*\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMAPNUMBER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readMatesLengthsIn) - [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMATESLENGTHSIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMatesLengthsIn. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readMatesLengthsIn=*) - [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn=*\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMATESLENGTHSIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readNameSeparator) - if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then - VIASH_PAR_READNAMESEPARATOR="$2" - else - VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readNameSeparator. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readNameSeparator=*) - if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then - VIASH_PAR_READNAMESEPARATOR=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readQualityScoreBase) - [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READQUALITYSCOREBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readQualityScoreBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readQualityScoreBase=*) - [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase=*\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READQUALITYSCOREBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clipAdapterType) - [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CLIPADAPTERTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clipAdapterType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clipAdapterType=*) - [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType=*\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLIPADAPTERTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clip3pNbases) - if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then - VIASH_PAR_CLIP3PNBASES="$2" - else - VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pNbases=*) - if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then - VIASH_PAR_CLIP3PNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAdapterSeq) - if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then - VIASH_PAR_CLIP3PADAPTERSEQ="$2" - else - VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterSeq. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pAdapterSeq=*) - if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then - VIASH_PAR_CLIP3PADAPTERSEQ=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAdapterMMp) - if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - VIASH_PAR_CLIP3PADAPTERMMP="$2" - else - VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterMMp. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --clip3pAdapterMMp=*) - if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - VIASH_PAR_CLIP3PADAPTERMMP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAfterAdapterNbases) - if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$2" - else - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAfterAdapterNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pAfterAdapterNbases=*) - if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - VIASH_PAR_CLIP3PAFTERADAPTERNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip5pNbases) - if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then - VIASH_PAR_CLIP5PNBASES="$2" - else - VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip5pNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip5pNbases=*) - if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then - VIASH_PAR_CLIP5PNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --limitGenomeGenerateRAM) - [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITGENOMEGENERATERAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitGenomeGenerateRAM. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --limitGenomeGenerateRAM=*) - [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM=*\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITGENOMEGENERATERAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitIObufferSize) - if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - VIASH_PAR_LIMITIOBUFFERSIZE="$2" - else - VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitIObufferSize. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitIObufferSize=*) - if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - VIASH_PAR_LIMITIOBUFFERSIZE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --limitOutSAMoneReadBytes) - [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSAMONEREADBYTES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSAMoneReadBytes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSAMoneReadBytes=*) - [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes=*\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSAMONEREADBYTES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitOutSJoneRead) - [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LIMITOUTSJONEREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJoneRead. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSJoneRead=*) - [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead=*\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJONEREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitOutSJcollapsed) - [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJCOLLAPSED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJcollapsed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSJcollapsed=*) - [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed=*\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJCOLLAPSED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitBAMsortRAM) - [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITBAMSORTRAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitBAMsortRAM. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitBAMsortRAM=*) - [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM=*\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LIMITBAMSORTRAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitSjdbInsertNsj) - [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITSJDBINSERTNSJ="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitSjdbInsertNsj. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitSjdbInsertNsj=*) - [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj=*\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITSJDBINSERTNSJ=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitNreadsSoft) - [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITNREADSSOFT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitNreadsSoft. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitNreadsSoft=*) - [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft=*\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITNREADSSOFT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outTmpKeep) - [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTTMPKEEP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outTmpKeep. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outTmpKeep=*) - [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep=*\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTTMPKEEP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outStd) - [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSTD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outStd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outStd=*) - [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd=*\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSTD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outReadsUnmapped) - [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTREADSUNMAPPED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outReadsUnmapped. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outReadsUnmapped=*) - [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped=*\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTREADSUNMAPPED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outQSconversionAdd) - [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTQSCONVERSIONADD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outQSconversionAdd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outQSconversionAdd=*) - [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd=*\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTQSCONVERSIONADD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outMultimapperOrder) - [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTMULTIMAPPERORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outMultimapperOrder. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outMultimapperOrder=*) - [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder=*\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTMULTIMAPPERORDER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMtype) - if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then - VIASH_PAR_OUTSAMTYPE="$2" - else - VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtype. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMtype=*) - if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then - VIASH_PAR_OUTSAMTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMmode) - [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMmode=*) - [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode=*\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMstrandField) - [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMSTRANDFIELD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMstrandField. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMstrandField=*) - [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField=*\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMSTRANDFIELD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMattributes) - if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then - VIASH_PAR_OUTSAMATTRIBUTES="$2" - else - VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattributes. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMattributes=*) - if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then - VIASH_PAR_OUTSAMATTRIBUTES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMattrIHstart) - [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMATTRIHSTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrIHstart. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMattrIHstart=*) - [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart=*\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMATTRIHSTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMunmapped) - if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then - VIASH_PAR_OUTSAMUNMAPPED="$2" - else - VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMunmapped. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMunmapped=*) - if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then - VIASH_PAR_OUTSAMUNMAPPED=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMorder) - [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMorder. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMorder=*) - [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder=*\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMORDER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMprimaryFlag) - [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMPRIMARYFLAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMprimaryFlag. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMprimaryFlag=*) - [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag=*\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMPRIMARYFLAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMreadID) - [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMREADID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMreadID. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMreadID=*) - [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID=*\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMREADID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMmapqUnique) - [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSAMMAPQUNIQUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmapqUnique. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMmapqUnique=*) - [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique=*\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMAPQUNIQUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMflagOR) - [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagOR. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMflagOR=*) - [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR=*\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMflagAND) - [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagAND. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMflagAND=*) - [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND=*\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSAMFLAGAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMattrRGline) - if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then - VIASH_PAR_OUTSAMATTRRGLINE="$2" - else - VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrRGline. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMattrRGline=*) - if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then - VIASH_PAR_OUTSAMATTRRGLINE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderHD) - if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then - VIASH_PAR_OUTSAMHEADERHD="$2" - else - VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderHD. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMheaderHD=*) - if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then - VIASH_PAR_OUTSAMHEADERHD=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderPG) - if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then - VIASH_PAR_OUTSAMHEADERPG="$2" - else - VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderPG. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMheaderPG=*) - if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then - VIASH_PAR_OUTSAMHEADERPG=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderCommentFile) - [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMHEADERCOMMENTFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderCommentFile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMheaderCommentFile=*) - [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile=*\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMHEADERCOMMENTFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMfilter) - if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then - VIASH_PAR_OUTSAMFILTER="$2" - else - VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMfilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMfilter=*) - if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then - VIASH_PAR_OUTSAMFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMmultNmax) - [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMULTNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmultNmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMmultNmax=*) - [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax=*\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMULTNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMtlen) - [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMTLEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtlen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMtlen=*) - [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen=*\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMTLEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMcompression) - [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMCOMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMcompression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMcompression=*) - [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression=*\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMCOMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMsortingThreadN) - [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTBAMSORTINGTHREADN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingThreadN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMsortingThreadN=*) - [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN=*\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGTHREADN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMsortingBinsN) - [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGBINSN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingBinsN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMsortingBinsN=*) - [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN=*\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGBINSN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bamRemoveDuplicatesType) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesType. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --bamRemoveDuplicatesType=*) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bamRemoveDuplicatesMate2basesN) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesMate2basesN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bamRemoveDuplicatesMate2basesN=*) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigType) - if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then - VIASH_PAR_OUTWIGTYPE="$2" - else - VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigType=*) - if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then - VIASH_PAR_OUTWIGTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outWigStrand) - [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTWIGSTRAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigStrand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigStrand=*) - [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand=*\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGSTRAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigReferencesPrefix) - [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGREFERENCESPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigReferencesPrefix. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigReferencesPrefix=*) - [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix=*\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGREFERENCESPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigNorm) - [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGNORM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigNorm. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigNorm=*) - [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm=*\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTWIGNORM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterType) - [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterType=*) - [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType=*\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMultimapScoreRange) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMultimapScoreRange=*) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMultimapNmax) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMultimapNmax=*) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMismatchNmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNoverLmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverLmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outFilterMismatchNoverLmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNoverReadLmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverReadLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMismatchNoverReadLmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterScoreMin) - [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterScoreMin=*) - [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin=*\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERSCOREMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterScoreMinOverLread) - [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMINOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMinOverLread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterScoreMinOverLread=*) - [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread=*\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMINOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMatchNmin) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMatchNmin=*) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin=*\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMatchNminOverLread) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNminOverLread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMatchNminOverLread=*) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread=*\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterIntronMotifs) - [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONMOTIFS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronMotifs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterIntronMotifs=*) - [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs=*\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONMOTIFS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterIntronStrands) - [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONSTRANDS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronStrands. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outFilterIntronStrands=*) - [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands=*\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONSTRANDS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJtype) - [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJtype. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJtype=*) - [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype=*\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJfilterReads) - [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJFILTERREADS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterReads. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterReads=*) - [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads=*\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSJFILTERREADS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJfilterOverhangMin) - if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$2" - else - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterOverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterOverhangMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - VIASH_PAR_OUTSJFILTEROVERHANGMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterCountUniqueMin) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$2" - else - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountUniqueMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterCountUniqueMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterCountTotalMin) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$2" - else - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountTotalMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSJfilterCountTotalMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterDistToOtherSJmin) - if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$2" - else - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterDistToOtherSJmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterDistToOtherSJmin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterIntronMaxVsReadN) - if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$2" - else - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterIntronMaxVsReadN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterIntronMaxVsReadN=*) - if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --scoreGap) - [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREGAP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGap. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGap=*) - [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap=*\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapNoncan) - [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPNONCAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapNoncan. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapNoncan=*) - [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan=*\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPNONCAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapGCAG) - [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPGCAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapGCAG. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapGCAG=*) - [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG=*\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREGAPGCAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapATAC) - [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPATAC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapATAC. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapATAC=*) - [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC=*\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPATAC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGenomicLengthLog2scale) - [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGenomicLengthLog2scale. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGenomicLengthLog2scale=*) - [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale=*\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreDelOpen) - [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELOPEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelOpen. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreDelOpen=*) - [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen=*\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELOPEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreDelBase) - [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreDelBase=*) - [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase=*\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreInsOpen) - [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSOPEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsOpen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreInsOpen=*) - [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen=*\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSOPEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreInsBase) - [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREINSBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreInsBase=*) - [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase=*\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreStitchSJshift) - [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCORESTITCHSJSHIFT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreStitchSJshift. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreStitchSJshift=*) - [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift=*\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCORESTITCHSJSHIFT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchStartLmax) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchStartLmax=*) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchStartLmaxOverLread) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmaxOverLread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchStartLmaxOverLread=*) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchLmax) - [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchLmax=*) - [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax=*\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedMultimapNmax) - [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEEDMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedMultimapNmax=*) - [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax=*\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedPerReadNmax) - [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedPerReadNmax=*) - [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax=*\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedPerWindowNmax) - [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERWINDOWNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedPerWindowNmax=*) - [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax=*\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEEDPERWINDOWNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedNoneLociPerWindow) - [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDNONELOCIPERWINDOW="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedNoneLociPerWindow. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedNoneLociPerWindow=*) - [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow=*\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDNONELOCIPERWINDOW=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSplitMin) - [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSPLITMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSplitMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSplitMin=*) - [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin=*\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSPLITMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedMapMin) - [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMAPMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMapMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --seedMapMin=*) - [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin=*\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMAPMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignIntronMin) - [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignIntronMin=*) - [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin=*\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignIntronMax) - [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignIntronMax=*) - [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax=*\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignMatesGapMax) - [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNMATESGAPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignMatesGapMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignMatesGapMax=*) - [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax=*\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNMATESGAPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSJoverhangMin) - [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJoverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSJoverhangMin=*) - [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin=*\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSJstitchMismatchNmax) - if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$2" - else - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJstitchMismatchNmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --alignSJstitchMismatchNmax=*) - if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --alignSJDBoverhangMin) - [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJDBOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJDBoverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSJDBoverhangMin=*) - [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin=*\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJDBOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSplicedMateMapLmin) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSplicedMateMapLmin=*) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSplicedMateMapLminOverLmate) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLminOverLmate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSplicedMateMapLminOverLmate=*) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignWindowsPerReadNmax) - [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNWINDOWSPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignWindowsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignWindowsPerReadNmax=*) - [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax=*\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNWINDOWSPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignTranscriptsPerWindowNmax) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignTranscriptsPerWindowNmax=*) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignTranscriptsPerReadNmax) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignTranscriptsPerReadNmax=*) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignEndsType) - [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignEndsType=*) - [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType=*\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignEndsProtrude) - [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSPROTRUDE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsProtrude. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignEndsProtrude=*) - [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude=*\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSPROTRUDE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSoftClipAtReferenceEnds) - [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSoftClipAtReferenceEnds. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSoftClipAtReferenceEnds=*) - [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds=*\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignInsertionFlush) - [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINSERTIONFLUSH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignInsertionFlush. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignInsertionFlush=*) - [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush=*\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINSERTIONFLUSH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --peOverlapNbasesMin) - [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPNBASESMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapNbasesMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --peOverlapNbasesMin=*) - [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin=*\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_PEOVERLAPNBASESMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --peOverlapMMp) - [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPMMP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapMMp. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --peOverlapMMp=*) - [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp=*\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPMMP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winAnchorMultimapNmax) - [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winAnchorMultimapNmax=*) - [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax=*\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winBinNbits) - [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINBINNBITS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winBinNbits. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --winBinNbits=*) - [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits=*\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINBINNBITS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winAnchorDistNbins) - [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORDISTNBINS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorDistNbins. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winAnchorDistNbins=*) - [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins=*\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORDISTNBINS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winFlankNbins) - [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINFLANKNBINS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winFlankNbins. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winFlankNbins=*) - [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins=*\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_WINFLANKNBINS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winReadCoverageRelativeMin) - [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGERELATIVEMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageRelativeMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winReadCoverageRelativeMin=*) - [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin=*\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGERELATIVEMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winReadCoverageBasesMin) - [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGEBASESMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageBasesMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winReadCoverageBasesMin=*) - [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin=*\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGEBASESMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimOutType) - if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then - VIASH_PAR_CHIMOUTTYPE="$2" - else - VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutType. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimOutType=*) - if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then - VIASH_PAR_CHIMOUTTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --chimSegmentMin) - [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimSegmentMin=*) - [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin=*\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreMin) - [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreMin=*) - [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin=*\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreDropMax) - [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMSCOREDROPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreDropMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreDropMax=*) - [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax=*\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREDROPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreSeparation) - [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCORESEPARATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreSeparation. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreSeparation=*) - [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation=*\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCORESEPARATION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreJunctionNonGTAG) - [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreJunctionNonGTAG. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chimScoreJunctionNonGTAG=*) - [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG=*\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimJunctionOverhangMin) - [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMJUNCTIONOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimJunctionOverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimJunctionOverhangMin=*) - [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin=*\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMJUNCTIONOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimSegmentReadGapMax) - [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTREADGAPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentReadGapMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimSegmentReadGapMax=*) - [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax=*\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMSEGMENTREADGAPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimFilter) - if [ -z "$VIASH_PAR_CHIMFILTER" ]; then - VIASH_PAR_CHIMFILTER="$2" - else - VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimFilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimFilter=*) - if [ -z "$VIASH_PAR_CHIMFILTER" ]; then - VIASH_PAR_CHIMFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --chimMainSegmentMultNmax) - [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMAINSEGMENTMULTNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMainSegmentMultNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimMainSegmentMultNmax=*) - [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax=*\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMAINSEGMENTMULTNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimMultimapNmax) - [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapNmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chimMultimapNmax=*) - [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax=*\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimMultimapScoreRange) - [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPSCORERANGE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimMultimapScoreRange=*) - [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange=*\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimNonchimScoreDropMin) - [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMNONCHIMSCOREDROPMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimNonchimScoreDropMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimNonchimScoreDropMin=*) - [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin=*\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMNONCHIMSCOREDROPMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimOutJunctionFormat) - [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMOUTJUNCTIONFORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutJunctionFormat. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimOutJunctionFormat=*) - [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat=*\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMOUTJUNCTIONFORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --quantMode) - if [ -z "$VIASH_PAR_QUANTMODE" ]; then - VIASH_PAR_QUANTMODE="$2" - else - VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quantMode=*) - if [ -z "$VIASH_PAR_QUANTMODE" ]; then - VIASH_PAR_QUANTMODE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --quantTranscriptomeBAMcompression) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBAMcompression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --quantTranscriptomeBAMcompression=*) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --quantTranscriptomeBan) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBan. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quantTranscriptomeBan=*) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --twopassMode) - [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASSMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopassMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --twopassMode=*) - [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode=*\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_TWOPASSMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --twopass1readsN) - [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASS1READSN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopass1readsN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --twopass1readsN=*) - [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN=*\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASS1READSN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --waspOutputMode) - [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WASPOUTPUTMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --waspOutputMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --waspOutputMode=*) - [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode=*\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WASPOUTPUTMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloType) - if [ -z "$VIASH_PAR_SOLOTYPE" ]; then - VIASH_PAR_SOLOTYPE="$2" - else - VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloType. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloType=*) - if [ -z "$VIASH_PAR_SOLOTYPE" ]; then - VIASH_PAR_SOLOTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCBwhitelist) - if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then - VIASH_PAR_SOLOCBWHITELIST="$2" - else - VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBwhitelist. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBwhitelist=*) - if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then - VIASH_PAR_SOLOCBWHITELIST=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCBstart) - [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBSTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBstart. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBstart=*) - [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart=*\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBSTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBlen) - [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBLEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBlen. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloCBlen=*) - [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen=*\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBLEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloUMIstart) - [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMISTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIstart. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIstart=*) - [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart=*\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMISTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloUMIlen) - [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMILEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIlen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIlen=*) - [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen=*\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMILEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloBarcodeReadLength) - [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SOLOBARCODEREADLENGTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeReadLength. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloBarcodeReadLength=*) - [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength=*\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEREADLENGTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloBarcodeMate) - [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEMATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeMate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloBarcodeMate=*) - [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate=*\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEMATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBposition) - if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then - VIASH_PAR_SOLOCBPOSITION="$2" - else - VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBposition. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloCBposition=*) - if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then - VIASH_PAR_SOLOCBPOSITION=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIposition) - [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMIPOSITION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIposition. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIposition=*) - [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition=*\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMIPOSITION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloAdapterSequence) - [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERSEQUENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterSequence. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloAdapterSequence=*) - [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence=*\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SOLOADAPTERSEQUENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloAdapterMismatchesNmax) - [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERMISMATCHESNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterMismatchesNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloAdapterMismatchesNmax=*) - [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax=*\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERMISMATCHESNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBmatchWLtype) - [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBMATCHWLTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBmatchWLtype. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBmatchWLtype=*) - [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype=*\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBMATCHWLTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloInputSAMattrBarcodeSeq) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$2" - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeSeq. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloInputSAMattrBarcodeSeq=*) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloInputSAMattrBarcodeQual) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$2" - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeQual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloInputSAMattrBarcodeQual=*) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloStrand) - [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOSTRAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloStrand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloStrand=*) - [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand=*\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOSTRAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloFeatures) - if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then - VIASH_PAR_SOLOFEATURES="$2" - else - VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloFeatures. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloFeatures=*) - if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then - VIASH_PAR_SOLOFEATURES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloMultiMappers) - if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then - VIASH_PAR_SOLOMULTIMAPPERS="$2" - else - VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloMultiMappers. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloMultiMappers=*) - if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then - VIASH_PAR_SOLOMULTIMAPPERS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIdedup) - if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then - VIASH_PAR_SOLOUMIDEDUP="$2" - else - VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIdedup. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIdedup=*) - if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then - VIASH_PAR_SOLOUMIDEDUP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIfiltering) - if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then - VIASH_PAR_SOLOUMIFILTERING="$2" - else - VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIfiltering. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloUMIfiltering=*) - if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then - VIASH_PAR_SOLOUMIFILTERING=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloOutFileNames) - if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then - VIASH_PAR_SOLOOUTFILENAMES="$2" - else - VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFileNames. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloOutFileNames=*) - if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then - VIASH_PAR_SOLOOUTFILENAMES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCellFilter) - if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then - VIASH_PAR_SOLOCELLFILTER="$2" - else - VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellFilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCellFilter=*) - if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then - VIASH_PAR_SOLOCELLFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloOutFormatFeaturesGeneField3) - if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$2" - else - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFormatFeaturesGeneField3. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloOutFormatFeaturesGeneField3=*) - if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCellReadStats) - [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCELLREADSTATS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellReadStats. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCellReadStats=*) - [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats=*\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCELLREADSTATS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ] && [ ! -e "$VIASH_PAR_SJDBGTFFILE" ]; then - ViashError "Input file '$VIASH_PAR_SJDBGTFFILE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ] && [ ! -e "$VIASH_PAR_READFILESMANIFEST" ]; then - ViashError "Input file '$VIASH_PAR_READFILESMANIFEST' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_RUNRNGSEED" ]]; then - if ! [[ "$VIASH_PAR_RUNRNGSEED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--runRNGseed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_GENOMEFILESIZES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_GENOMEFILESIZES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--genomeFileSizes' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_SJDBOVERHANG" ]]; then - if ! [[ "$VIASH_PAR_SJDBOVERHANG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sjdbOverhang' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SJDBSCORE" ]]; then - if ! [[ "$VIASH_PAR_SJDBSCORE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sjdbScore' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_READMAPNUMBER" ]]; then - if ! [[ "$VIASH_PAR_READMAPNUMBER" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--readMapNumber' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_READQUALITYSCOREBASE" ]]; then - if ! [[ "$VIASH_PAR_READQUALITYSCOREBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--readQualityScoreBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_CLIP3PNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip3pNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PADAPTERMMP; do - if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--clip3pAdapterMMp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PAFTERADAPTERNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip3pAfterAdapterNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP5PNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP5PNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip5pNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ]]; then - if ! [[ "$VIASH_PAR_LIMITGENOMEGENERATERAM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitGenomeGenerateRAM' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_LIMITIOBUFFERSIZE; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitIObufferSize' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSAMoneReadBytes' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSJONEREAD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSJoneRead' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSJCOLLAPSED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSJcollapsed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITBAMSORTRAM" ]]; then - if ! [[ "$VIASH_PAR_LIMITBAMSORTRAM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitBAMsortRAM' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ]]; then - if ! [[ "$VIASH_PAR_LIMITSJDBINSERTNSJ" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitSjdbInsertNsj' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITNREADSSOFT" ]]; then - if ! [[ "$VIASH_PAR_LIMITNREADSSOFT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitNreadsSoft' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ]]; then - if ! 
[[ "$VIASH_PAR_OUTQSCONVERSIONADD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outQSconversionAdd' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMATTRIHSTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMattrIHstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMMAPQUNIQUE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMmapqUnique' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMFLAGOR" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMFLAGOR" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMflagOR' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMFLAGAND" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMFLAGAND" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMflagAND' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMMULTNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMmultNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMTLEN" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMTLEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMtlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ]]; then - if ! [[ "$VIASH_PAR_OUTBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ]]; then - if ! 
[[ "$VIASH_PAR_OUTBAMSORTINGTHREADN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMsortingThreadN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ]]; then - if ! [[ "$VIASH_PAR_OUTBAMSORTINGBINSN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMsortingBinsN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ]]; then - if ! [[ "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--bamRemoveDuplicatesMate2basesN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMismatchNoverLmax' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ]]; then - if ! 
[[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMismatchNoverReadLmax' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterScoreMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterScoreMinOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMatchNmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMatchNminOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTEROVERHANGMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterCountUniqueMin' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterCountTotalMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterDistToOtherSJmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterIntronMaxVsReadN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_SCOREGAP" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAP" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGap' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPNONCAN" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPNONCAN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapNoncan' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPGCAG" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPGCAG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapGCAG' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPATAC" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPATAC" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapATAC' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ]]; then - if ! [[ "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGenomicLengthLog2scale' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREDELOPEN" ]]; then - if ! [[ "$VIASH_PAR_SCOREDELOPEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreDelOpen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREDELBASE" ]]; then - if ! [[ "$VIASH_PAR_SCOREDELBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreDelBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREINSOPEN" ]]; then - if ! [[ "$VIASH_PAR_SCOREINSOPEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreInsOpen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREINSBASE" ]]; then - if ! [[ "$VIASH_PAR_SCOREINSBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreInsBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ]]; then - if ! [[ "$VIASH_PAR_SCORESTITCHSJSHIFT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreStitchSJshift' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDSEARCHSTARTLMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSearchStartLmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ]]; then - if ! 
[[ "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--seedSearchStartLmaxOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHLMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDSEARCHLMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSearchLmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ]]; then - if ! [[ "$VIASH_PAR_SEEDNONELOCIPERWINDOW" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedNoneLociPerWindow' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSPLITMIN" ]]; then - if ! [[ "$VIASH_PAR_SEEDSPLITMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSplitMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDMAPMIN" ]]; then - if ! [[ "$VIASH_PAR_SEEDMAPMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedMapMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNINTRONMIN" ]]; then - if ! 
[[ "$VIASH_PAR_ALIGNINTRONMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignIntronMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNINTRONMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNINTRONMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignIntronMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNMATESGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignMatesGapMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSJOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJstitchMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJDBoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSplicedMateMapLmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ]]; then - if ! 
[[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--alignSplicedMateMapLminOverLmate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignWindowsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignTranscriptsPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignTranscriptsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ]]; then - if ! [[ "$VIASH_PAR_PEOVERLAPNBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--peOverlapNbasesMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PEOVERLAPMMP" ]]; then - if ! [[ "$VIASH_PAR_PEOVERLAPMMP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--peOverlapMMp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_WINANCHORMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winAnchorMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINBINNBITS" ]]; then - if ! 
[[ "$VIASH_PAR_WINBINNBITS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winBinNbits' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINANCHORDISTNBINS" ]]; then - if ! [[ "$VIASH_PAR_WINANCHORDISTNBINS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winAnchorDistNbins' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINFLANKNBINS" ]]; then - if ! [[ "$VIASH_PAR_WINFLANKNBINS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winFlankNbins' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ]]; then - if ! [[ "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--winReadCoverageRelativeMin' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ]]; then - if ! [[ "$VIASH_PAR_WINREADCOVERAGEBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winReadCoverageBasesMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSEGMENTMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMSEGMENTMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimSegmentMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREDROPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreDropMax' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCORESEPARATION" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCORESEPARATION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreSeparation' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreJunctionNonGTAG' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimJunctionOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimSegmentReadGapMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMainSegmentMultNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ]]; then - if ! [[ "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ]]; then - if ! 
[[ "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimNonchimScoreDropMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ]]; then - if ! [[ "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimOutJunctionFormat' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ]]; then - if ! [[ "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--quantTranscriptomeBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TWOPASS1READSN" ]]; then - if ! [[ "$VIASH_PAR_TWOPASS1READSN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--twopass1readsN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOCBSTART" ]]; then - if ! [[ "$VIASH_PAR_SOLOCBSTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloCBstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOCBLEN" ]]; then - if ! [[ "$VIASH_PAR_SOLOCBLEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloCBlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOUMISTART" ]]; then - if ! [[ "$VIASH_PAR_SOLOUMISTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloUMIstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOUMILEN" ]]; then - if ! [[ "$VIASH_PAR_SOLOUMILEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloUMIlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ]]; then - if ! 
[[ "$VIASH_PAR_SOLOBARCODEREADLENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloBarcodeReadLength' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOBARCODEMATE" ]]; then - if ! [[ "$VIASH_PAR_SOLOBARCODEMATE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloBarcodeMate' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ]]; then - if ! [[ "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloAdapterMismatchesNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_TEST_GENOMEFASTAFILES=() - IFS=';' - for var in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_GENOMEFASTAFILES+=( "$var" ) - done - VIASH_PAR_GENOMEFASTAFILES=$(IFS=';' ; echo "${VIASH_TEST_GENOMEFASTAFILES[*]}") -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SJDBGTFFILE")" ) - VIASH_PAR_SJDBGTFFILE=$(ViashAutodetectMount "$VIASH_PAR_SJDBGTFFILE") -fi -if [ ! 
-z "$VIASH_PAR_READFILESMANIFEST" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_READFILESMANIFEST")" ) - VIASH_PAR_READFILESMANIFEST=$(ViashAutodetectMount "$VIASH_PAR_READFILESMANIFEST") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_star_align:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-star_align-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import re -import tempfile -import subprocess -from pathlib import Path -import tarfile -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'genomeFileSizes': $( if [ ! -z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'genomeTransformOutput': $( if [ ! 
-z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sjdbScore': $( if [ ! 
-z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'clipAdapterType': $( if [ ! 
-z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitBAMsortRAM': $( if [ ! 
-z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMunmapped': $( if [ ! 
-z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMmultNmax': $( if [ ! 
-z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterMultimapScoreRange': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterIntronStrands': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJtype': $( if [ ! 
-z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapATAC': $( if [ ! -z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGenomicLengthLog2scale': $( if [ ! 
-z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedNoneLociPerWindow': $( if [ ! 
-z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSplicedMateMapLmin': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignWindowsPerReadNmax': $( if [ ! 
-z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winAnchorMultimapNmax': $( if [ ! -z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winAnchorDistNbins': $( if [ ! 
-z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimFilter': $( if [ ! 
-z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloType': $( if [ ! 
-z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBmatchWLtype': $( if [ ! 
-z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCellReadStats': $( if [ ! 
-z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# regex for matching R[12] fastq(gz) files -# examples: -# - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz -# - tinygex_S1_L001_I1_001.fastq.gz -fastqgz_regex = r'(.+)_(R\\d+)(_\\d+)?\\.fastq(\\.gz)?' - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\x1f\\x8b' - -# look for fastq files in a directory -def search_fastqs(path: Path) -> list[Path]: - if path.is_dir(): - print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True) - value_paths = [file for file in path.iterdir() if re.match(fastqgz_regex, file.name) ] - return value_paths - else: - return [path] - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -######################## -### Main code ### -######################## - -# rename keys and convert path strings to Path -# note: only list file arguments here. 
if non-file arguments also need to be renamed, -# the \`processPar()\` generator needs to be adapted -to_rename = {'input': 'readFilesIn', 'reference': 'genomeDir', 'output': 'outFileNamePrefix'} - -def process_par(orig_par, to_rename): - for key, value in orig_par.items(): - # rename the key in par based on the \`to_rename\` dict - if key in to_rename.keys(): - new_key = to_rename[key] - - # also turn value into a Path - if isinstance(value, list): - new_value = [Path(val) for val in value] - else: - new_value = Path(value) - else: - new_key = key - new_value = value - yield new_key, new_value -par = dict(process_par(par, to_rename)) - -# create output dir if need be -par["outFileNamePrefix"].mkdir(parents=True, exist_ok=True) - -with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: - print(">> Check whether input files are directories", flush=True) - new_read_files_in = [] - for path in par["readFilesIn"]: - new_read_files_in.extend(search_fastqs(path)) - par["readFilesIn"] = new_read_files_in - print("", flush=True) - - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - for par_name in ["genomeDir", "readFilesIn"]: - par_values = par[par_name] - if par_values: - # turn value into list - is_multiple = isinstance(par_values, list) - if not is_multiple: - par_values = [ par_values ] - - # output list - new_values = [] - for par_value in par_values: - print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) - new_value = extract_if_need_be(par_value, temp_dir_path) - new_values.append(new_value) - - # unlist if need be - if not is_multiple: - new_values = new_values[0] - - # replace value - par[par_name] = new_values - # end ungzipping - print("", flush=True) - - print("Grouping R1/R2 input files into pairs", flush=True) - input_grouped = {} - for path in par['readFilesIn']: - key = re.search(fastqgz_regex, path.name).group(2) - if key not in 
input_grouped: - input_grouped[key] = [] - input_grouped[key].append(str(path)) - par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ] - print("", flush=True) - - print(">> Constructing command", flush=True) - par["runMode"] = "alignReads" - par["outTmpDir"] = temp_dir_path / "run" - if 'cpus' in meta and meta['cpus']: - par["runThreadN"] = meta["cpus"] - # make sure there is a trailing / - par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/" - - cmd_args = [ "STAR" ] - for name, value in par.items(): - if value is not None: - if isinstance(value, list): - cmd_args.extend(["--" + name] + [str(x) for x in value]) - else: - cmd_args.extend(["--" + name, str(value)]) - print("", flush=True) - - print(">> Running STAR with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - print("", flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - unset VIASH_TEST_GENOMEFASTAFILES - IFS=';' - for var in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - if [ -z "$VIASH_TEST_GENOMEFASTAFILES" ]; then - VIASH_TEST_GENOMEFASTAFILES="$(ViashStripAutomount "$var")" - else - VIASH_TEST_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES" -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then - VIASH_PAR_SJDBGTFFILE=$(ViashStripAutomount "$VIASH_PAR_SJDBGTFFILE") -fi -if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ]; then - VIASH_PAR_READFILESMANIFEST=$(ViashStripAutomount "$VIASH_PAR_READFILESMANIFEST") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/star_align_v273a/.config.vsh.yaml b/target/docker/mapping/star_align_v273a/.config.vsh.yaml deleted file mode 100644 index 15a0341df41..00000000000 --- a/target/docker/mapping/star_align_v273a/.config.vsh.yaml +++ /dev/null @@ -1,2535 +0,0 @@ -functionality: - name: "star_align_v273a" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input/Output" - arguments: - - type: "file" - name: "--input" - alternatives: - - "--readFilesIn" - description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ - \ in the STAR command." - info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "--genomeDir" - description: "Path to the reference built by star_build_reference. Corresponds\ - \ to the --genomeDir in the STAR command." 
- info: null - example: - - "/path/to/reference" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--outFileNamePrefix" - description: "Path to output directory. Corresponds to the --outFileNamePrefix\ - \ in the STAR command." - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Run Parameters" - arguments: - - type: "integer" - name: "--runRNGseed" - description: "random number generator seed." - info: null - example: - - 777 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome Parameters" - arguments: - - type: "string" - name: "--genomeLoad" - description: "mode of shared memory usage for the genome files. Only used with\ - \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ - \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ - \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ - \ and exit, keeping the genome in memory for future runs\n- Remove \ - \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ - \ ... do not use shared memory, each job will have its own private copy of\ - \ the genome" - info: null - example: - - "NoSharedMemory" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--genomeFastaFiles" - description: "path(s) to the fasta files with the genome sequences, separated\ - \ by spaces. These files should be plain text FASTA files, they *cannot* be\ - \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ - \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ - \ sequences to the genome (e.g. spike-ins)." 
- info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--genomeFileSizes" - description: "genome files exact sizes in bytes. Typically, this should not\ - \ be defined by the user." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeTransformOutput" - description: "which output to transform back to original genome\n\n- SAM \ - \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ - \ None ... no transformation of the output" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeChrSetMitochondrial" - description: "names of the mitochondrial chromosomes. Presently only used for\ - \ STARsolo statistics output/" - info: null - example: - - "chrM" - - "M" - - "MT" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Splice Junctions Database" - arguments: - - type: "string" - name: "--sjdbFileChrStartEnd" - description: "path to the files with genomic coordinates (chr start \ - \ end strand) for the splice junction introns. Multiple files can be\ - \ supplied and will be concatenated." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--sjdbGTFfile" - description: "path to the GTF file with annotations" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFchrPrefix" - description: "prefix for chromosome names in a GTF file (e.g. 
'chr' for using\ - \ ENSMEBL annotations with UCSC genomes)" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFfeatureExon" - description: "feature type in GTF file to be used as exons for building transcripts" - info: null - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentTranscript" - description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ - \ works for GTF files)" - info: null - example: - - "transcript_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGene" - description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ - \ for GTF files)" - info: null - example: - - "gene_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneName" - description: "GTF attribute name for parent gene name" - info: null - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneType" - description: "GTF attribute name for parent gene type" - info: null - example: - - "gene_type" - - "gene_biotype" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--sjdbOverhang" - description: "length of the donor/acceptor sequence on each side of the junctions,\ - \ ideally = (mate_length - 1)" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--sjdbScore" - description: "extra alignment score for alignments that cross database junctions" - info: null - example: - - 2 - required: false 
- direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbInsertSave" - description: "which files to save when sjdb junctions are inserted on the fly\ - \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ - - All ... all files including big Genome, SA and SAindex - this will create\ - \ a complete genome directory" - info: null - example: - - "Basic" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variation parameters" - arguments: - - type: "string" - name: "--varVCFfile" - description: "path to the VCF file that contains variation data. The 10th column\ - \ should contain the genotype information, e.g. 0/1" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Parameters" - arguments: - - type: "string" - name: "--readFilesType" - description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ - - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ - \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ - \ --readFilesCommand samtools view" - info: null - example: - - "Fastx" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesSAMattrKeep" - description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ - \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ - - None ... do not keep any tags" - info: null - example: - - "All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--readFilesManifest" - description: "path to the \"manifest\" file with the names of read files. 
The\ - \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ - \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ - \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ - \ but not tabs are allowed in file names.\nIf read_group_line does not start\ - \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ - If read_group_line starts with ID:, it can contain several fields separated\ - \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ - \ line." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesPrefix" - description: "prefix for the read files names, i.e. it will be added in front\ - \ of the strings in --readFilesIn" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesCommand" - description: "command line to execute for each of the input file. This command\ - \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ - \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readMapNumber" - description: "number of reads to map from the beginning of the file\n\n-1: map\ - \ all reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readMatesLengthsIn" - description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ - \ mates are the same / not the same. NotEqual is safe in all situations." 
- info: null - example: - - "NotEqual" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readNameSeparator" - description: "character(s) separating the part of the read names that will be\ - \ trimmed in output (read name after space is always trimmed)" - info: null - example: - - "/" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readQualityScoreBase" - description: "number to be subtracted from the ASCII code to get Phred quality\ - \ score" - info: null - example: - - 33 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Clipping" - arguments: - - type: "string" - name: "--clipAdapterType" - description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ - \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ - - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ - \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ - \ ... no adapter clipping, all other clip* parameters are disregarded" - info: null - example: - - "Hamming" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--clip3pNbases" - description: "number(s) of bases to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--clip3pAdapterSeq" - description: "adapter sequences to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates.\n\n- polyA ... 
polyA\ - \ sequence with the length equal to read length" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "double" - name: "--clip3pAdapterMMp" - description: "max proportion of mismatches for 3p adapter clipping for each\ - \ mate. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0.1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip3pAfterAdapterNbases" - description: "number of bases to clip from 3p of each mate after the adapter\ - \ clipping. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip5pNbases" - description: "number(s) of bases to clip from 5p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Limits" - arguments: - - type: "long" - name: "--limitGenomeGenerateRAM" - description: "maximum available RAM (bytes) for genome generation" - info: null - example: - - 31000000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitIObufferSize" - description: "max available buffers size (bytes) for input/output, per thread" - info: null - example: - - 30000000 - - 50000000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "long" - name: "--limitOutSAMoneReadBytes" - description: "max size of the SAM record (bytes) for one read. 
Recommended value:\ - \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - info: null - example: - - 100000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJoneRead" - description: "max number of junctions for one read (including all multi-mappers)" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJcollapsed" - description: "max number of collapsed junctions" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitBAMsortRAM" - description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ - \ be set to the genome index size. 0 value can only be used with --genomeLoad\ - \ NoSharedMemory option." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitSjdbInsertNsj" - description: "maximum number of junctions to be inserted to the genome on the\ - \ fly at the mapping stage, including those from annotations and those detected\ - \ in the 1st step of the 2-pass run" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitNreadsSoft" - description: "soft limit on the number of reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: general" - arguments: - - type: "string" - name: "--outTmpKeep" - description: "whether to keep the temporary files after STAR runs is finished\n\ - \n- None ... remove all temporary files\n- All ... 
keep all files" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outStd" - description: "which output will be directed to stdout (standard out)\n\n- Log\ - \ ... log messages\n- SAM ... alignments\ - \ in SAM format (which normally are output to Aligned.out.sam file), normal\ - \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ - \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ - \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ - \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ - \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" - info: null - example: - - "Log" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outReadsUnmapped" - description: "output of unmapped and partially mapped (i.e. mapped only one\ - \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ - \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outQSconversionAdd" - description: "add this number to the quality score (e.g. to convert from Illumina\ - \ to Sanger, use -31)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outMultimapperOrder" - description: "order of multimapping alignments in the output files\n\n- Old_2.4\ - \ ... quasi-random order used before 2.5.0\n- Random \ - \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ - \ are always adjacent, all alignment for each read stay together. This option\ - \ will become default in the future releases." 
- info: null - example: - - "Old_2.4" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: SAM and BAM" - arguments: - - type: "string" - name: "--outSAMtype" - description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ - \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ - 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ - \ ... sorted by coordinate. This option will allocate extra memory for sorting\ - \ which can be specified by --limitBAMsortRAM." - info: null - example: - - "SAM" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMmode" - description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ - \ SAM output\n- NoQS ... full SAM but without quality scores" - info: null - example: - - "Full" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMstrandField" - description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ - - intronMotif ... strand derived from the intron motif. This option changes\ - \ the output alignments: reads with inconsistent and/or non-canonical introns\ - \ are filtered out." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattributes" - description: "a string of desired SAM attributes, in the order desired for the\ - \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ - - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ - \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ - \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ - \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ - \ --outSAMattrIHstart (=1 by default). 
Standard SAM tag.\n- AS ...\ - \ local alignment score, +1/-1 for matches/mismateches, score* penalties for\ - \ indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n\ - - nM ... number of mismatches. For PE reads, sum over two mates.\n\ - - NM ... edit distance to the reference (number of mismatched + inserted\ - \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ - \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ - \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ - \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ - \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ - \ annotated, 20 is added to its motif value.\n- jI ... start and\ - \ end of introns for all junctions (1-based).\n- XS ... alignment\ - \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ - \ string. Standard SAM tag.\n- ch ... marks all segment of all chimeric\ - \ alingments for --chimOutType WithinBAM output.\n- cN ... number\ - \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ - \ ... variant allele\n- vG ... genomic coordinate of the variant\ - \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ - \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ - \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ - \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ - \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ - \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ - \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ - \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ - \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ - \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ - \ ha ... 
haplotype (1/2) when mapping to the diploid genome. Requires\ - \ genome generated with --genomeTransformType Diploid .\n- rB ...\ - \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ - \ of the variant." - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMattrIHstart" - description: "start value for the IH attribute. 0 may be required by some downstream\ - \ software, such as Cufflinks or StringTie." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMunmapped" - description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ - \ ... no output\n- Within ... output unmapped reads within the main SAM\ - \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ - \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ - \ to its mapped mate. Only affects multi-mapping reads." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMorder" - description: "type of sorting for the SAM output\n\nPaired: one mate after the\ - \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ - \ other for all paired alignments, the order is kept the same as in the input\ - \ FASTQ files" - info: null - example: - - "Paired" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMprimaryFlag" - description: "which alignments are considered primary - all others will be marked\ - \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ - \ the best score is primary\n- AllBestScore ... 
all alignments with the best\ - \ score are primary" - info: null - example: - - "OneBestScore" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMreadID" - description: "read ID record type\n\n- Standard ... first word (until space)\ - \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ - \ read number (index) in the FASTx file" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMmapqUnique" - description: "0 to 255: the MAPQ value for unique mappers" - info: null - example: - - 255 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagOR" - description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ - \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ - \ are not set otherwise." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagAND" - description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ - \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ - \ are not set otherwise." - info: null - example: - - 65535 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattrRGline" - description: "SAM/BAM read group line. The first word contains the read group\ - \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ - \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ - \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ - \ correspons to different (comma separated) input files in --readFilesIn.\ - \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ - \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderHD" - description: "@HD (header) line of the SAM header" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderPG" - description: "extra @PG (software) line of the SAM header (in addition to STAR)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderCommentFile" - description: "path to the file with @CO (comment) lines of the SAM header" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMfilter" - description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ - \ ... only keep the reads for which all alignments are to the extra reference\ - \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ - \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ - \ at the mapping stage." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMmultNmax" - description: "max number of multiple alignments for a read that will be output\ - \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ - \ scoring alignment will be output first\n\n- -1 ... 
all alignments (up to\ - \ --outFilterMultimapNmax) will be output" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMtlen" - description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ - - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ - \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ - \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ - \ from 1 for overlapping mates with protruding ends" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMcompression" - description: "-1 to 10 BAM compression level, -1=default compression (6?),\ - \ 0=no compression, 10=maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingThreadN" - description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingBinsN" - description: ">0: number of genome bins for coordinate-sorting" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "BAM processing" - arguments: - - type: "string" - name: "--bamRemoveDuplicatesType" - description: "mark duplicates in the BAM file, for now only works with (i) sorted\ - \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ - \ - ... no duplicate removal/marking\n- UniqueIdentical\ - \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ - \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ - \ unique mappers but not multimappers." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--bamRemoveDuplicatesMate2basesN" - description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ - \ for RAMPAGE)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Wiggle" - arguments: - - type: "string" - name: "--outWigType" - description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ - . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ - - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ - \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ - \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ - \ only 2nd read" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outWigStrand" - description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ - \ strands, str1 and str2\n- Unstranded ... collapsed strands" - info: null - example: - - "Stranded" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigReferencesPrefix" - description: "prefix matching reference names to include in the output wiggle\ - \ file, e.g. \"chr\", default \"-\" - include all references" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigNorm" - description: "type of normalization for the signal\n\n- RPM ... reads per\ - \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" - info: null - example: - - "RPM" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering" - arguments: - - type: "string" - name: "--outFilterType" - description: "type of filtering\n\n- Normal ... standard filtering using only\ - \ current alignment\n- BySJout ... keep only those reads that contain junctions\ - \ that passed filtering into SJ.out.tab" - info: null - example: - - "Normal" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapScoreRange" - description: "the score range below the maximum score for multimapping alignments" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapNmax" - description: "maximum number of loci the read is allowed to map to. Alignments\ - \ (all of them) will be output only if the read maps to no more loci than\ - \ this value.\n\nOtherwise no alignments will be output, and the read will\ - \ be counted as \"mapped to too many loci\" in the Log.final.out ." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMismatchNmax" - description: "alignment will be output only if it has no more mismatches than\ - \ this value." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverLmax" - description: "alignment will be output only if its ratio of mismatches to *mapped*\ - \ length is less than or equal to this value." 
- info: null - example: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverReadLmax" - description: "alignment will be output only if its ratio of mismatches to *read*\ - \ length is less than or equal to this value." - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterScoreMin" - description: "alignment will be output only if its score is higher than or equal\ - \ to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterScoreMinOverLread" - description: "same as outFilterScoreMin, but normalized to read length (sum\ - \ of mates' lengths for paired-end reads)" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMatchNmin" - description: "alignment will be output only if the number of matched bases is\ - \ higher than or equal to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMatchNminOverLread" - description: "sam as outFilterMatchNmin, but normalized to the read length (sum\ - \ of mates' lengths for paired-end reads)." - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronMotifs" - description: "filter alignment using their motifs\n\n- None \ - \ ... no filtering\n- RemoveNoncanonical ... filter\ - \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ - \ ... 
filter out alignments that contain non-canonical unannotated junctions\ - \ when using annotated splice junctions database. The annotated non-canonical\ - \ junctions will be kept." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronStrands" - description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ - \ alignments that have junctions with inconsistent strands\n- None \ - \ ... no filtering" - info: null - example: - - "RemoveInconsistentStrands" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output splice junctions (SJ.out.tab)" - arguments: - - type: "string" - name: "--outSJtype" - description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ - \ output\n- None ... no splice junction output" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering: Splice Junctions" - arguments: - - type: "string" - name: "--outSJfilterReads" - description: "which reads to consider for collapsed splice junctions output\n\ - \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ - \ mapping reads only" - info: null - example: - - "All" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSJfilterOverhangMin" - description: "minimum overhang length for splice junctions on both sides for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - does not apply to annotated junctions" - info: null - example: - - 30 - - 12 - - 12 - - 12 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountUniqueMin" - description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ - \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ - \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ - \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ - \ are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountTotalMin" - description: "minimum total (multi-mapping+unique) read count per junction for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ - Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ - \ conditions are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterDistToOtherSJmin" - description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ - does not apply to annotated junctions" - info: null - example: - - 10 - - 0 - - 5 - - 10 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterIntronMaxVsReadN" - description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ - \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ - \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ - does not apply to annotated junctions" - info: null - example: - - 50000 - - 100000 - - 200000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Scoring" - arguments: - - type: "integer" - name: "--scoreGap" - description: "splice junction penalty (independent on intron motif)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapNoncan" - description: "non-canonical junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapGCAG" - description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" - info: null - example: - - -4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapATAC" - description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGenomicLengthLog2scale" - description: "extra score logarithmically scaled with genomic length of the\ - \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelOpen" - description: "deletion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelBase" - description: "deletion extension penalty per base (in addition to scoreDelOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: 
"par" - - type: "integer" - name: "--scoreInsOpen" - description: "insertion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsBase" - description: "insertion extension penalty per base (in addition to scoreInsOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreStitchSJshift" - description: "maximum score reduction while searching for SJ boundaries in the\ - \ stitching step" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Alignments and Seeding" - arguments: - - type: "integer" - name: "--seedSearchStartLmax" - description: "defines the search start point through the read - the read is\ - \ split into pieces no longer than this value" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--seedSearchStartLmaxOverLread" - description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ - \ for paired-end reads)" - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSearchLmax" - description: "defines the maximum length of the seeds, if =0 seed length is\ - \ not limited" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMultimapNmax" - description: "only pieces that map fewer than this value are utilized in the\ - \ stitching procedure" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerReadNmax" - description: "max 
number of seeds per read" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerWindowNmax" - description: "max number of seeds per window" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedNoneLociPerWindow" - description: "max number of one seed loci per window" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSplitMin" - description: "min length of the seed sequences split by Ns or mate gap" - info: null - example: - - 12 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMapMin" - description: "min length of seeds to be mapped" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMin" - description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ - \ otherwise it is considered Deletion" - info: null - example: - - 21 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMax" - description: "maximum intron size, if 0, max intron size will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignMatesGapMax" - description: "maximum gap between two mates, if 0, max intron gap will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: 
"--alignSJoverhangMin" - description: "minimum overhang (i.e. block size) for spliced alignments" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJstitchMismatchNmax" - description: "maximum number of mismatches for stitching of the splice junctions\ - \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ - \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." - info: null - example: - - 0 - - -1 - - 0 - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--alignSJDBoverhangMin" - description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ - \ alignments" - info: null - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSplicedMateMapLmin" - description: "minimum mapped length for a read mate that is spliced" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alignSplicedMateMapLminOverLmate" - description: "alignSplicedMateMapLmin normalized to mate length" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignWindowsPerReadNmax" - description: "max number of windows per read" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerWindowNmax" - description: "max number of transcripts per window" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerReadNmax" - description: "max number of different alignments per read 
to consider" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsType" - description: "type of read ends alignment\n\n- Local ... standard\ - \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ - \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ - \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ - \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ - \ local alignment" - info: null - example: - - "Local" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsProtrude" - description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ - \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ - \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ - \ ConcordantPair ... report alignments with non-zero protrusion\ - \ as concordant pairs\n- DiscordantPair ... report alignments\ - \ with non-zero protrusion as discordant pairs" - info: null - example: - - "0 ConcordantPair" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignSoftClipAtReferenceEnds" - description: "allow the soft-clipping of the alignments past the end of the\ - \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ - \ with Cufflinks" - info: null - example: - - "Yes" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignInsertionFlush" - description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ - \ are not flushed\n- Right ... 
insertions are flushed to the right" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Paired-End reads" - arguments: - - type: "integer" - name: "--peOverlapNbasesMin" - description: "minimum number of overlapping bases to trigger mates merging and\ - \ realignment. Specify >0 value to switch on the \"merginf of overlapping\ - \ mates\" algorithm." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--peOverlapMMp" - description: "maximum proportion of mismatched bases in the overlap area" - info: null - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Windows, Anchors, Binning" - arguments: - - type: "integer" - name: "--winAnchorMultimapNmax" - description: "max number of loci anchors are allowed to map to" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winBinNbits" - description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ - \ each window will occupy an integer number of bins." 
- info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winAnchorDistNbins" - description: "max number of bins between two anchors that allows aggregation\ - \ of anchors into one window" - info: null - example: - - 9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winFlankNbins" - description: "log2(winFlank), where win Flank is the size of the left and right\ - \ flanking regions for each window" - info: null - example: - - 4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--winReadCoverageRelativeMin" - description: "minimum relative coverage of the read sequence by the seeds in\ - \ a window, for STARlong algorithm only." - info: null - example: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winReadCoverageBasesMin" - description: "minimum number of bases covered by the seeds in a window , for\ - \ STARlong algorithm only." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Chimeric Alignments" - arguments: - - type: "string" - name: "--chimOutType" - description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ - - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ - - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ - - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ - \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ - \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" - info: null - example: - - "Junctions" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimSegmentMin" - description: "minimum length of chimeric segment length, if ==0, no chimeric\ - \ output" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreMin" - description: "minimum total (summed) score of the chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreDropMax" - description: "max drop (difference) of chimeric score (the sum of scores of\ - \ all chimeric segments) from the read length" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreSeparation" - description: "minimum difference (separation) between the best chimeric score\ - \ and the next one" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreJunctionNonGTAG" - description: "penalty for a non-GT/AG chimeric junction" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimJunctionOverhangMin" - description: "minimum overhang for a chimeric junction" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimSegmentReadGapMax" - description: "maximum gap in the read sequence between chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - 
type: "string" - name: "--chimFilter" - description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ - - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ - \ junction" - info: null - example: - - "banGenomicN" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimMainSegmentMultNmax" - description: "maximum number of multi-alignments for the main chimeric segment.\ - \ =1 will prohibit multimapping main segments." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapNmax" - description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ - \ old scheme for chimeric detection which only considered unique alignments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapScoreRange" - description: "the score range for multi-mapping chimeras below the best chimeric\ - \ score. Only works with --chimMultimapNmax > 1" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimNonchimScoreDropMin" - description: "to trigger chimeric detection, the drop in the best non-chimeric\ - \ alignment score with respect to the read length has to be greater than this\ - \ value" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimOutJunctionFormat" - description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ - \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ - \ command line and Nreads: total, unique/multi-mapping" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Quantification of Annotations" - arguments: - - type: "string" - name: "--quantMode" - description: "types of quantification requested\n\n- - ... none\n\ - - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ - \ file\n- GeneCounts ... count reads per gene" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--quantTranscriptomeBAMcompression" - description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ - \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ - - 10 ... maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--quantTranscriptomeBan" - description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ - \ prohibit indels, soft clipping and single-end alignments - compatible with\ - \ RSEM\n- Singleend ... prohibit single-end alignments" - info: null - example: - - "IndelSoftclipSingleend" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "2-pass Mapping" - arguments: - - type: "string" - name: "--twopassMode" - description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ - \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ - \ the genome indices on the fly" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--twopass1readsN" - description: "number of reads to process for the 1st step. Use very large number\ - \ (or default -1) to map all reads in the first step." 
- info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "WASP parameters" - arguments: - - type: "string" - name: "--waspOutputMode" - description: "WASP allele-specific output type. This is re-implementation of\ - \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ - \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ - \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ - \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "STARsolo (single cell RNA-seq) parameters" - arguments: - - type: "string" - name: "--soloType" - description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ - \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ - \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ - \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ - \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ - \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ - \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ - \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ - \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ - \ UMI sequences, alignments deduplicated according to alignment start and\ - \ end (after extending soft-clipped bases)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCBwhitelist" - description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ - \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ - \ all cell barcodes are allowed" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--soloCBstart" - description: "cell barcode start base" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloCBlen" - description: "cell barcode length" - info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIstart" - description: "UMI start base" - info: null - example: - - 17 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIlen" - description: "UMI length" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeReadLength" - description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ - - 0 ... not defined, do not check" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeMate" - description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ - \n- 0 ... barcode sequence is on separate read, which should always be the\ - \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ - \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBposition" - description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ - \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ - \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ - start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ - \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ - \ position with of the CB start(end) with respect to the Anchor Base\nString\ - \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ - \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIposition" - description: "position of the UMI on the barcode read, same as soloCBposition\n\ - \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ - \ 3_9_3_14" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloAdapterSequence" - description: "adapter sequence to anchor barcodes. Only one adapter sequence\ - \ is allowed." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloAdapterMismatchesNmax" - description: "maximum number of mismatches allowed in adapter sequence." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBmatchWLtype" - description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ - \ ... only exact matches allowed\n- 1MM \ - \ ... 
only one match in whitelist with 1 mismatched base allowed.\ - \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ - \ ... multiple matches in whitelist with 1 mismatched\ - \ base allowed, posterior probability calculation is used choose one of the\ - \ matches.\nAllowed CBs have to have at least one read with exact match. This\ - \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ - \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ - \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ - \ multimatching to WL is allowed for CBs with N-bases. This option matches\ - \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ - \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ - \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ - \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - info: null - example: - - "1MM_multi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeSeq" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ - \ CR UR .\nThis parameter is required when running STARsolo with input from\ - \ SAM." 
- info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeQual" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ - \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ - \ to all bases." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloStrand" - description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ - \ information\n- Forward ... read strand same as the original RNA molecule\n\ - - Reverse ... read strand opposite to the original RNA molecule" - info: null - example: - - "Forward" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloFeatures" - description: "genomic features for which the UMI counts per Cell Barcode are\ - \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ - - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ - \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ - \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ - \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ - \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ - \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ - \ Do not count reads with 100% exonic overlap in the antisense direction." - info: null - example: - - "Gene" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloMultiMappers" - description: "counting method for reads mapping to multiple genes\n\n- Unique\ - \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ - \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ - \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ - \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ - \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ - \ Maximization algorithm" - info: null - example: - - "Unique" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIdedup" - description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ - \ ... all UMIs with 1 mismatch distance to each other\ - \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ - \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ - \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ - \ but with more stringent criteria for duplicate UMIs\n- Exact \ - \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ - \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ - \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." - info: null - example: - - "1MM_All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIfiltering" - description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ - - - ... basic filtering: remove UMIs with N and homopolymers\ - \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ - \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ - \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ - \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ - \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFileNames" - description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ - \ barcode_sequences cell_feature_count_matrix" - info: null - example: - - "Solo.out/" - - "features.tsv" - - "barcodes.tsv" - - "matrix.mtx" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellFilter" - description: "cell filtering type and parameters\n\n- None ... do\ - \ not output filtered cells\n- TopCells ... only report top cells by\ - \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ - \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ - \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ - \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ - \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ - \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ - \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ - Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ - \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ - \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ - \ 0.99 10 45000 90000 500 0.01\ - \ 20000 0.01 10000" - info: null - example: - - "CellRanger2.2" - - "3000" - - "0.99" - - "10" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFormatFeaturesGeneField3" - description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ - \ is output." 
- info: null - example: - - "Gene Expression" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellReadStats" - description: "Output reads statistics for each CB\n\n- Standard ... standard\ - \ output" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "../star_align/script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Align fastq files using STAR." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "docker" - env: - - "STAR_VERSION 2.7.3a" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - 
directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_align_v273a" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_align_v273a/star_align_v273a" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/star_align_v273a/setup_logger.py b/target/docker/mapping/star_align_v273a/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/mapping/star_align_v273a/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = 
logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/mapping/star_align_v273a/star_align_v273a b/target/docker/mapping/star_align_v273a/star_align_v273a deleted file mode 100755 index 3e3d0f0f339..00000000000 --- a/target/docker/mapping/star_align_v273a/star_align_v273a +++ /dev/null @@ -1,5713 +0,0 @@ -#!/usr/bin/env bash - -# star_align_v273a 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: 
-# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. 
-# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="star_align_v273a" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "star_align_v273a 0.12.3" - echo "" - echo "Align fastq files using STAR." 
- echo "" - echo "Input/Output:" - echo " --readFilesIn, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example:" - echo "mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz" - echo " The FASTQ files to be analyzed. Corresponds to the --readFilesIn in the" - echo " STAR command." - echo "" - echo " --genomeDir, --reference" - echo " type: file, required parameter, file must exist" - echo " example: /path/to/reference" - echo " Path to the reference built by star_build_reference. Corresponds to the" - echo " --genomeDir in the STAR command." - echo "" - echo " --outFileNamePrefix, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/foo" - echo " Path to output directory. Corresponds to the --outFileNamePrefix in the" - echo " STAR command." - echo "" - echo "Run Parameters:" - echo " --runRNGseed" - echo " type: integer" - echo " example: 777" - echo " random number generator seed." - echo "" - echo "Genome Parameters:" - echo " --genomeLoad" - echo " type: string" - echo " example: NoSharedMemory" - echo " mode of shared memory usage for the genome files. Only used with" - echo " --runMode alignReads." - echo " - LoadAndKeep ... load genome into shared and keep it in memory" - echo " after run" - echo " - LoadAndRemove ... load genome into shared but remove it after run" - echo " - LoadAndExit ... load genome into shared memory and exit, keeping" - echo " the genome in memory for future runs" - echo " - Remove ... do not map anything, just remove loaded genome" - echo " from memory" - echo " - NoSharedMemory ... do not use shared memory, each job will have its" - echo " own private copy of the genome" - echo "" - echo " --genomeFastaFiles" - echo " type: file, multiple values allowed, file must exist" - echo " path(s) to the fasta files with the genome sequences, separated by" - echo " spaces. 
These files should be plain text FASTA files, they *cannot* be" - echo " zipped." - echo " Required for the genome generation (--runMode genomeGenerate). Can also" - echo " be used in the mapping (--runMode alignReads) to add extra (new)" - echo " sequences to the genome (e.g. spike-ins)." - echo "" - echo " --genomeFileSizes" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " genome files exact sizes in bytes. Typically, this should not be defined" - echo " by the user." - echo "" - echo " --genomeTransformOutput" - echo " type: string, multiple values allowed" - echo " which output to transform back to original genome" - echo " - SAM ... SAM/BAM alignments" - echo " - SJ ... splice junctions (SJ.out.tab)" - echo " - None ... no transformation of the output" - echo "" - echo " --genomeChrSetMitochondrial" - echo " type: string, multiple values allowed" - echo " example: chrM;M;MT" - echo " names of the mitochondrial chromosomes. Presently only used for STARsolo" - echo " statistics output/" - echo "" - echo "Splice Junctions Database:" - echo " --sjdbFileChrStartEnd" - echo " type: string, multiple values allowed" - echo " path to the files with genomic coordinates (chr start end" - echo " strand) for the splice junction introns. Multiple files can be" - echo " supplied and will be concatenated." - echo "" - echo " --sjdbGTFfile" - echo " type: file, file must exist" - echo " path to the GTF file with annotations" - echo "" - echo " --sjdbGTFchrPrefix" - echo " type: string" - echo " prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL" - echo " annotations with UCSC genomes)" - echo "" - echo " --sjdbGTFfeatureExon" - echo " type: string" - echo " example: exon" - echo " feature type in GTF file to be used as exons for building transcripts" - echo "" - echo " --sjdbGTFtagExonParentTranscript" - echo " type: string" - echo " example: transcript_id" - echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" - echo " works for GTF files)" - echo "" - echo " --sjdbGTFtagExonParentGene" - echo " type: string" - echo " example: gene_id" - echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" - echo " files)" - echo "" - echo " --sjdbGTFtagExonParentGeneName" - echo " type: string, multiple values allowed" - echo " example: gene_name" - echo " GTF attribute name for parent gene name" - echo "" - echo " --sjdbGTFtagExonParentGeneType" - echo " type: string, multiple values allowed" - echo " example: gene_type;gene_biotype" - echo " GTF attribute name for parent gene type" - echo "" - echo " --sjdbOverhang" - echo " type: integer" - echo " example: 100" - echo " length of the donor/acceptor sequence on each side of the junctions," - echo " ideally = (mate_length - 1)" - echo "" - echo " --sjdbScore" - echo " type: integer" - echo " example: 2" - echo " extra alignment score for alignments that cross database junctions" - echo "" - echo " --sjdbInsertSave" - echo " type: string" - echo " example: Basic" - echo " which files to save when sjdb junctions are inserted on the fly at the" - echo " mapping step" - echo " - Basic ... only small junction / transcript files" - echo " - All ... all files including big Genome, SA and SAindex - this will" - echo " create a complete genome directory" - echo "" - echo "Variation parameters:" - echo " --varVCFfile" - echo " type: string" - echo " path to the VCF file that contains variation data. The 10th column" - echo " should contain the genotype information, e.g. 
0/1" - echo "" - echo "Read Parameters:" - echo " --readFilesType" - echo " type: string" - echo " example: Fastx" - echo " format of input read files" - echo " - Fastx ... FASTA or FASTQ" - echo " - SAM SE ... SAM or BAM single-end reads; for BAM use" - echo " --readFilesCommand samtools view" - echo " - SAM PE ... SAM or BAM paired-end reads; for BAM use" - echo " --readFilesCommand samtools view" - echo "" - echo " --readFilesSAMattrKeep" - echo " type: string, multiple values allowed" - echo " example: All" - echo " for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM," - echo " e.g.: --readFilesSAMtagsKeep RG PL" - echo " - All ... keep all tags" - echo " - None ... do not keep any tags" - echo "" - echo " --readFilesManifest" - echo " type: file, file must exist" - echo " path to the \"manifest\" file with the names of read files. The manifest" - echo " file should contain 3 tab-separated columns:" - echo " paired-end reads: read1_file_name \$tab\$ read2_file_name \$tab\$" - echo " read_group_line." - echo " single-end reads: read1_file_name \$tab\$ - \$tab\$" - echo " read_group_line." - echo " Spaces, but not tabs are allowed in file names." - echo " If read_group_line does not start with ID:, it can only contain one ID" - echo " field, and ID: will be added to it." - echo " If read_group_line starts with ID:, it can contain several fields" - echo " separated by \$tab\$, and all fields will be be copied verbatim into SAM" - echo " @RG header line." - echo "" - echo " --readFilesPrefix" - echo " type: string" - echo " prefix for the read files names, i.e. it will be added in front of the" - echo " strings in --readFilesIn" - echo "" - echo " --readFilesCommand" - echo " type: string, multiple values allowed" - echo " command line to execute for each of the input file. 
This command should" - echo " generate FASTA or FASTQ text and send it to stdout" - echo " For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2" - echo " files, etc." - echo "" - echo " --readMapNumber" - echo " type: integer" - echo " example: -1" - echo " number of reads to map from the beginning of the file" - echo " -1: map all reads" - echo "" - echo " --readMatesLengthsIn" - echo " type: string" - echo " example: NotEqual" - echo " Equal/NotEqual - lengths of names,sequences,qualities for both mates are" - echo " the same / not the same. NotEqual is safe in all situations." - echo "" - echo " --readNameSeparator" - echo " type: string, multiple values allowed" - echo " example: /" - echo " character(s) separating the part of the read names that will be trimmed" - echo " in output (read name after space is always trimmed)" - echo "" - echo " --readQualityScoreBase" - echo " type: integer" - echo " example: 33" - echo " number to be subtracted from the ASCII code to get Phred quality score" - echo "" - echo "Read Clipping:" - echo " --clipAdapterType" - echo " type: string" - echo " example: Hamming" - echo " adapter clipping type" - echo " - Hamming ... adapter clipping based on Hamming distance, with the" - echo " number of mismatches controlled by --clip5pAdapterMMp" - echo " - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4." - echo " Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal" - echo " - None ... no adapter clipping, all other clip* parameters are" - echo " disregarded" - echo "" - echo " --clip3pNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number(s) of bases to clip from 3p of each mate. If one value is given," - echo " it will be assumed the same for both mates." - echo "" - echo " --clip3pAdapterSeq" - echo " type: string, multiple values allowed" - echo " adapter sequences to clip from 3p of each mate. 
If one value is given," - echo " it will be assumed the same for both mates." - echo " - polyA ... polyA sequence with the length equal to read length" - echo "" - echo " --clip3pAdapterMMp" - echo " type: double, multiple values allowed" - echo " example: 0.1" - echo " max proportion of mismatches for 3p adapter clipping for each mate. If" - echo " one value is given, it will be assumed the same for both mates." - echo "" - echo " --clip3pAfterAdapterNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number of bases to clip from 3p of each mate after the adapter clipping." - echo " If one value is given, it will be assumed the same for both mates." - echo "" - echo " --clip5pNbases" - echo " type: integer, multiple values allowed" - echo " example: 0" - echo " number(s) of bases to clip from 5p of each mate. If one value is given," - echo " it will be assumed the same for both mates." - echo "" - echo "Limits:" - echo " --limitGenomeGenerateRAM" - echo " type: long" - echo " example: 31000000000" - echo " maximum available RAM (bytes) for genome generation" - echo "" - echo " --limitIObufferSize" - echo " type: long, multiple values allowed" - echo " example: 30000000;50000000" - echo " max available buffers size (bytes) for input/output, per thread" - echo "" - echo " --limitOutSAMoneReadBytes" - echo " type: long" - echo " example: 100000" - echo " max size of the SAM record (bytes) for one read. Recommended value:" - echo " >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - echo "" - echo " --limitOutSJoneRead" - echo " type: integer" - echo " example: 1000" - echo " max number of junctions for one read (including all multi-mappers)" - echo "" - echo " --limitOutSJcollapsed" - echo " type: integer" - echo " example: 1000000" - echo " max number of collapsed junctions" - echo "" - echo " --limitBAMsortRAM" - echo " type: long" - echo " example: 0" - echo " maximum available RAM (bytes) for sorting BAM. 
If =0, it will be set to" - echo " the genome index size. 0 value can only be used with --genomeLoad" - echo " NoSharedMemory option." - echo "" - echo " --limitSjdbInsertNsj" - echo " type: integer" - echo " example: 1000000" - echo " maximum number of junctions to be inserted to the genome on the fly at" - echo " the mapping stage, including those from annotations and those detected" - echo " in the 1st step of the 2-pass run" - echo "" - echo " --limitNreadsSoft" - echo " type: integer" - echo " example: -1" - echo " soft limit on the number of reads" - echo "" - echo "Output: general:" - echo " --outTmpKeep" - echo " type: string" - echo " whether to keep the temporary files after STAR runs is finished" - echo " - None ... remove all temporary files" - echo " - All ... keep all files" - echo "" - echo " --outStd" - echo " type: string" - echo " example: Log" - echo " which output will be directed to stdout (standard out)" - echo " - Log ... log messages" - echo " - SAM ... alignments in SAM format (which normally" - echo " are output to Aligned.out.sam file), normal standard output will go into" - echo " Log.std.out" - echo " - BAM_Unsorted ... alignments in BAM format, unsorted." - echo " Requires --outSAMtype BAM Unsorted" - echo " - BAM_SortedByCoordinate ... alignments in BAM format, sorted by" - echo " coordinate. Requires --outSAMtype BAM SortedByCoordinate" - echo " - BAM_Quant ... alignments to transcriptome in BAM format," - echo " unsorted. Requires --quantMode TranscriptomeSAM" - echo "" - echo " --outReadsUnmapped" - echo " type: string" - echo " output of unmapped and partially mapped (i.e. mapped only one mate of a" - echo " paired end read) reads in separate file(s)." - echo " - None ... no output" - echo " - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - echo "" - echo " --outQSconversionAdd" - echo " type: integer" - echo " example: 0" - echo " add this number to the quality score (e.g. 
to convert from Illumina to" - echo " Sanger, use -31)" - echo "" - echo " --outMultimapperOrder" - echo " type: string" - echo " example: Old_2.4" - echo " order of multimapping alignments in the output files" - echo " - Old_2.4 ... quasi-random order used before 2.5.0" - echo " - Random ... random order of alignments for each" - echo " multi-mapper. Read mates (pairs) are always adjacent, all alignment for" - echo " each read stay together. This option will become default in the future" - echo " releases." - echo "" - echo "Output: SAM and BAM:" - echo " --outSAMtype" - echo " type: string, multiple values allowed" - echo " example: SAM" - echo " type of SAM/BAM output" - echo " 1st word:" - echo " - BAM ... output BAM without sorting" - echo " - SAM ... output SAM without sorting" - echo " - None ... no SAM/BAM output" - echo " 2nd, 3rd:" - echo " - Unsorted ... standard unsorted" - echo " - SortedByCoordinate ... sorted by coordinate. This option will allocate" - echo " extra memory for sorting which can be specified by --limitBAMsortRAM." - echo "" - echo " --outSAMmode" - echo " type: string" - echo " example: Full" - echo " mode of SAM output" - echo " - None ... no SAM output" - echo " - Full ... full SAM output" - echo " - NoQS ... full SAM but without quality scores" - echo "" - echo " --outSAMstrandField" - echo " type: string" - echo " Cufflinks-like strand field flag" - echo " - None ... not used" - echo " - intronMotif ... strand derived from the intron motif. This option" - echo " changes the output alignments: reads with inconsistent and/or" - echo " non-canonical introns are filtered out." - echo "" - echo " --outSAMattributes" - echo " type: string, multiple values allowed" - echo " example: Standard" - echo " a string of desired SAM attributes, in the order desired for the output" - echo " SAM. Tags can be listed in any combination/order." - echo " ***Presets:" - echo " - None ... no attributes" - echo " - Standard ... 
NH HI AS nM" - echo " - All ... NH HI AS nM NM MD jM jI MC ch" - echo " ***Alignment:" - echo " - NH ... number of loci the reads maps to: =1 for unique" - echo " mappers, >1 for multimappers. Standard SAM tag." - echo " - HI ... multiple alignment index, starts with" - echo " --outSAMattrIHstart (=1 by default). Standard SAM tag." - echo " - AS ... local alignment score, +1/-1 for matches/mismateches," - echo " score* penalties for indels and gaps. For PE reads, total score for two" - echo " mates. Stadnard SAM tag." - echo " - nM ... number of mismatches. For PE reads, sum over two" - echo " mates." - echo " - NM ... edit distance to the reference (number of mismatched +" - echo " inserted + deleted bases) for each mate. Standard SAM tag." - echo " - MD ... string encoding mismatched and deleted reference bases" - echo " (see standard SAM specifications). Standard SAM tag." - echo " - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0:" - echo " non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6:" - echo " GT/AT. If splice junctions database is used, and a junction is" - echo " annotated, 20 is added to its motif value." - echo " - jI ... start and end of introns for all junctions (1-based)." - echo " - XS ... alignment strand according to --outSAMstrandField." - echo " - MC ... mate's CIGAR string. Standard SAM tag." - echo " - ch ... marks all segment of all chimeric alingments for" - echo " --chimOutType WithinBAM output." - echo " - cN ... number of bases clipped from the read ends: 5' and 3'" - echo " ***Variation:" - echo " - vA ... variant allele" - echo " - vG ... genomic coordinate of the variant overlapped by the" - echo " read." - echo " - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 -" - echo " alignment does not pass WASP filtering. Requires --waspOutputMode" - echo " SAMtag." - echo " ***STARsolo:" - echo " - CR CY UR UY ... 
sequences and quality scores of cell barcodes and UMIs" - echo " for the solo* demultiplexing." - echo " - GX GN ... gene ID and gene name for unique-gene reads." - echo " - gx gn ... gene IDs and gene names for unique- and multi-gene" - echo " reads." - echo " - CB UB ... error-corrected cell barcodes and UMIs for solo*" - echo " demultiplexing. Requires --outSAMtype BAM SortedByCoordinate." - echo " - sM ... assessment of CB and UMI." - echo " - sS ... sequence of the entire barcode (CB,UMI,adapter)." - echo " - sQ ... quality of the entire barcode." - echo " ***Unsupported/undocumented:" - echo " - ha ... haplotype (1/2) when mapping to the diploid genome." - echo " Requires genome generated with --genomeTransformType Diploid ." - echo " - rB ... alignment block read/genomic coordinates." - echo " - vR ... read coordinate of the variant." - echo "" - echo " --outSAMattrIHstart" - echo " type: integer" - echo " example: 1" - echo " start value for the IH attribute. 0 may be required by some downstream" - echo " software, such as Cufflinks or StringTie." - echo "" - echo " --outSAMunmapped" - echo " type: string, multiple values allowed" - echo " output of unmapped reads in the SAM format" - echo " 1st word:" - echo " - None ... no output" - echo " - Within ... output unmapped reads within the main SAM file (i.e." - echo " Aligned.out.sam)" - echo " 2nd word:" - echo " - KeepPairs ... record unmapped mate for each alignment, and, in case of" - echo " unsorted output, keep it adjacent to its mapped mate. Only affects" - echo " multi-mapping reads." 
- echo "" - echo " --outSAMorder" - echo " type: string" - echo " example: Paired" - echo " type of sorting for the SAM output" - echo " Paired: one mate after the other for all paired alignments" - echo " PairedKeepInputOrder: one mate after the other for all paired" - echo " alignments, the order is kept the same as in the input FASTQ files" - echo "" - echo " --outSAMprimaryFlag" - echo " type: string" - echo " example: OneBestScore" - echo " which alignments are considered primary - all others will be marked with" - echo " 0x100 bit in the FLAG" - echo " - OneBestScore ... only one alignment with the best score is primary" - echo " - AllBestScore ... all alignments with the best score are primary" - echo "" - echo " --outSAMreadID" - echo " type: string" - echo " example: Standard" - echo " read ID record type" - echo " - Standard ... first word (until space) from the FASTx read ID line," - echo " removing /1,/2 from the end" - echo " - Number ... read number (index) in the FASTx file" - echo "" - echo " --outSAMmapqUnique" - echo " type: integer" - echo " example: 255" - echo " 0 to 255: the MAPQ value for unique mappers" - echo "" - echo " --outSAMflagOR" - echo " type: integer" - echo " example: 0" - echo " 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e." - echo " FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set" - echo " by STAR, and after outSAMflagAND. Can be used to set specific bits that" - echo " are not set otherwise." - echo "" - echo " --outSAMflagAND" - echo " type: integer" - echo " example: 65535" - echo " 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e." - echo " FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set" - echo " by STAR, but before outSAMflagOR. Can be used to unset specific bits" - echo " that are not set otherwise." - echo "" - echo " --outSAMattrRGline" - echo " type: string, multiple values allowed" - echo " SAM/BAM read group line. 
The first word contains the read group" - echo " identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx" - echo " CN:yy \"DS:z z z\"." - echo " xxx will be added as RG tag to each output alignment. Any spaces in the" - echo " tag values have to be double quoted." - echo " Comma separated RG lines correspons to different (comma separated) input" - echo " files in --readFilesIn. Commas have to be surrounded by spaces, e.g." - echo " --outSAMattrRGline ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - echo "" - echo " --outSAMheaderHD" - echo " type: string, multiple values allowed" - echo " @HD (header) line of the SAM header" - echo "" - echo " --outSAMheaderPG" - echo " type: string, multiple values allowed" - echo " extra @PG (software) line of the SAM header (in addition to STAR)" - echo "" - echo " --outSAMheaderCommentFile" - echo " type: string" - echo " path to the file with @CO (comment) lines of the SAM header" - echo "" - echo " --outSAMfilter" - echo " type: string, multiple values allowed" - echo " filter the output into main SAM/BAM files" - echo " - KeepOnlyAddedReferences ... only keep the reads for which all" - echo " alignments are to the extra reference sequences added with" - echo " --genomeFastaFiles at the mapping stage." - echo " - KeepAllAddedReferences ... keep all alignments to the extra reference" - echo " sequences added with --genomeFastaFiles at the mapping stage." - echo "" - echo " --outSAMmultNmax" - echo " type: integer" - echo " example: -1" - echo " max number of multiple alignments for a read that will be output to the" - echo " SAM/BAM files. Note that if this value is not equal to -1, the top" - echo " scoring alignment will be output first" - echo " - -1 ... all alignments (up to --outFilterMultimapNmax) will be output" - echo "" - echo " --outSAMtlen" - echo " type: integer" - echo " example: 1" - echo " calculation method for the TLEN field in the SAM/BAM files" - echo " - 1 ... 
leftmost base of the (+)strand mate to rightmost base of the" - echo " (-)mate. (+)sign for the (+)strand mate" - echo " - 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign" - echo " for the mate with the leftmost base. This is different from 1 for" - echo " overlapping mates with protruding ends" - echo "" - echo " --outBAMcompression" - echo " type: integer" - echo " example: 1" - echo " -1 to 10 BAM compression level, -1=default compression (6?), 0=no" - echo " compression, 10=maximum compression" - echo "" - echo " --outBAMsortingThreadN" - echo " type: integer" - echo " example: 0" - echo " >=0: number of threads for BAM sorting. 0 will default to" - echo " min(6,--runThreadN)." - echo "" - echo " --outBAMsortingBinsN" - echo " type: integer" - echo " example: 50" - echo " >0: number of genome bins for coordinate-sorting" - echo "" - echo "BAM processing:" - echo " --bamRemoveDuplicatesType" - echo " type: string" - echo " mark duplicates in the BAM file, for now only works with (i) sorted BAM" - echo " fed with inputBAMfile, and (ii) for paired-end alignments only" - echo " - - ... no duplicate removal/marking" - echo " - UniqueIdentical ... mark all multimappers, and duplicate" - echo " unique mappers. The coordinates, FLAG, CIGAR must be identical" - echo " - UniqueIdenticalNotMulti ... mark duplicate unique mappers but not" - echo " multimappers." - echo "" - echo " --bamRemoveDuplicatesMate2basesN" - echo " type: integer" - echo " example: 0" - echo " number of bases from the 5' of mate 2 to use in collapsing (e.g. for" - echo " RAMPAGE)" - echo "" - echo "Output Wiggle:" - echo " --outWigType" - echo " type: string, multiple values allowed" - echo " type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires" - echo " sorted BAM: --outSAMtype BAM SortedByCoordinate ." - echo " 1st word:" - echo " - None ... no signal output" - echo " - bedGraph ... bedGraph format" - echo " - wiggle ... 
wiggle format" - echo " 2nd word:" - echo " - read1_5p ... signal from only 5' of the 1st read, useful for" - echo " CAGE/RAMPAGE etc" - echo " - read2 ... signal from only 2nd read" - echo "" - echo " --outWigStrand" - echo " type: string" - echo " example: Stranded" - echo " strandedness of wiggle/bedGraph output" - echo " - Stranded ... separate strands, str1 and str2" - echo " - Unstranded ... collapsed strands" - echo "" - echo " --outWigReferencesPrefix" - echo " type: string" - echo " prefix matching reference names to include in the output wiggle file," - echo " e.g. \"chr\", default \"-\" - include all references" - echo "" - echo " --outWigNorm" - echo " type: string" - echo " example: RPM" - echo " type of normalization for the signal" - echo " - RPM ... reads per million of mapped reads" - echo " - None ... no normalization, \"raw\" counts" - echo "" - echo "Output Filtering:" - echo " --outFilterType" - echo " type: string" - echo " example: Normal" - echo " type of filtering" - echo " - Normal ... standard filtering using only current alignment" - echo " - BySJout ... keep only those reads that contain junctions that passed" - echo " filtering into SJ.out.tab" - echo "" - echo " --outFilterMultimapScoreRange" - echo " type: integer" - echo " example: 1" - echo " the score range below the maximum score for multimapping alignments" - echo "" - echo " --outFilterMultimapNmax" - echo " type: integer" - echo " example: 10" - echo " maximum number of loci the read is allowed to map to. Alignments (all of" - echo " them) will be output only if the read maps to no more loci than this" - echo " value." - echo " Otherwise no alignments will be output, and the read will be counted as" - echo " \"mapped to too many loci\" in the Log.final.out ." - echo "" - echo " --outFilterMismatchNmax" - echo " type: integer" - echo " example: 10" - echo " alignment will be output only if it has no more mismatches than this" - echo " value." 
- echo "" - echo " --outFilterMismatchNoverLmax" - echo " type: double" - echo " example: 0.3" - echo " alignment will be output only if its ratio of mismatches to *mapped*" - echo " length is less than or equal to this value." - echo "" - echo " --outFilterMismatchNoverReadLmax" - echo " type: double" - echo " example: 1.0" - echo " alignment will be output only if its ratio of mismatches to *read*" - echo " length is less than or equal to this value." - echo "" - echo " --outFilterScoreMin" - echo " type: integer" - echo " example: 0" - echo " alignment will be output only if its score is higher than or equal to" - echo " this value." - echo "" - echo " --outFilterScoreMinOverLread" - echo " type: double" - echo " example: 0.66" - echo " same as outFilterScoreMin, but normalized to read length (sum of mates'" - echo " lengths for paired-end reads)" - echo "" - echo " --outFilterMatchNmin" - echo " type: integer" - echo " example: 0" - echo " alignment will be output only if the number of matched bases is higher" - echo " than or equal to this value." - echo "" - echo " --outFilterMatchNminOverLread" - echo " type: double" - echo " example: 0.66" - echo " sam as outFilterMatchNmin, but normalized to the read length (sum of" - echo " mates' lengths for paired-end reads)." - echo "" - echo " --outFilterIntronMotifs" - echo " type: string" - echo " filter alignment using their motifs" - echo " - None ... no filtering" - echo " - RemoveNoncanonical ... filter out alignments that contain" - echo " non-canonical junctions" - echo " - RemoveNoncanonicalUnannotated ... filter out alignments that contain" - echo " non-canonical unannotated junctions when using annotated splice" - echo " junctions database. The annotated non-canonical junctions will be kept." - echo "" - echo " --outFilterIntronStrands" - echo " type: string" - echo " example: RemoveInconsistentStrands" - echo " filter alignments" - echo " - RemoveInconsistentStrands ... 
remove alignments that have" - echo " junctions with inconsistent strands" - echo " - None ... no filtering" - echo "" - echo "Output splice junctions (SJ.out.tab):" - echo " --outSJtype" - echo " type: string" - echo " example: Standard" - echo " type of splice junction output" - echo " - Standard ... standard SJ.out.tab output" - echo " - None ... no splice junction output" - echo "" - echo "Output Filtering: Splice Junctions:" - echo " --outSJfilterReads" - echo " type: string" - echo " example: All" - echo " which reads to consider for collapsed splice junctions output" - echo " - All ... all reads, unique- and multi-mappers" - echo " - Unique ... uniquely mapping reads only" - echo "" - echo " --outSJfilterOverhangMin" - echo " type: integer, multiple values allowed" - echo " example: 30;12;12;12" - echo " minimum overhang length for splice junctions on both sides for: (1)" - echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterCountUniqueMin" - echo " type: integer, multiple values allowed" - echo " example: 3;1;1;1" - echo " minimum uniquely mapping read count per junction for: (1) non-canonical" - echo " motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC" - echo " and GT/AT motif. -1 means no output for that motif" - echo " Junctions are output if one of outSJfilterCountUniqueMin OR" - echo " outSJfilterCountTotalMin conditions are satisfied" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterCountTotalMin" - echo " type: integer, multiple values allowed" - echo " example: 3;1;1;1" - echo " minimum total (multi-mapping+unique) read count per junction for: (1)" - echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif" - echo " Junctions are output if one of outSJfilterCountUniqueMin OR" - echo " outSJfilterCountTotalMin conditions are satisfied" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterDistToOtherSJmin" - echo " type: integer, multiple values allowed" - echo " example: 10;0;5;10" - echo " minimum allowed distance to other junctions' donor/acceptor" - echo " does not apply to annotated junctions" - echo "" - echo " --outSJfilterIntronMaxVsReadN" - echo " type: integer, multiple values allowed" - echo " example: 50000;100000;200000" - echo " maximum gap allowed for junctions supported by 1,2,3,,,N reads" - echo " i.e. by default junctions supported by 1 read can have gaps <=50000b, by" - echo " 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap" - echo " <=alignIntronMax" - echo " does not apply to annotated junctions" - echo "" - echo "Scoring:" - echo " --scoreGap" - echo " type: integer" - echo " example: 0" - echo " splice junction penalty (independent on intron motif)" - echo "" - echo " --scoreGapNoncan" - echo " type: integer" - echo " example: -8" - echo " non-canonical junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGapGCAG" - echo " type: integer" - echo " example: -4" - echo " GC/AG and CT/GC junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGapATAC" - echo " type: integer" - echo " example: -8" - echo " AT/AC and GT/AT junction penalty (in addition to scoreGap)" - echo "" - echo " --scoreGenomicLengthLog2scale" - echo " type: integer" - echo " example: 0" - echo " extra score logarithmically scaled with genomic length of the alignment:" - echo " scoreGenomicLengthLog2scale*log2(genomicLength)" - echo "" - echo " --scoreDelOpen" - echo " type: integer" - echo " example: -2" - echo " deletion open penalty" - echo "" - echo " --scoreDelBase" - echo " type: integer" - echo " example: -2" - echo " deletion extension penalty per base (in addition to 
scoreDelOpen)" - echo "" - echo " --scoreInsOpen" - echo " type: integer" - echo " example: -2" - echo " insertion open penalty" - echo "" - echo " --scoreInsBase" - echo " type: integer" - echo " example: -2" - echo " insertion extension penalty per base (in addition to scoreInsOpen)" - echo "" - echo " --scoreStitchSJshift" - echo " type: integer" - echo " example: 1" - echo " maximum score reduction while searching for SJ boundaries in the" - echo " stitching step" - echo "" - echo "Alignments and Seeding:" - echo " --seedSearchStartLmax" - echo " type: integer" - echo " example: 50" - echo " defines the search start point through the read - the read is split into" - echo " pieces no longer than this value" - echo "" - echo " --seedSearchStartLmaxOverLread" - echo " type: double" - echo " example: 1.0" - echo " seedSearchStartLmax normalized to read length (sum of mates' lengths for" - echo " paired-end reads)" - echo "" - echo " --seedSearchLmax" - echo " type: integer" - echo " example: 0" - echo " defines the maximum length of the seeds, if =0 seed length is not" - echo " limited" - echo "" - echo " --seedMultimapNmax" - echo " type: integer" - echo " example: 10000" - echo " only pieces that map fewer than this value are utilized in the stitching" - echo " procedure" - echo "" - echo " --seedPerReadNmax" - echo " type: integer" - echo " example: 1000" - echo " max number of seeds per read" - echo "" - echo " --seedPerWindowNmax" - echo " type: integer" - echo " example: 50" - echo " max number of seeds per window" - echo "" - echo " --seedNoneLociPerWindow" - echo " type: integer" - echo " example: 10" - echo " max number of one seed loci per window" - echo "" - echo " --seedSplitMin" - echo " type: integer" - echo " example: 12" - echo " min length of the seed sequences split by Ns or mate gap" - echo "" - echo " --seedMapMin" - echo " type: integer" - echo " example: 5" - echo " min length of seeds to be mapped" - echo "" - echo " --alignIntronMin" - echo 
" type: integer" - echo " example: 21" - echo " minimum intron size, genomic gap is considered intron if its" - echo " length>=alignIntronMin, otherwise it is considered Deletion" - echo "" - echo " --alignIntronMax" - echo " type: integer" - echo " example: 0" - echo " maximum intron size, if 0, max intron size will be determined by" - echo " (2^winBinNbits)*winAnchorDistNbins" - echo "" - echo " --alignMatesGapMax" - echo " type: integer" - echo " example: 0" - echo " maximum gap between two mates, if 0, max intron gap will be determined" - echo " by (2^winBinNbits)*winAnchorDistNbins" - echo "" - echo " --alignSJoverhangMin" - echo " type: integer" - echo " example: 5" - echo " minimum overhang (i.e. block size) for spliced alignments" - echo "" - echo " --alignSJstitchMismatchNmax" - echo " type: integer, multiple values allowed" - echo " example: 0;-1;0;0" - echo " maximum number of mismatches for stitching of the splice junctions (-1:" - echo " no limit)." - echo " (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" - echo " motif, (4) AT/AC and GT/AT motif." - echo "" - echo " --alignSJDBoverhangMin" - echo " type: integer" - echo " example: 3" - echo " minimum overhang (i.e. 
block size) for annotated (sjdb) spliced" - echo " alignments" - echo "" - echo " --alignSplicedMateMapLmin" - echo " type: integer" - echo " example: 0" - echo " minimum mapped length for a read mate that is spliced" - echo "" - echo " --alignSplicedMateMapLminOverLmate" - echo " type: double" - echo " example: 0.66" - echo " alignSplicedMateMapLmin normalized to mate length" - echo "" - echo " --alignWindowsPerReadNmax" - echo " type: integer" - echo " example: 10000" - echo " max number of windows per read" - echo "" - echo " --alignTranscriptsPerWindowNmax" - echo " type: integer" - echo " example: 100" - echo " max number of transcripts per window" - echo "" - echo " --alignTranscriptsPerReadNmax" - echo " type: integer" - echo " example: 10000" - echo " max number of different alignments per read to consider" - echo "" - echo " --alignEndsType" - echo " type: string" - echo " example: Local" - echo " type of read ends alignment" - echo " - Local ... standard local alignment with soft-clipping" - echo " allowed" - echo " - EndToEnd ... force end-to-end read alignment, do not" - echo " soft-clip" - echo " - Extend5pOfRead1 ... fully extend only the 5p of the read1, all other" - echo " ends: local alignment" - echo " - Extend5pOfReads12 ... fully extend only the 5p of the both read1 and" - echo " read2, all other ends: local alignment" - echo "" - echo " --alignEndsProtrude" - echo " type: string" - echo " example: 0 ConcordantPair" - echo " allow protrusion of alignment ends, i.e. start (end) of the +strand mate" - echo " downstream of the start (end) of the -strand mate" - echo " 1st word: int: maximum number of protrusion bases allowed" - echo " 2nd word: string:" - echo " - ConcordantPair ... report alignments with non-zero" - echo " protrusion as concordant pairs" - echo " - DiscordantPair ... 
report alignments with non-zero" - echo " protrusion as discordant pairs" - echo "" - echo " --alignSoftClipAtReferenceEnds" - echo " type: string" - echo " example: Yes" - echo " allow the soft-clipping of the alignments past the end of the" - echo " chromosomes" - echo " - Yes ... allow" - echo " - No ... prohibit, useful for compatibility with Cufflinks" - echo "" - echo " --alignInsertionFlush" - echo " type: string" - echo " how to flush ambiguous insertion positions" - echo " - None ... insertions are not flushed" - echo " - Right ... insertions are flushed to the right" - echo "" - echo "Paired-End reads:" - echo " --peOverlapNbasesMin" - echo " type: integer" - echo " example: 0" - echo " minimum number of overlapping bases to trigger mates merging and" - echo " realignment. Specify >0 value to switch on the \"merginf of overlapping" - echo " mates\" algorithm." - echo "" - echo " --peOverlapMMp" - echo " type: double" - echo " example: 0.01" - echo " maximum proportion of mismatched bases in the overlap area" - echo "" - echo "Windows, Anchors, Binning:" - echo " --winAnchorMultimapNmax" - echo " type: integer" - echo " example: 50" - echo " max number of loci anchors are allowed to map to" - echo "" - echo " --winBinNbits" - echo " type: integer" - echo " example: 16" - echo " =log2(winBin), where winBin is the size of the bin for the" - echo " windows/clustering, each window will occupy an integer number of bins." 
- echo "" - echo " --winAnchorDistNbins" - echo " type: integer" - echo " example: 9" - echo " max number of bins between two anchors that allows aggregation of" - echo " anchors into one window" - echo "" - echo " --winFlankNbins" - echo " type: integer" - echo " example: 4" - echo " log2(winFlank), where win Flank is the size of the left and right" - echo " flanking regions for each window" - echo "" - echo " --winReadCoverageRelativeMin" - echo " type: double" - echo " example: 0.5" - echo " minimum relative coverage of the read sequence by the seeds in a window," - echo " for STARlong algorithm only." - echo "" - echo " --winReadCoverageBasesMin" - echo " type: integer" - echo " example: 0" - echo " minimum number of bases covered by the seeds in a window , for STARlong" - echo " algorithm only." - echo "" - echo "Chimeric Alignments:" - echo " --chimOutType" - echo " type: string, multiple values allowed" - echo " example: Junctions" - echo " type of chimeric output" - echo " - Junctions ... Chimeric.out.junction" - echo " - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file" - echo " - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)" - echo " - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for" - echo " supplemental chimeric alignments (default if no 2nd word is present)" - echo " - WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental" - echo " chimeric alignments" - echo "" - echo " --chimSegmentMin" - echo " type: integer" - echo " example: 0" - echo " minimum length of chimeric segment length, if ==0, no chimeric output" - echo "" - echo " --chimScoreMin" - echo " type: integer" - echo " example: 0" - echo " minimum total (summed) score of the chimeric segments" - echo "" - echo " --chimScoreDropMax" - echo " type: integer" - echo " example: 20" - echo " max drop (difference) of chimeric score (the sum of scores of all" - echo " chimeric segments) from the read length" - echo "" - echo " --chimScoreSeparation" - echo " type: integer" - echo " example: 10" - echo " minimum difference (separation) between the best chimeric score and the" - echo " next one" - echo "" - echo " --chimScoreJunctionNonGTAG" - echo " type: integer" - echo " example: -1" - echo " penalty for a non-GT/AG chimeric junction" - echo "" - echo " --chimJunctionOverhangMin" - echo " type: integer" - echo " example: 20" - echo " minimum overhang for a chimeric junction" - echo "" - echo " --chimSegmentReadGapMax" - echo " type: integer" - echo " example: 0" - echo " maximum gap in the read sequence between chimeric segments" - echo "" - echo " --chimFilter" - echo " type: string, multiple values allowed" - echo " example: banGenomicN" - echo " different filters for chimeric alignments" - echo " - None ... no filtering" - echo " - banGenomicN ... Ns are not allowed in the genome sequence around the" - echo " chimeric junction" - echo "" - echo " --chimMainSegmentMultNmax" - echo " type: integer" - echo " example: 10" - echo " maximum number of multi-alignments for the main chimeric segment. =1" - echo " will prohibit multimapping main segments." - echo "" - echo " --chimMultimapNmax" - echo " type: integer" - echo " example: 0" - echo " maximum number of chimeric multi-alignments" - echo " - 0 ... 
use the old scheme for chimeric detection which only considered" - echo " unique alignments" - echo "" - echo " --chimMultimapScoreRange" - echo " type: integer" - echo " example: 1" - echo " the score range for multi-mapping chimeras below the best chimeric" - echo " score. Only works with --chimMultimapNmax > 1" - echo "" - echo " --chimNonchimScoreDropMin" - echo " type: integer" - echo " example: 20" - echo " to trigger chimeric detection, the drop in the best non-chimeric" - echo " alignment score with respect to the read length has to be greater than" - echo " this value" - echo "" - echo " --chimOutJunctionFormat" - echo " type: integer" - echo " example: 0" - echo " formatting type for the Chimeric.out.junction file" - echo " - 0 ... no comment lines/headers" - echo " - 1 ... comment lines at the end of the file: command line and Nreads:" - echo " total, unique/multi-mapping" - echo "" - echo "Quantification of Annotations:" - echo " --quantMode" - echo " type: string, multiple values allowed" - echo " types of quantification requested" - echo " - - ... none" - echo " - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a" - echo " separate file" - echo " - GeneCounts ... count reads per gene" - echo "" - echo " --quantTranscriptomeBAMcompression" - echo " type: integer" - echo " example: 1" - echo " -2 to 10 transcriptome BAM compression level" - echo " - -2 ... no BAM output" - echo " - -1 ... default compression (6?)" - echo " - 0 ... no compression" - echo " - 10 ... maximum compression" - echo "" - echo " --quantTranscriptomeBan" - echo " type: string" - echo " example: IndelSoftclipSingleend" - echo " prohibit various alignment type" - echo " - IndelSoftclipSingleend ... prohibit indels, soft clipping and" - echo " single-end alignments - compatible with RSEM" - echo " - Singleend ... prohibit single-end alignments" - echo "" - echo "2-pass Mapping:" - echo " --twopassMode" - echo " type: string" - echo " 2-pass mapping mode." 
- echo " - None ... 1-pass mapping" - echo " - Basic ... basic 2-pass mapping, with all 1st pass junctions" - echo " inserted into the genome indices on the fly" - echo "" - echo " --twopass1readsN" - echo " type: integer" - echo " example: -1" - echo " number of reads to process for the 1st step. Use very large number (or" - echo " default -1) to map all reads in the first step." - echo "" - echo "WASP parameters:" - echo " --waspOutputMode" - echo " type: string" - echo " WASP allele-specific output type. This is re-implementation of the" - echo " original WASP mappability filtering by Bryce van de Geijn, Graham" - echo " McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original" - echo " WASP paper: Nature Methods 12, 1061-1063 (2015)," - echo " https://www.nature.com/articles/nmeth.3582 ." - echo " - SAMtag ... add WASP tags to the alignments that pass WASP" - echo " filtering" - echo "" - echo "STARsolo (single cell RNA-seq) parameters:" - echo " --soloType" - echo " type: string, multiple values allowed" - echo " type of single-cell RNA-seq" - echo " - CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of" - echo " fixed length in read2, e.g. Drop-seq and 10X Chromium." - echo " - CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI" - echo " of fixed length and one adapter sequence of fixed length are allowed in" - echo " read2 only (e.g. inDrop, ddSeq)." - echo " - CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No" - echo " UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end]" - echo " CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or" - echo " SortedByCoordinate]" - echo " - SmartSeq ... 
Smart-seq: each cell in a separate FASTQ (paired-" - echo " or single-end), barcodes are corresponding read-groups, no UMI" - echo " sequences, alignments deduplicated according to alignment start and end" - echo " (after extending soft-clipped bases)" - echo "" - echo " --soloCBwhitelist" - echo " type: string, multiple values allowed" - echo " file(s) with whitelist(s) of cell barcodes. Only --soloType" - echo " CB_UMI_Complex allows more than one whitelist file." - echo " - None ... no whitelist: all cell barcodes are allowed" - echo "" - echo " --soloCBstart" - echo " type: integer" - echo " example: 1" - echo " cell barcode start base" - echo "" - echo " --soloCBlen" - echo " type: integer" - echo " example: 16" - echo " cell barcode length" - echo "" - echo " --soloUMIstart" - echo " type: integer" - echo " example: 17" - echo " UMI start base" - echo "" - echo " --soloUMIlen" - echo " type: integer" - echo " example: 10" - echo " UMI length" - echo "" - echo " --soloBarcodeReadLength" - echo " type: integer" - echo " example: 1" - echo " length of the barcode read" - echo " - 1 ... equal to sum of soloCBlen+soloUMIlen" - echo " - 0 ... not defined, do not check" - echo "" - echo " --soloBarcodeMate" - echo " type: integer" - echo " example: 0" - echo " identifies which read mate contains the barcode (CB+UMI) sequence" - echo " - 0 ... barcode sequence is on separate read, which should always be" - echo " the last file in the --readFilesIn listed" - echo " - 1 ... barcode sequence is a part of mate 1" - echo " - 2 ... barcode sequence is a part of mate 2" - echo "" - echo " --soloCBposition" - echo " type: string, multiple values allowed" - echo " position of Cell Barcode(s) on the barcode read." - echo " Presently only works with --soloType CB_UMI_Complex, and barcodes are" - echo " assumed to be on Read2." 
- echo " Format for each barcode: startAnchor_startPosition_endAnchor_endPosition" - echo " start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1:" - echo " read end; 2: adapter start; 3: adapter end" - echo " start(end)Position is the 0-based position with of the CB start(end)" - echo " with respect to the Anchor Base" - echo " String for different barcodes are separated by space." - echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" - echo " --soloCBposition 0_0_2_-1 3_1_3_8" - echo "" - echo " --soloUMIposition" - echo " type: string" - echo " position of the UMI on the barcode read, same as soloCBposition" - echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" - echo " --soloCBposition 3_9_3_14" - echo "" - echo " --soloAdapterSequence" - echo " type: string" - echo " adapter sequence to anchor barcodes. Only one adapter sequence is" - echo " allowed." - echo "" - echo " --soloAdapterMismatchesNmax" - echo " type: integer" - echo " example: 1" - echo " maximum number of mismatches allowed in adapter sequence." - echo "" - echo " --soloCBmatchWLtype" - echo " type: string" - echo " example: 1MM_multi" - echo " matching the Cell Barcodes to the WhiteList" - echo " - Exact ... only exact matches allowed" - echo " - 1MM ... only one match in whitelist with 1" - echo " mismatched base allowed. Allowed CBs have to have at least one read with" - echo " exact match." - echo " - 1MM_multi ... multiple matches in whitelist with" - echo " 1 mismatched base allowed, posterior probability calculation is used" - echo " choose one of the matches." - echo " Allowed CBs have to have at least one read with exact match. This option" - echo " matches best with CellRanger 2.2.0" - echo " - 1MM_multi_pseudocounts ... same as 1MM_Multi, but" - echo " pseudocounts of 1 are added to all whitelist barcodes." - echo " - 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts," - echo " multimatching to WL is allowed for CBs with N-bases. 
This option matches" - echo " best with CellRanger >= 3.0.0" - echo " - EditDist_2 ... allow up to edit distance of 3 fpr" - echo " each of the barcodes. May include one deletion + one insertion. Only" - echo " works with --soloType CB_UMI_Complex. Matches to multiple passlist" - echo " barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - echo "" - echo " --soloInputSAMattrBarcodeSeq" - echo " type: string, multiple values allowed" - echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" - echo " SAM attributes mark the barcode sequence (in proper order)." - echo " For instance, for 10X CellRanger or STARsolo BAMs, use" - echo " --soloInputSAMattrBarcodeSeq CR UR ." - echo " This parameter is required when running STARsolo with input from SAM." - echo "" - echo " --soloInputSAMattrBarcodeQual" - echo " type: string, multiple values allowed" - echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" - echo " SAM attributes mark the barcode qualities (in proper order)." - echo " For instance, for 10X CellRanger or STARsolo BAMs, use" - echo " --soloInputSAMattrBarcodeQual CY UY ." - echo " If this parameter is '-' (default), the quality 'H' will be assigned to" - echo " all bases." - echo "" - echo " --soloStrand" - echo " type: string" - echo " example: Forward" - echo " strandedness of the solo libraries:" - echo " - Unstranded ... no strand information" - echo " - Forward ... read strand same as the original RNA molecule" - echo " - Reverse ... read strand opposite to the original RNA molecule" - echo "" - echo " --soloFeatures" - echo " type: string, multiple values allowed" - echo " example: Gene" - echo " genomic features for which the UMI counts per Cell Barcode are collected" - echo " - Gene ... genes: reads match the gene transcript" - echo " - SJ ... splice junctions: reported in SJ.out.tab" - echo " - GeneFull ... 
full gene (pre-mRNA): count all reads overlapping" - echo " genes' exons and introns" - echo " - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads" - echo " overlapping genes' exons and introns: prioritize 100% overlap with exons" - echo " - GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads" - echo " overlapping genes' exons and introns: prioritize >50% overlap with" - echo " exons. Do not count reads with 100% exonic overlap in the antisense" - echo " direction." - echo "" - echo " --soloMultiMappers" - echo " type: string, multiple values allowed" - echo " example: Unique" - echo " counting method for reads mapping to multiple genes" - echo " - Unique ... count only reads that map to unique genes" - echo " - Uniform ... uniformly distribute multi-genic UMIs to all genes" - echo " - Rescue ... distribute UMIs proportionally to unique+uniform counts" - echo " (~ first iteration of EM)" - echo " - PropUnique ... distribute UMIs proportionally to unique mappers, if" - echo " present, and uniformly if not." - echo " - EM ... multi-gene UMIs are distributed using Expectation" - echo " Maximization algorithm" - echo "" - echo " --soloUMIdedup" - echo " type: string, multiple values allowed" - echo " example: 1MM_All" - echo " type of UMI deduplication (collapsing) algorithm" - echo " - 1MM_All ... all UMIs with 1 mismatch distance to" - echo " each other are collapsed (i.e. counted once)." - echo " - 1MM_Directional_UMItools ... follows the \"directional\" method from" - echo " the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017)." - echo " - 1MM_Directional ... same as 1MM_Directional_UMItools, but" - echo " with more stringent criteria for duplicate UMIs" - echo " - Exact ... only exactly matching UMIs are" - echo " collapsed." - echo " - NoDedup ... no deduplication of UMIs, count all" - echo " reads." - echo " - 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI" - echo " collapsing." 
- echo "" - echo " --soloUMIfiltering" - echo " type: string, multiple values allowed" - echo " type of UMI filtering (for reads uniquely mapping to genes)" - echo " - - ... basic filtering: remove UMIs with N and" - echo " homopolymers (similar to CellRanger 2.2.0)." - echo " - MultiGeneUMI ... basic + remove lower-count UMIs that map to" - echo " more than one gene." - echo " - MultiGeneUMI_All ... basic + remove all UMIs that map to more than" - echo " one gene." - echo " - MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to" - echo " more than one gene, matching CellRanger > 3.0.0 ." - echo " Only works with --soloUMIdedup 1MM_CR" - echo "" - echo " --soloOutFileNames" - echo " type: string, multiple values allowed" - echo " example: Solo.out/;features.tsv;barcodes.tsv;matrix.mtx" - echo " file names for STARsolo output:" - echo " file_name_prefix gene_names barcode_sequences" - echo " cell_feature_count_matrix" - echo "" - echo " --soloCellFilter" - echo " type: string, multiple values allowed" - echo " example: CellRanger2.2;3000;0.99;10" - echo " cell filtering type and parameters" - echo " - None ... do not output filtered cells" - echo " - TopCells ... only report top cells by UMI count, followed by" - echo " the exact number of cells" - echo " - CellRanger2.2 ... simple filtering of CellRanger 2.2." - echo " Can be followed by numbers: number of expected cells, robust maximum" - echo " percentile for UMI count, maximum to minimum ratio for UMI count" - echo " The harcoded values are from CellRanger: nExpectedCells=3000;" - echo " maxPercentile=0.99; maxMinRatio=10" - echo " - EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please" - echo " cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20," - echo " 63 (2019):" - echo " " - echo "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y" - echo " Can be followed by 10 numeric parameters: nExpectedCells" - echo " maxPercentile maxMinRatio indMin indMax umiMin" - echo " umiMinFracMedian candMaxN FDR simN" - echo " The harcoded values are from CellRanger: 3000" - echo " 0.99 10 45000 90000 500 0.01" - echo " 20000 0.01 10000" - echo "" - echo " --soloOutFormatFeaturesGeneField3" - echo " type: string, multiple values allowed" - echo " example: Gene Expression" - echo " field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is" - echo " output." - echo "" - echo " --soloCellReadStats" - echo " type: string" - echo " Output reads statistics for each CB" - echo " - Standard ... standard output" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. 
-# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? 
- else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -ENV STAR_VERSION 2.7.3a -ENV PACKAGES gcc g++ make wget zlib1g-dev unzip -RUN apt-get update && \ - apt-get install -y --no-install-recommends ${PACKAGES} && \ - cd /tmp && \ - wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ - unzip ${STAR_VERSION}.zip && \ - cd STAR-${STAR_VERSION}/source && \ - make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ - cp STAR /usr/local/bin && \ - cd / && \ - rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ - apt-get --purge autoremove -y ${PACKAGES} && \ - apt-get clean - -LABEL 
org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component mapping star_align_v273a" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-star_align_v273a-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "star_align_v273a 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readFilesIn) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesIn. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeDir) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFileNamePrefix) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--outFileNamePrefix\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFileNamePrefix. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --runRNGseed) - [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUNRNGSEED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --runRNGseed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --runRNGseed=*) - [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed=*\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUNRNGSEED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeLoad) - [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOMELOAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeLoad. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeLoad=*) - [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad=*\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOMELOAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeFastaFiles) - if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_PAR_GENOMEFASTAFILES="$2" - else - VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --genomeFastaFiles=*) - if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_PAR_GENOMEFASTAFILES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --genomeFileSizes) - if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then - VIASH_PAR_GENOMEFILESIZES="$2" - else - VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFileSizes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeFileSizes=*) - if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then - VIASH_PAR_GENOMEFILESIZES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --genomeTransformOutput) - if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then - VIASH_PAR_GENOMETRANSFORMOUTPUT="$2" - else - VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeTransformOutput. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeTransformOutput=*) - if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then - VIASH_PAR_GENOMETRANSFORMOUTPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --genomeChrSetMitochondrial) - if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$2" - else - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeChrSetMitochondrial. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --genomeChrSetMitochondrial=*) - if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbFileChrStartEnd) - if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then - VIASH_PAR_SJDBFILECHRSTARTEND="$2" - else - VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbFileChrStartEnd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbFileChrStartEnd=*) - if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then - VIASH_PAR_SJDBFILECHRSTARTEND=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbGTFfile) - [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFfile=*) - [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile=*\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFchrPrefix) - [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFCHRPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFchrPrefix. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFchrPrefix=*) - [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix=*\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFCHRPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFfeatureExon) - [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFEATUREEXON="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfeatureExon. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFfeatureExon=*) - [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon=*\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFFEATUREEXON=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentTranscript) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentTranscript. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentTranscript=*) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentGene) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTGENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGene=*) - [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBGTFTAGEXONPARENTGENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFtagExonParentGeneName) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$2" - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneName. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGeneName=*) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbGTFtagExonParentGeneType) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$2" - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneType. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbGTFtagExonParentGeneType=*) - if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --sjdbOverhang) - [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBOVERHANG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbOverhang. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbOverhang=*) - [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang=*\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBOVERHANG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbScore) - [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBSCORE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbScore. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbScore=*) - [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore=*\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBSCORE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbInsertSave) - [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SJDBINSERTSAVE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbInsertSave. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sjdbInsertSave=*) - [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave=*\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SJDBINSERTSAVE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --varVCFfile) - [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARVCFFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --varVCFfile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --varVCFfile=*) - [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile=*\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VARVCFFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesType) - [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesType=*) - [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType=*\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_READFILESTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesSAMattrKeep) - if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then - VIASH_PAR_READFILESSAMATTRKEEP="$2" - else - VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesSAMattrKeep. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesSAMattrKeep=*) - if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then - VIASH_PAR_READFILESSAMATTRKEEP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readFilesManifest) - [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESMANIFEST="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesManifest. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesManifest=*) - [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest=*\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESMANIFEST=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesPrefix) - [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --readFilesPrefix=*) - [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix=*\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READFILESPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readFilesCommand) - if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then - VIASH_PAR_READFILESCOMMAND="$2" - else - VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesCommand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readFilesCommand=*) - if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then - VIASH_PAR_READFILESCOMMAND=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readMapNumber) - [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMAPNUMBER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMapNumber. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readMapNumber=*) - [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber=*\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMAPNUMBER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readMatesLengthsIn) - [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMATESLENGTHSIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMatesLengthsIn. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readMatesLengthsIn=*) - [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn=*\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READMATESLENGTHSIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --readNameSeparator) - if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then - VIASH_PAR_READNAMESEPARATOR="$2" - else - VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readNameSeparator. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readNameSeparator=*) - if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then - VIASH_PAR_READNAMESEPARATOR=$(ViashRemoveFlags "$1") - else - VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --readQualityScoreBase) - [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READQUALITYSCOREBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --readQualityScoreBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --readQualityScoreBase=*) - [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase=*\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_READQUALITYSCOREBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clipAdapterType) - [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CLIPADAPTERTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clipAdapterType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clipAdapterType=*) - [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType=*\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLIPADAPTERTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clip3pNbases) - if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then - VIASH_PAR_CLIP3PNBASES="$2" - else - VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pNbases=*) - if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then - VIASH_PAR_CLIP3PNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAdapterSeq) - if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then - VIASH_PAR_CLIP3PADAPTERSEQ="$2" - else - VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterSeq. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pAdapterSeq=*) - if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then - VIASH_PAR_CLIP3PADAPTERSEQ=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAdapterMMp) - if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - VIASH_PAR_CLIP3PADAPTERMMP="$2" - else - VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterMMp. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --clip3pAdapterMMp=*) - if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - VIASH_PAR_CLIP3PADAPTERMMP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip3pAfterAdapterNbases) - if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$2" - else - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAfterAdapterNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip3pAfterAdapterNbases=*) - if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - VIASH_PAR_CLIP3PAFTERADAPTERNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --clip5pNbases) - if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then - VIASH_PAR_CLIP5PNBASES="$2" - else - VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip5pNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip5pNbases=*) - if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then - VIASH_PAR_CLIP5PNBASES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --limitGenomeGenerateRAM) - [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITGENOMEGENERATERAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitGenomeGenerateRAM. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --limitGenomeGenerateRAM=*) - [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM=*\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITGENOMEGENERATERAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitIObufferSize) - if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - VIASH_PAR_LIMITIOBUFFERSIZE="$2" - else - VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitIObufferSize. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitIObufferSize=*) - if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - VIASH_PAR_LIMITIOBUFFERSIZE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --limitOutSAMoneReadBytes) - [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSAMONEREADBYTES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSAMoneReadBytes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSAMoneReadBytes=*) - [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes=*\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSAMONEREADBYTES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitOutSJoneRead) - [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LIMITOUTSJONEREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJoneRead. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSJoneRead=*) - [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead=*\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJONEREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitOutSJcollapsed) - [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJCOLLAPSED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJcollapsed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitOutSJcollapsed=*) - [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed=*\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITOUTSJCOLLAPSED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitBAMsortRAM) - [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITBAMSORTRAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitBAMsortRAM. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitBAMsortRAM=*) - [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM=*\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LIMITBAMSORTRAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitSjdbInsertNsj) - [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITSJDBINSERTNSJ="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitSjdbInsertNsj. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitSjdbInsertNsj=*) - [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj=*\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITSJDBINSERTNSJ=$(ViashRemoveFlags "$1") - shift 1 - ;; - --limitNreadsSoft) - [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITNREADSSOFT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitNreadsSoft. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --limitNreadsSoft=*) - [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft=*\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LIMITNREADSSOFT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outTmpKeep) - [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTTMPKEEP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outTmpKeep. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outTmpKeep=*) - [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep=*\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTTMPKEEP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outStd) - [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSTD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outStd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outStd=*) - [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd=*\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSTD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outReadsUnmapped) - [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTREADSUNMAPPED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outReadsUnmapped. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outReadsUnmapped=*) - [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped=*\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTREADSUNMAPPED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outQSconversionAdd) - [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTQSCONVERSIONADD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outQSconversionAdd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outQSconversionAdd=*) - [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd=*\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTQSCONVERSIONADD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outMultimapperOrder) - [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTMULTIMAPPERORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outMultimapperOrder. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outMultimapperOrder=*) - [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder=*\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTMULTIMAPPERORDER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMtype) - if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then - VIASH_PAR_OUTSAMTYPE="$2" - else - VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtype. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMtype=*) - if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then - VIASH_PAR_OUTSAMTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMmode) - [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMmode=*) - [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode=*\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMstrandField) - [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMSTRANDFIELD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMstrandField. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMstrandField=*) - [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField=*\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMSTRANDFIELD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMattributes) - if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then - VIASH_PAR_OUTSAMATTRIBUTES="$2" - else - VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattributes. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMattributes=*) - if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then - VIASH_PAR_OUTSAMATTRIBUTES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMattrIHstart) - [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMATTRIHSTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrIHstart. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMattrIHstart=*) - [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart=*\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMATTRIHSTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMunmapped) - if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then - VIASH_PAR_OUTSAMUNMAPPED="$2" - else - VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMunmapped. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMunmapped=*) - if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then - VIASH_PAR_OUTSAMUNMAPPED=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMorder) - [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMORDER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMorder. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMorder=*) - [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder=*\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMORDER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMprimaryFlag) - [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMPRIMARYFLAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMprimaryFlag. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMprimaryFlag=*) - [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag=*\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMPRIMARYFLAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMreadID) - [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMREADID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMreadID. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMreadID=*) - [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID=*\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMREADID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMmapqUnique) - [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSAMMAPQUNIQUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmapqUnique. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMmapqUnique=*) - [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique=*\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMAPQUNIQUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMflagOR) - [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagOR. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMflagOR=*) - [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR=*\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMflagAND) - [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMFLAGAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagAND. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMflagAND=*) - [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND=*\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSAMFLAGAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMattrRGline) - if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then - VIASH_PAR_OUTSAMATTRRGLINE="$2" - else - VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrRGline. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMattrRGline=*) - if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then - VIASH_PAR_OUTSAMATTRRGLINE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderHD) - if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then - VIASH_PAR_OUTSAMHEADERHD="$2" - else - VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderHD. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMheaderHD=*) - if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then - VIASH_PAR_OUTSAMHEADERHD=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderPG) - if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then - VIASH_PAR_OUTSAMHEADERPG="$2" - else - VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderPG. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMheaderPG=*) - if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then - VIASH_PAR_OUTSAMHEADERPG=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMheaderCommentFile) - [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMHEADERCOMMENTFILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderCommentFile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMheaderCommentFile=*) - [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile=*\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMHEADERCOMMENTFILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMfilter) - if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then - VIASH_PAR_OUTSAMFILTER="$2" - else - VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMfilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMfilter=*) - if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then - VIASH_PAR_OUTSAMFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSAMmultNmax) - [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMULTNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmultNmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSAMmultNmax=*) - [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax=*\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMMULTNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSAMtlen) - [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMTLEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtlen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSAMtlen=*) - [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen=*\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSAMTLEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMcompression) - [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMCOMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMcompression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMcompression=*) - [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression=*\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMCOMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMsortingThreadN) - [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTBAMSORTINGTHREADN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingThreadN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMsortingThreadN=*) - [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN=*\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGTHREADN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outBAMsortingBinsN) - [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGBINSN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingBinsN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outBAMsortingBinsN=*) - [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN=*\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTBAMSORTINGBINSN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bamRemoveDuplicatesType) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesType. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --bamRemoveDuplicatesType=*) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bamRemoveDuplicatesMate2basesN) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesMate2basesN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bamRemoveDuplicatesMate2basesN=*) - [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigType) - if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then - VIASH_PAR_OUTWIGTYPE="$2" - else - VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigType=*) - if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then - VIASH_PAR_OUTWIGTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outWigStrand) - [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTWIGSTRAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigStrand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigStrand=*) - [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand=*\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGSTRAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigReferencesPrefix) - [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGREFERENCESPREFIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigReferencesPrefix. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigReferencesPrefix=*) - [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix=*\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGREFERENCESPREFIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outWigNorm) - [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTWIGNORM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigNorm. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outWigNorm=*) - [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm=*\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTWIGNORM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterType) - [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterType=*) - [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType=*\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMultimapScoreRange) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMultimapScoreRange=*) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMultimapNmax) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMultimapNmax=*) - [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMismatchNmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNoverLmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverLmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outFilterMismatchNoverLmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMismatchNoverReadLmax) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverReadLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMismatchNoverReadLmax=*) - [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterScoreMin) - [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterScoreMin=*) - [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin=*\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERSCOREMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterScoreMinOverLread) - [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMINOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMinOverLread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterScoreMinOverLread=*) - [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread=*\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERSCOREMINOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMatchNmin) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMatchNmin=*) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin=*\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterMatchNminOverLread) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNminOverLread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterMatchNminOverLread=*) - [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread=*\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterIntronMotifs) - [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONMOTIFS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronMotifs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outFilterIntronMotifs=*) - [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs=*\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONMOTIFS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outFilterIntronStrands) - [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONSTRANDS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronStrands. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outFilterIntronStrands=*) - [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands=*\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTFILTERINTRONSTRANDS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJtype) - [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJtype. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJtype=*) - [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype=*\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJfilterReads) - [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTSJFILTERREADS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterReads. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterReads=*) - [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads=*\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTSJFILTERREADS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --outSJfilterOverhangMin) - if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$2" - else - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterOverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterOverhangMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - VIASH_PAR_OUTSJFILTEROVERHANGMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterCountUniqueMin) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$2" - else - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountUniqueMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterCountUniqueMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterCountTotalMin) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$2" - else - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountTotalMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --outSJfilterCountTotalMin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterDistToOtherSJmin) - if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$2" - else - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterDistToOtherSJmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterDistToOtherSJmin=*) - if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --outSJfilterIntronMaxVsReadN) - if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$2" - else - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterIntronMaxVsReadN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --outSJfilterIntronMaxVsReadN=*) - if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --scoreGap) - [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREGAP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGap. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGap=*) - [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap=*\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapNoncan) - [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPNONCAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapNoncan. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapNoncan=*) - [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan=*\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPNONCAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapGCAG) - [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPGCAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapGCAG. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapGCAG=*) - [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG=*\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREGAPGCAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGapATAC) - [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPATAC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapATAC. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGapATAC=*) - [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC=*\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGAPATAC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreGenomicLengthLog2scale) - [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGenomicLengthLog2scale. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreGenomicLengthLog2scale=*) - [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale=*\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreDelOpen) - [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELOPEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelOpen. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreDelOpen=*) - [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen=*\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELOPEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreDelBase) - [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreDelBase=*) - [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase=*\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREDELBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreInsOpen) - [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSOPEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsOpen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreInsOpen=*) - [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen=*\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSOPEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreInsBase) - [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SCOREINSBASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsBase. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreInsBase=*) - [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase=*\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCOREINSBASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --scoreStitchSJshift) - [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCORESTITCHSJSHIFT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreStitchSJshift. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scoreStitchSJshift=*) - [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift=*\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCORESTITCHSJSHIFT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchStartLmax) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchStartLmax=*) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchStartLmaxOverLread) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmaxOverLread. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchStartLmaxOverLread=*) - [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSearchLmax) - [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHLMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchLmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSearchLmax=*) - [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax=*\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSEARCHLMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedMultimapNmax) - [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEEDMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedMultimapNmax=*) - [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax=*\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedPerReadNmax) - [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedPerReadNmax=*) - [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax=*\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedPerWindowNmax) - [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDPERWINDOWNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedPerWindowNmax=*) - [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax=*\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEEDPERWINDOWNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedNoneLociPerWindow) - [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDNONELOCIPERWINDOW="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedNoneLociPerWindow. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedNoneLociPerWindow=*) - [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow=*\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDNONELOCIPERWINDOW=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedSplitMin) - [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSPLITMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSplitMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seedSplitMin=*) - [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin=*\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDSPLITMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seedMapMin) - [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMAPMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMapMin. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --seedMapMin=*) - [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin=*\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEEDMAPMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignIntronMin) - [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignIntronMin=*) - [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin=*\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignIntronMax) - [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignIntronMax=*) - [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax=*\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINTRONMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignMatesGapMax) - [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNMATESGAPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignMatesGapMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignMatesGapMax=*) - [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax=*\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNMATESGAPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSJoverhangMin) - [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJoverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSJoverhangMin=*) - [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin=*\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSJstitchMismatchNmax) - if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$2" - else - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJstitchMismatchNmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --alignSJstitchMismatchNmax=*) - if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX=$(ViashRemoveFlags "$1") - else - VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --alignSJDBoverhangMin) - [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJDBOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJDBoverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSJDBoverhangMin=*) - [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin=*\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSJDBOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSplicedMateMapLmin) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLmin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSplicedMateMapLmin=*) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSplicedMateMapLminOverLmate) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLminOverLmate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSplicedMateMapLminOverLmate=*) - [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignWindowsPerReadNmax) - [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNWINDOWSPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignWindowsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignWindowsPerReadNmax=*) - [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax=*\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNWINDOWSPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignTranscriptsPerWindowNmax) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignTranscriptsPerWindowNmax=*) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignTranscriptsPerReadNmax) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignTranscriptsPerReadNmax=*) - [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignEndsType) - [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsType. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignEndsType=*) - [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType=*\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignEndsProtrude) - [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSPROTRUDE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsProtrude. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignEndsProtrude=*) - [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude=*\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNENDSPROTRUDE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignSoftClipAtReferenceEnds) - [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSoftClipAtReferenceEnds. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignSoftClipAtReferenceEnds=*) - [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds=*\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alignInsertionFlush) - [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINSERTIONFLUSH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignInsertionFlush. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --alignInsertionFlush=*) - [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush=*\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALIGNINSERTIONFLUSH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --peOverlapNbasesMin) - [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPNBASESMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapNbasesMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --peOverlapNbasesMin=*) - [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin=*\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_PEOVERLAPNBASESMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --peOverlapMMp) - [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPMMP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapMMp. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --peOverlapMMp=*) - [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp=*\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PEOVERLAPMMP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winAnchorMultimapNmax) - [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winAnchorMultimapNmax=*) - [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax=*\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winBinNbits) - [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINBINNBITS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winBinNbits. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --winBinNbits=*) - [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits=*\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINBINNBITS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winAnchorDistNbins) - [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORDISTNBINS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorDistNbins. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winAnchorDistNbins=*) - [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins=*\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINANCHORDISTNBINS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winFlankNbins) - [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINFLANKNBINS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winFlankNbins. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winFlankNbins=*) - [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins=*\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_WINFLANKNBINS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winReadCoverageRelativeMin) - [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGERELATIVEMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageRelativeMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winReadCoverageRelativeMin=*) - [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin=*\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGERELATIVEMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --winReadCoverageBasesMin) - [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGEBASESMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageBasesMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --winReadCoverageBasesMin=*) - [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin=*\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WINREADCOVERAGEBASESMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimOutType) - if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then - VIASH_PAR_CHIMOUTTYPE="$2" - else - VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutType. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimOutType=*) - if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then - VIASH_PAR_CHIMOUTTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --chimSegmentMin) - [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimSegmentMin=*) - [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin=*\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreMin) - [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreMin=*) - [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin=*\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreDropMax) - [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMSCOREDROPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreDropMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreDropMax=*) - [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax=*\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREDROPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreSeparation) - [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCORESEPARATION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreSeparation. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimScoreSeparation=*) - [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation=*\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCORESEPARATION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimScoreJunctionNonGTAG) - [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreJunctionNonGTAG. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chimScoreJunctionNonGTAG=*) - [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG=*\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimJunctionOverhangMin) - [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMJUNCTIONOVERHANGMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimJunctionOverhangMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimJunctionOverhangMin=*) - [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin=*\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMJUNCTIONOVERHANGMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimSegmentReadGapMax) - [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMSEGMENTREADGAPMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentReadGapMax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimSegmentReadGapMax=*) - [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax=*\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMSEGMENTREADGAPMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimFilter) - if [ -z "$VIASH_PAR_CHIMFILTER" ]; then - VIASH_PAR_CHIMFILTER="$2" - else - VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimFilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimFilter=*) - if [ -z "$VIASH_PAR_CHIMFILTER" ]; then - VIASH_PAR_CHIMFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --chimMainSegmentMultNmax) - [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMAINSEGMENTMULTNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMainSegmentMultNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimMainSegmentMultNmax=*) - [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax=*\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMAINSEGMENTMULTNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimMultimapNmax) - [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapNmax. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chimMultimapNmax=*) - [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax=*\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimMultimapScoreRange) - [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPSCORERANGE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimMultimapScoreRange=*) - [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange=*\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimNonchimScoreDropMin) - [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMNONCHIMSCOREDROPMIN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimNonchimScoreDropMin. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimNonchimScoreDropMin=*) - [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin=*\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CHIMNONCHIMSCOREDROPMIN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chimOutJunctionFormat) - [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMOUTJUNCTIONFORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutJunctionFormat. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --chimOutJunctionFormat=*) - [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat=*\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHIMOUTJUNCTIONFORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --quantMode) - if [ -z "$VIASH_PAR_QUANTMODE" ]; then - VIASH_PAR_QUANTMODE="$2" - else - VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quantMode=*) - if [ -z "$VIASH_PAR_QUANTMODE" ]; then - VIASH_PAR_QUANTMODE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --quantTranscriptomeBAMcompression) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBAMcompression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --quantTranscriptomeBAMcompression=*) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --quantTranscriptomeBan) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBan. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quantTranscriptomeBan=*) - [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANTTRANSCRIPTOMEBAN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --twopassMode) - [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASSMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopassMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --twopassMode=*) - [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode=*\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_TWOPASSMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --twopass1readsN) - [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASS1READSN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopass1readsN. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --twopass1readsN=*) - [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN=*\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TWOPASS1READSN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --waspOutputMode) - [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WASPOUTPUTMODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --waspOutputMode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --waspOutputMode=*) - [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode=*\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WASPOUTPUTMODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloType) - if [ -z "$VIASH_PAR_SOLOTYPE" ]; then - VIASH_PAR_SOLOTYPE="$2" - else - VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloType. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloType=*) - if [ -z "$VIASH_PAR_SOLOTYPE" ]; then - VIASH_PAR_SOLOTYPE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCBwhitelist) - if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then - VIASH_PAR_SOLOCBWHITELIST="$2" - else - VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBwhitelist. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBwhitelist=*) - if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then - VIASH_PAR_SOLOCBWHITELIST=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCBstart) - [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBSTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBstart. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBstart=*) - [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart=*\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBSTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBlen) - [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBLEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBlen. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloCBlen=*) - [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen=*\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBLEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloUMIstart) - [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMISTART="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIstart. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIstart=*) - [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart=*\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMISTART=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloUMIlen) - [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMILEN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIlen. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIlen=*) - [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen=*\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMILEN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloBarcodeReadLength) - [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SOLOBARCODEREADLENGTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeReadLength. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloBarcodeReadLength=*) - [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength=*\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEREADLENGTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloBarcodeMate) - [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEMATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeMate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloBarcodeMate=*) - [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate=*\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOBARCODEMATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBposition) - if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then - VIASH_PAR_SOLOCBPOSITION="$2" - else - VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBposition. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloCBposition=*) - if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then - VIASH_PAR_SOLOCBPOSITION=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIposition) - [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMIPOSITION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIposition. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIposition=*) - [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition=*\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOUMIPOSITION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloAdapterSequence) - [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERSEQUENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterSequence. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloAdapterSequence=*) - [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence=*\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SOLOADAPTERSEQUENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloAdapterMismatchesNmax) - [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERMISMATCHESNMAX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterMismatchesNmax. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloAdapterMismatchesNmax=*) - [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax=*\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOADAPTERMISMATCHESNMAX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloCBmatchWLtype) - [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBMATCHWLTYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBmatchWLtype. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCBmatchWLtype=*) - [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype=*\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCBMATCHWLTYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloInputSAMattrBarcodeSeq) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$2" - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeSeq. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloInputSAMattrBarcodeSeq=*) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloInputSAMattrBarcodeQual) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$2" - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeQual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloInputSAMattrBarcodeQual=*) - if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloStrand) - [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOSTRAND="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloStrand. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloStrand=*) - [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand=*\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOSTRAND=$(ViashRemoveFlags "$1") - shift 1 - ;; - --soloFeatures) - if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then - VIASH_PAR_SOLOFEATURES="$2" - else - VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloFeatures. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloFeatures=*) - if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then - VIASH_PAR_SOLOFEATURES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloMultiMappers) - if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then - VIASH_PAR_SOLOMULTIMAPPERS="$2" - else - VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloMultiMappers. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloMultiMappers=*) - if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then - VIASH_PAR_SOLOMULTIMAPPERS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIdedup) - if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then - VIASH_PAR_SOLOUMIDEDUP="$2" - else - VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIdedup. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloUMIdedup=*) - if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then - VIASH_PAR_SOLOUMIDEDUP=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloUMIfiltering) - if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then - VIASH_PAR_SOLOUMIFILTERING="$2" - else - VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIfiltering. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloUMIfiltering=*) - if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then - VIASH_PAR_SOLOUMIFILTERING=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloOutFileNames) - if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then - VIASH_PAR_SOLOOUTFILENAMES="$2" - else - VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFileNames. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloOutFileNames=*) - if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then - VIASH_PAR_SOLOOUTFILENAMES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCellFilter) - if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then - VIASH_PAR_SOLOCELLFILTER="$2" - else - VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellFilter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCellFilter=*) - if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then - VIASH_PAR_SOLOCELLFILTER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloOutFormatFeaturesGeneField3) - if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$2" - else - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFormatFeaturesGeneField3. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --soloOutFormatFeaturesGeneField3=*) - if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3=$(ViashRemoveFlags "$1") - else - VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --soloCellReadStats) - [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCELLREADSTATS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellReadStats. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --soloCellReadStats=*) - [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats=*\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SOLOCELLREADSTATS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ] && [ ! -e "$VIASH_PAR_SJDBGTFFILE" ]; then - ViashError "Input file '$VIASH_PAR_SJDBGTFFILE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ] && [ ! -e "$VIASH_PAR_READFILESMANIFEST" ]; then - ViashError "Input file '$VIASH_PAR_READFILESMANIFEST' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_RUNRNGSEED" ]]; then - if ! [[ "$VIASH_PAR_RUNRNGSEED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--runRNGseed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_GENOMEFILESIZES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_GENOMEFILESIZES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--genomeFileSizes' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_SJDBOVERHANG" ]]; then - if ! [[ "$VIASH_PAR_SJDBOVERHANG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sjdbOverhang' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SJDBSCORE" ]]; then - if ! [[ "$VIASH_PAR_SJDBSCORE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sjdbScore' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_READMAPNUMBER" ]]; then - if ! [[ "$VIASH_PAR_READMAPNUMBER" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--readMapNumber' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_READQUALITYSCOREBASE" ]]; then - if ! [[ "$VIASH_PAR_READQUALITYSCOREBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--readQualityScoreBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_CLIP3PNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip3pNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PADAPTERMMP; do - if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--clip3pAdapterMMp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP3PAFTERADAPTERNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip3pAfterAdapterNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_CLIP5PNBASES" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_CLIP5PNBASES; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip5pNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ]]; then - if ! [[ "$VIASH_PAR_LIMITGENOMEGENERATERAM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitGenomeGenerateRAM' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_LIMITIOBUFFERSIZE; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitIObufferSize' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSAMoneReadBytes' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSJONEREAD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSJoneRead' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ]]; then - if ! [[ "$VIASH_PAR_LIMITOUTSJCOLLAPSED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitOutSJcollapsed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITBAMSORTRAM" ]]; then - if ! [[ "$VIASH_PAR_LIMITBAMSORTRAM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitBAMsortRAM' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ]]; then - if ! [[ "$VIASH_PAR_LIMITSJDBINSERTNSJ" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitSjdbInsertNsj' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LIMITNREADSSOFT" ]]; then - if ! [[ "$VIASH_PAR_LIMITNREADSSOFT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--limitNreadsSoft' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ]]; then - if ! 
[[ "$VIASH_PAR_OUTQSCONVERSIONADD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outQSconversionAdd' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMATTRIHSTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMattrIHstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMMAPQUNIQUE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMmapqUnique' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMFLAGOR" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMFLAGOR" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMflagOR' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMFLAGAND" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMFLAGAND" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMflagAND' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMMULTNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMmultNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTSAMTLEN" ]]; then - if ! [[ "$VIASH_PAR_OUTSAMTLEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSAMtlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ]]; then - if ! [[ "$VIASH_PAR_OUTBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ]]; then - if ! 
[[ "$VIASH_PAR_OUTBAMSORTINGTHREADN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMsortingThreadN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ]]; then - if ! [[ "$VIASH_PAR_OUTBAMSORTINGBINSN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outBAMsortingBinsN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ]]; then - if ! [[ "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--bamRemoveDuplicatesMate2basesN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMismatchNoverLmax' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ]]; then - if ! 
[[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMismatchNoverReadLmax' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterScoreMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterScoreMinOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outFilterMatchNmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ]]; then - if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--outFilterMatchNminOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTEROVERHANGMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterCountUniqueMin' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterCountTotalMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterDistToOtherSJmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [ -n "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--outSJfilterIntronMaxVsReadN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_SCOREGAP" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAP" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGap' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPNONCAN" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPNONCAN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapNoncan' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPGCAG" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPGCAG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapGCAG' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGAPATAC" ]]; then - if ! [[ "$VIASH_PAR_SCOREGAPATAC" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGapATAC' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ]]; then - if ! [[ "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreGenomicLengthLog2scale' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREDELOPEN" ]]; then - if ! [[ "$VIASH_PAR_SCOREDELOPEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreDelOpen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREDELBASE" ]]; then - if ! [[ "$VIASH_PAR_SCOREDELBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreDelBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREINSOPEN" ]]; then - if ! [[ "$VIASH_PAR_SCOREINSOPEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreInsOpen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCOREINSBASE" ]]; then - if ! [[ "$VIASH_PAR_SCOREINSBASE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreInsBase' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ]]; then - if ! [[ "$VIASH_PAR_SCORESTITCHSJSHIFT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--scoreStitchSJshift' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDSEARCHSTARTLMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSearchStartLmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ]]; then - if ! 
[[ "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--seedSearchStartLmaxOverLread' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSEARCHLMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDSEARCHLMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSearchLmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ]]; then - if ! [[ "$VIASH_PAR_SEEDPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ]]; then - if ! [[ "$VIASH_PAR_SEEDNONELOCIPERWINDOW" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedNoneLociPerWindow' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDSPLITMIN" ]]; then - if ! [[ "$VIASH_PAR_SEEDSPLITMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedSplitMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEEDMAPMIN" ]]; then - if ! [[ "$VIASH_PAR_SEEDMAPMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seedMapMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNINTRONMIN" ]]; then - if ! 
[[ "$VIASH_PAR_ALIGNINTRONMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignIntronMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNINTRONMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNINTRONMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignIntronMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNMATESGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignMatesGapMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSJOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then - IFS=';' - set -f - for val in $VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJstitchMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSJDBoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ]]; then - if ! [[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignSplicedMateMapLmin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ]]; then - if ! 
[[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--alignSplicedMateMapLminOverLmate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignWindowsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignTranscriptsPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ]]; then - if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--alignTranscriptsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ]]; then - if ! [[ "$VIASH_PAR_PEOVERLAPNBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--peOverlapNbasesMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PEOVERLAPMMP" ]]; then - if ! [[ "$VIASH_PAR_PEOVERLAPMMP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--peOverlapMMp' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_WINANCHORMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winAnchorMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINBINNBITS" ]]; then - if ! 
[[ "$VIASH_PAR_WINBINNBITS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winBinNbits' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINANCHORDISTNBINS" ]]; then - if ! [[ "$VIASH_PAR_WINANCHORDISTNBINS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winAnchorDistNbins' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINFLANKNBINS" ]]; then - if ! [[ "$VIASH_PAR_WINFLANKNBINS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winFlankNbins' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ]]; then - if ! [[ "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--winReadCoverageRelativeMin' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ]]; then - if ! [[ "$VIASH_PAR_WINREADCOVERAGEBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--winReadCoverageBasesMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSEGMENTMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMSEGMENTMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimSegmentMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREDROPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreDropMax' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCORESEPARATION" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCORESEPARATION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreSeparation' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ]]; then - if ! [[ "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimScoreJunctionNonGTAG' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ]]; then - if ! [[ "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimJunctionOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimSegmentReadGapMax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMainSegmentMultNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ]]; then - if ! [[ "$VIASH_PAR_CHIMMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ]]; then - if ! [[ "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ]]; then - if ! 
[[ "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimNonchimScoreDropMin' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ]]; then - if ! [[ "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--chimOutJunctionFormat' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ]]; then - if ! [[ "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--quantTranscriptomeBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TWOPASS1READSN" ]]; then - if ! [[ "$VIASH_PAR_TWOPASS1READSN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--twopass1readsN' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOCBSTART" ]]; then - if ! [[ "$VIASH_PAR_SOLOCBSTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloCBstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOCBLEN" ]]; then - if ! [[ "$VIASH_PAR_SOLOCBLEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloCBlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOUMISTART" ]]; then - if ! [[ "$VIASH_PAR_SOLOUMISTART" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloUMIstart' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOUMILEN" ]]; then - if ! [[ "$VIASH_PAR_SOLOUMILEN" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloUMIlen' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ]]; then - if ! 
[[ "$VIASH_PAR_SOLOBARCODEREADLENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloBarcodeReadLength' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOBARCODEMATE" ]]; then - if ! [[ "$VIASH_PAR_SOLOBARCODEMATE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloBarcodeMate' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ]]; then - if ! [[ "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--soloAdapterMismatchesNmax' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) - VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - VIASH_TEST_GENOMEFASTAFILES=() - IFS=';' - for var in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_GENOMEFASTAFILES+=( "$var" ) - done - VIASH_PAR_GENOMEFASTAFILES=$(IFS=';' ; echo "${VIASH_TEST_GENOMEFASTAFILES[*]}") -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SJDBGTFFILE")" ) - VIASH_PAR_SJDBGTFFILE=$(ViashAutodetectMount "$VIASH_PAR_SJDBGTFFILE") -fi -if [ ! 
-z "$VIASH_PAR_READFILESMANIFEST" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_READFILESMANIFEST")" ) - VIASH_PAR_READFILESMANIFEST=$(ViashAutodetectMount "$VIASH_PAR_READFILESMANIFEST") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-star_align_v273a-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import re -import tempfile -import subprocess -from pathlib import Path -import tarfile -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'genomeFileSizes': $( if [ ! 
-z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'genomeTransformOutput': $( if [ ! -z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'sjdbOverhang': $( if [ ! 
-z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'readQualityScoreBase': $( if [ ! 
-z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitOutSJcollapsed': $( if [ ! 
-z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMattrIHstart': $( if [ ! 
-z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSAMfilter': $( if [ ! 
-z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterType': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outFilterIntronStrands': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGapATAC': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedPerWindowNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), - 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSplicedMateMapLmin': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignSplicedMateMapLminOverLmate': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winAnchorMultimapNmax': $( if [ ! -z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winBinNbits': $( if [ ! 
-z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimSegmentReadGapMax': $( if [ ! 
-z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'waspOutputMode': $( if [ ! 
-z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloAdapterMismatchesNmax': $( if [ ! 
-z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloOutFormatFeaturesGeneField3': $( if [ ! 
-z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# regex for matching R[12] fastq(gz) files -# examples: -# - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz -# - tinygex_S1_L001_I1_001.fastq.gz -fastqgz_regex = r'(.+)_(R\\d+)(_\\d+)?\\.fastq(\\.gz)?' - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\x1f\\x8b' - -# look for fastq files in a directory -def search_fastqs(path: Path) -> list[Path]: - if path.is_dir(): - print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True) - value_paths = [file for file in path.iterdir() if re.match(fastqgz_regex, file.name) ] - return value_paths - else: - return [path] - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -######################## -### Main code ### -######################## - -# rename keys and convert path strings to Path -# note: only list file arguments here. 
if non-file arguments also need to be renamed, -# the \`processPar()\` generator needs to be adapted -to_rename = {'input': 'readFilesIn', 'reference': 'genomeDir', 'output': 'outFileNamePrefix'} - -def process_par(orig_par, to_rename): - for key, value in orig_par.items(): - # rename the key in par based on the \`to_rename\` dict - if key in to_rename.keys(): - new_key = to_rename[key] - - # also turn value into a Path - if isinstance(value, list): - new_value = [Path(val) for val in value] - else: - new_value = Path(value) - else: - new_key = key - new_value = value - yield new_key, new_value -par = dict(process_par(par, to_rename)) - -# create output dir if need be -par["outFileNamePrefix"].mkdir(parents=True, exist_ok=True) - -with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: - print(">> Check whether input files are directories", flush=True) - new_read_files_in = [] - for path in par["readFilesIn"]: - new_read_files_in.extend(search_fastqs(path)) - par["readFilesIn"] = new_read_files_in - print("", flush=True) - - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - for par_name in ["genomeDir", "readFilesIn"]: - par_values = par[par_name] - if par_values: - # turn value into list - is_multiple = isinstance(par_values, list) - if not is_multiple: - par_values = [ par_values ] - - # output list - new_values = [] - for par_value in par_values: - print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) - new_value = extract_if_need_be(par_value, temp_dir_path) - new_values.append(new_value) - - # unlist if need be - if not is_multiple: - new_values = new_values[0] - - # replace value - par[par_name] = new_values - # end ungzipping - print("", flush=True) - - print("Grouping R1/R2 input files into pairs", flush=True) - input_grouped = {} - for path in par['readFilesIn']: - key = re.search(fastqgz_regex, path.name).group(2) - if key not in 
input_grouped: - input_grouped[key] = [] - input_grouped[key].append(str(path)) - par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ] - print("", flush=True) - - print(">> Constructing command", flush=True) - par["runMode"] = "alignReads" - par["outTmpDir"] = temp_dir_path / "run" - if 'cpus' in meta and meta['cpus']: - par["runThreadN"] = meta["cpus"] - # make sure there is a trailing / - par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/" - - cmd_args = [ "STAR" ] - for name, value in par.items(): - if value is not None: - if isinstance(value, list): - cmd_args.extend(["--" + name] + [str(x) for x in value]) - else: - cmd_args.extend(["--" + name, str(value)]) - print("", flush=True) - - print(">> Running STAR with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - print("", flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ]; then - VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then - unset VIASH_TEST_GENOMEFASTAFILES - IFS=';' - for var in $VIASH_PAR_GENOMEFASTAFILES; do - unset IFS - if [ -z "$VIASH_TEST_GENOMEFASTAFILES" ]; then - VIASH_TEST_GENOMEFASTAFILES="$(ViashStripAutomount "$var")" - else - VIASH_TEST_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES;""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES" -fi -if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then - VIASH_PAR_SJDBGTFFILE=$(ViashStripAutomount "$VIASH_PAR_SJDBGTFFILE") -fi -if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ]; then - VIASH_PAR_READFILESMANIFEST=$(ViashStripAutomount "$VIASH_PAR_READFILESMANIFEST") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/mapping/star_build_reference/.config.vsh.yaml b/target/docker/mapping/star_build_reference/.config.vsh.yaml deleted file mode 100644 index 27a4226e047..00000000000 --- a/target/docker/mapping/star_build_reference/.config.vsh.yaml +++ /dev/null @@ -1,190 +0,0 @@ -functionality: - name: "star_build_reference" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Input/Output" - arguments: - - type: "file" - name: "--genome_fasta" - alternatives: - - "--genomeFastaFiles" - description: "The fasta files to be included in the reference. Corresponds to\ - \ the --genomeFastaFiles argument in the STAR command." - info: null - example: - - "chr1.fasta" - - "chr2.fasta" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: " " - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - alternatives: - - "--sjdbGTFfile" - description: "Specifies the path to the file with annotated transcripts in the\ - \ standard GTF\nformat. STAR will extract splice junctions from this file\ - \ and use them to greatly improve\naccuracy of the mapping. Corresponds to\ - \ the --sjdbGTFfile argument in the STAR command.\n" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--genomeDir" - description: "Path to output directory. Corresponds to the --genomeDir argument\ - \ in the STAR command." 
- info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome indexing arguments" - arguments: - - type: "integer" - name: "--genomeSAindexNbases" - description: "Length (bases) of the SA pre-indexing string. Typically between\ - \ 10 and 15.\nLonger strings will use much more memory, but allow faster searches.\ - \ For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down\ - \ to\nmin(14, log2(GenomeLength)/2 - 1).\n" - info: null - default: - - 14 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Create a reference for STAR from a set of fasta files." - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "../../../resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "docker" - env: - - "STAR_VERSION 2.7.10b" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic 
CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/config.vsh.yml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_build_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_build_reference/star_build_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/mapping/star_build_reference/star_build_reference b/target/docker/mapping/star_build_reference/star_build_reference deleted file mode 100755 index 
7e74bc45069..00000000000 --- a/target/docker/mapping/star_build_reference/star_build_reference +++ /dev/null @@ -1,1175 +0,0 @@ -#!/usr/bin/env bash - -# star_build_reference 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ 
-h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. 
-function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="star_build_reference" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "star_build_reference 0.12.3" - echo "" - echo "Create a reference for STAR from a set of fasta files." - echo "" - echo "Input/Output:" - echo " --genomeFastaFiles, --genome_fasta" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: chr1.fasta chr2.fasta" - echo " The fasta files to be included in the reference. Corresponds to the" - echo " --genomeFastaFiles argument in the STAR command." 
- echo "" - echo " --sjdbGTFfile, --transcriptome_gtf" - echo " type: file, file must exist" - echo " Specifies the path to the file with annotated transcripts in the" - echo " standard GTF" - echo " format. STAR will extract splice junctions from this file and use them" - echo " to greatly improve" - echo " accuracy of the mapping. Corresponds to the --sjdbGTFfile argument in" - echo " the STAR command." - echo "" - echo " --genomeDir, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/foo" - echo " Path to output directory. Corresponds to the --genomeDir argument in the" - echo " STAR command." - echo "" - echo "Genome indexing arguments:" - echo " --genomeSAindexNbases" - echo " type: integer" - echo " default: 14" - echo " Length (bases) of the SA pre-indexing string. Typically between 10 and" - echo " 15." - echo " Longer strings will use much more memory, but allow faster searches. For" - echo " small" - echo " genomes, the parameter {genomeSAindexNbases must be scaled down to" - echo " min(14, log2(GenomeLength)/2 - 1)." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -ENV STAR_VERSION 2.7.10b -ENV PACKAGES gcc g++ make wget zlib1g-dev unzip -RUN apt-get update && \ - apt-get install -y --no-install-recommends ${PACKAGES} && \ - cd /tmp && \ - wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ - unzip ${STAR_VERSION}.zip && \ - cd STAR-${STAR_VERSION}/source && \ - make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ - cp STAR /usr/local/bin && \ - cd / && \ - rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ - apt-get --purge autoremove -y ${PACKAGES} && \ - apt-get clean - 
-LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component mapping star_build_reference" -LABEL org.opencontainers.image.created="2024-01-25T10:14:00Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-star_build_reference-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "star_build_reference 0.12.3" - exit - ;; - --genome_fasta) - if [ -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_PAR_GENOME_FASTA="$2" - else - VIASH_PAR_GENOME_FASTA="$VIASH_PAR_GENOME_FASTA ""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genome_fasta=*) - if [ -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") - else - VIASH_PAR_GENOME_FASTA="$VIASH_PAR_GENOME_FASTA "$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --genomeFastaFiles) - if [ -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_PAR_GENOME_FASTA="$2" - else - VIASH_PAR_GENOME_FASTA="$VIASH_PAR_GENOME_FASTA ""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --transcriptome_gtf) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome_gtf=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sjdbGTFfile) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --genomeDir) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeSAindexNbases) - [ -n "$VIASH_PAR_GENOMESAINDEXNBASES" ] && ViashError Bad arguments for option \'--genomeSAindexNbases\': \'$VIASH_PAR_GENOMESAINDEXNBASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOMESAINDEXNBASES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeSAindexNbases. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genomeSAindexNbases=*) - [ -n "$VIASH_PAR_GENOMESAINDEXNBASES" ] && ViashError Bad arguments for option \'--genomeSAindexNbases=*\': \'$VIASH_PAR_GENOMESAINDEXNBASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOMESAINDEXNBASES=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then - ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_GENOMESAINDEXNBASES+x} ]; then - VIASH_PAR_GENOMESAINDEXNBASES="14" -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_GENOME_FASTA" ]; then - IFS=' ' - set -f - for file in $VIASH_PAR_GENOME_FASTA; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_GENOMESAINDEXNBASES" ]]; then - if ! [[ "$VIASH_PAR_GENOMESAINDEXNBASES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--genomeSAindexNbases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_TEST_GENOME_FASTA=() - IFS=' ' - for var in $VIASH_PAR_GENOME_FASTA; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_GENOME_FASTA+=( "$var" ) - done - VIASH_PAR_GENOME_FASTA=$(IFS=' ' ; echo "${VIASH_TEST_GENOME_FASTA[*]}") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-star_build_reference-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import re -import tempfile -import subprocess -from pathlib import Path -import tarfile -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'genome_fasta': $( if [ ! 
-z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "r'${VIASH_PAR_GENOME_FASTA//\'/\'\"\'\"r\'}'.split(' ')"; else echo None; fi ), - 'transcriptome_gtf': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_GTF//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'genomeSAindexNbases': $( if [ ! -z ${VIASH_PAR_GENOMESAINDEXNBASES+x} ]; then echo "int(r'${VIASH_PAR_GENOMESAINDEXNBASES//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\x1f\\x8b' - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -######################## -### Main code ### -######################## - -# rename keys and convert path strings to Path -# note: only list file arguments here. 
if non-file arguments also need to be renamed, -# the \`processPar()\` generator needs to be adapted -to_rename = {'genome_fasta': 'genomeFastaFiles', 'output': 'genomeDir', 'transcriptome_gtf': 'sjdbGTFfile'} - -def process_par(orig_par, to_rename): - for key, value in orig_par.items(): - # rename the key in par based on the \`to_rename\` dict - if key in to_rename.keys(): - new_key = to_rename[key] - - # also turn value into a Path - if isinstance(value, list): - new_value = [Path(val) for val in value] - else: - new_value = Path(value) - else: - new_key = key - new_value = value - yield new_key, new_value -par = dict(process_par(par, to_rename)) - -# create output dir if need be -par["genomeDir"].mkdir(parents=True, exist_ok=True) - -with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"]) as temp_dir: - - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - for par_name in ["genomeFastaFiles", "sjdbGTFfile"]: - par_values = par[par_name] - if par_values: - # turn value into list - is_multiple = isinstance(par_values, list) - if not is_multiple: - par_values = [ par_values ] - - # output list - new_values = [] - for par_value in par_values: - print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) - new_value = extract_if_need_be(par_value, temp_dir_path) - new_values.append(new_value) - - # unlist if need be - if not is_multiple: - new_values = new_values[0] - - # replace value - par[par_name] = new_values - # end ungzipping - print("", flush=True) - - print(">> Constructing command", flush=True) - par["runMode"] = "genomeGenerate" - par["outTmpDir"] = temp_dir_path / "run" - if 'cpus' in meta and meta['cpus']: - par["runThreadN"] = meta["cpus"] - - - cmd_args = [ "STAR" ] - for name, value in par.items(): - if value is not None: - if isinstance(value, list): - cmd_args.extend(["--" + name] + [str(x) for x in value]) - else: - cmd_args.extend(["--" + name, str(value)]) - print("", 
flush=True) - - print(">> Running STAR with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - print("", flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - unset VIASH_TEST_GENOME_FASTA - IFS=' ' - for var in $VIASH_PAR_GENOME_FASTA; do - unset IFS - if [ -z "$VIASH_TEST_GENOME_FASTA" ]; then - VIASH_TEST_GENOME_FASTA="$(ViashStripAutomount "$var")" - else - VIASH_TEST_GENOME_FASTA="$VIASH_TEST_GENOME_FASTA ""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_GENOME_FASTA="$VIASH_TEST_GENOME_FASTA" -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/metadata/add_id/.config.vsh.yaml b/target/docker/metadata/add_id/.config.vsh.yaml deleted file mode 100644 index d8140155307..00000000000 --- a/target/docker/metadata/add_id/.config.vsh.yaml +++ /dev/null @@ -1,197 +0,0 @@ -functionality: - name: "add_id" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_id" - description: "The input id." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_output" - description: "Name of the .obs column where to store the id." - info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--make_observation_keys_unique" - description: "Join the id to the .obs index (.obs_names)." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Add id of .obs. Also allows to make .obs_names (the .obs index) unique\ - \ \nby prefixing the values with an unique id per .h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory 
= 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/add_id" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/add_id/add_id" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/metadata/add_id/add_id b/target/docker/metadata/add_id/add_id deleted file mode 100755 index 663e58f4349..00000000000 --- a/target/docker/metadata/add_id/add_id +++ /dev/null @@ -1,1064 +0,0 @@ -#!/usr/bin/env bash - -# add_id 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="add_id" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "add_id 0.12.3" - echo "" - echo "Add id of .obs. Also allows to make .obs_names (the .obs index) unique" - echo "by prefixing the values with an unique id per .h5mu file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: sample_path" - echo " Path to the input .h5mu." - echo "" - echo " --input_id" - echo " type: string, required parameter" - echo " The input id." - echo "" - echo " --obs_output" - echo " type: string" - echo " default: sample_id" - echo " Name of the .obs column where to store the id." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --make_observation_keys_unique" - echo " type: boolean_true" - echo " Join the id to the .obs index (.obs_names)." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component metadata add_id" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-add_id-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "add_id 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id) - [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_ID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id=*) - [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id=*\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_output) - [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_output=*) - [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output=*\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --make_observation_keys_unique) - [ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ] && ViashError Bad arguments for option \'--make_observation_keys_unique\': \'$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; 
- mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then - ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then - VIASH_PAR_OBS_OUTPUT="sample_id" -fi -if [ -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then - VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ]]; then - if ! [[ "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--make_observation_keys_unique' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_add_id:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_add_id:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_add_id:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-add_id-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -from mudata import read_h5mu, MuData - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_output': $( if [ ! -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'make_observation_keys_unique': $( if [ ! -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then echo "r'${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: - """ - Make the observation keys unique across all samples. At input, - the observation keys are unique within a sample. By adding the sample name - (unique for a sample) to each observation key, the observation key is made - unique across all samples as well. 
- """ - logger.info('Making observation keys unique across all samples.') - sample.obs.index = f"{sample_id}_" + sample.obs.index - make_observation_keys_unique_per_mod(sample_id, sample) - - -def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: - """ - Updating MuData.obs_names is not allowed (it is read-only). - So the observation keys for each modality has to be updated manually. - """ - for mod in sample.mod.values(): - mod.obs_names = f"{sample_id}_" + mod.obs_names - -def main(): - input_data = read_h5mu(par["input"]) - input_data.obs[par["obs_output"]] = par["input_id"] - for mod_data in input_data.mod.values(): - mod_data.obs[par["obs_output"]] = par["input_id"] - if par["make_observation_keys_unique"]: - make_observation_keys_unique(par["input_id"], input_data) - logger.info("Writing out data to '%s'.", par["output"]) - input_data.write_h5mu(par["output"], compression=par["output_compression"]) - -if __name__ == '__main__': - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/metadata/add_id/setup_logger.py b/target/docker/metadata/add_id/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/metadata/add_id/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/metadata/grep_annotation_column/.config.vsh.yaml b/target/docker/metadata/grep_annotation_column/.config.vsh.yaml deleted file mode 100644 index 594bfcfaa9e..00000000000 --- a/target/docker/metadata/grep_annotation_column/.config.vsh.yaml +++ /dev/null @@ -1,244 +0,0 @@ -functionality: - name: "grep_annotation_column" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - description: "Arguments related to the input dataset." - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_column" - description: "Column to query. 
If not specified, use .var_names or .obs_names,\ - \ depending on the value of --matrix" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to get the annotation matrix from.\n" - info: null - example: - - "rna" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--matrix" - description: "Matrix to fetch the column from that will be searched." - info: null - example: - - "var" - required: false - choices: - - "var" - - "obs" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - description: "Arguments related to how the output will be written." - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_match_column" - description: "Name of the column to write the result to." 
- info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_fraction_column" - description: "For the opposite axis, name of the column to write the fraction\ - \ of \nobservations that matches to the pattern.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Query options" - description: "Options related to the query" - arguments: - - type: "string" - name: "--regex_pattern" - description: "Regex to use to match with the input column." - info: null - example: - - "^[mM][tT]-" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Perform a regex lookup on a column from the annotation matrices .obs\ - \ or .var.\nThe annotation matrix can originate from either a modality, or all\ - \ modalities (global .var or .obs).\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: 
"native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/grep_annotation_column" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/grep_annotation_column/grep_annotation_column" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/metadata/grep_annotation_column/grep_annotation_column b/target/docker/metadata/grep_annotation_column/grep_annotation_column deleted file mode 100755 index 39e2db2b16d..00000000000 --- a/target/docker/metadata/grep_annotation_column/grep_annotation_column +++ /dev/null @@ -1,1148 +0,0 @@ -#!/usr/bin/env bash - -# grep_annotation_column 0.12.3 -# -# This wrapper script is 
auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" 
>/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="grep_annotation_column" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "grep_annotation_column 0.12.3" - echo "" - echo "Perform a regex lookup on a column from the annotation matrices .obs or .var." - echo "The annotation matrix can originate from either a modality, or all modalities" - echo "(global .var or .obs)." - echo "" - echo "Inputs:" - echo " Arguments related to the input dataset." - echo "" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: sample_path" - echo " Path to the input .h5mu." - echo "" - echo " --input_column" - echo " type: string" - echo " Column to query. If not specified, use .var_names or .obs_names," - echo " depending on the value of --matrix" - echo "" - echo " --modality" - echo " type: string, required parameter" - echo " example: rna" - echo " Which modality to get the annotation matrix from." 
- echo "" - echo " --matrix" - echo " type: string" - echo " example: var" - echo " choices: [ var, obs ]" - echo " Matrix to fetch the column from that will be searched." - echo "" - echo "Outputs:" - echo " Arguments related to how the output will be written." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --output_match_column" - echo " type: string, required parameter" - echo " Name of the column to write the result to." - echo "" - echo " --output_fraction_column" - echo " type: string" - echo " For the opposite axis, name of the column to write the fraction of" - echo " observations that matches to the pattern." - echo "" - echo "Query options:" - echo " Options related to the query" - echo "" - echo " --regex_pattern" - echo " type: string, required parameter" - echo " example: ^[mM][tT]-" - echo " Regex to use to match with the input column." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component metadata grep_annotation_column" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - 
-VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-grep_annotation_column-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "grep_annotation_column 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_column) - [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_COLUMN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_column. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_column=*) - [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column=*\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_COLUMN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --matrix) - [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MATRIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --matrix. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --matrix=*) - [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix=*\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MATRIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_match_column) - [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_MATCH_COLUMN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_match_column. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_match_column=*) - [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column=*\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_MATCH_COLUMN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_fraction_column) - [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FRACTION_COLUMN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fraction_column. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_fraction_column=*) - [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column=*\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FRACTION_COLUMN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --regex_pattern) - [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REGEX_PATTERN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --regex_pattern. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --regex_pattern=*) - [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern=*\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REGEX_PATTERN=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - ViashError '--modality' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then - ViashError '--output_match_column' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then - ViashError '--regex_pattern' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_MATRIX" ]; then - VIASH_PAR_MATRIX_CHOICES=("var:obs") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_MATRIX_CHOICES[*]}:" =~ ":$VIASH_PAR_MATRIX:" ]]; then - ViashError '--matrix' specified value of \'$VIASH_PAR_MATRIX\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-grep_annotation_column-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -from pathlib import Path -from operator import attrgetter -import re -import numpy as np - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_column': $( if [ ! -z ${VIASH_PAR_INPUT_COLUMN+x} ]; then echo "r'${VIASH_PAR_INPUT_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'matrix': $( if [ ! -z ${VIASH_PAR_MATRIX+x} ]; then echo "r'${VIASH_PAR_MATRIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_match_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MATCH_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_fraction_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FRACTION_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'regex_pattern': $( if [ ! -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then echo "r'${VIASH_PAR_REGEX_PATTERN//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(par): - input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] - try: - compiled_regex = re.compile(par["regex_pattern"]) - except (TypeError, re.error) as e: - raise ValueError(f"{par['regex_pattern']} is not a valid regular expression pattern.") from e - else: - if compiled_regex.groups: - raise NotImplementedError("Using match groups is not supported by this component.") - logger.info('Reading input file %s, modality %s.', input_file, mod_name) - - mudata = mu.read_h5mu(input_file) - modality_data = mudata[mod_name] - annotation_matrix = getattr(modality_data, par['matrix']) - default_column = { - "var": attrgetter("var_names"), - "obs": attrgetter("obs_names") - } - if par["input_column"]: - try: - annotation_column = annotation_matrix[par["input_column"]] - except KeyError as e: - raise ValueError(f"Column {par['input_column']} could not be found for modality " - f"{par['modality']}. 
Available columns: {','.join(annotation_matrix.columns.to_list())}") from e - else: - annotation_column = default_column[par['matrix']](modality_data) - grep_result = annotation_column.str.contains(par["regex_pattern"], regex=True) - - other_axis_attribute = { - "var": "obs", - "obs": "var" - } - if par['output_fraction_column']: - pct_matching = np.ravel(np.sum(modality_data[:, grep_result].X, axis=1) / np.sum(modality_data.X, axis=1)) - getattr(modality_data, other_axis_attribute[par['matrix']])[par['output_fraction_column']] = pct_matching - getattr(modality_data, par['matrix'])[par["output_match_column"]] = grep_result - mudata.write(output_file, compression=par["output_compression"]) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/metadata/join_csv/.config.vsh.yaml b/target/docker/metadata/join_csv/.config.vsh.yaml deleted file mode 100644 index beac67bda0a..00000000000 --- a/target/docker/metadata/join_csv/.config.vsh.yaml +++ /dev/null @@ -1,229 +0,0 @@ -functionality: - name: "join_csv" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "MuData Input" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_key" - description: "Obs column name where the sample id can be found for each observation\ - \ to join on.\nUseful when adding metadata to concatenated samples.\nMutually\ - \ exclusive with `--var_key`.\"\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_key" - description: "Var column name where the sample id can be found for each variable\ - \ to join on.\nMutually exclusive with `--obs_key`.\"\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "MuData Output" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Metadata Input" - arguments: - - type: "file" - name: "--input_csv" - description: ".csv file containing metadata" - info: null - example: - - "metadata.csv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--csv_key" - description: "column of the the csv that corresponds to the sample id." - info: null - default: - - "id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Join a csv containing metadata to the .obs or .var field of a mudata\ - \ file." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/config.vsh.yml" - platform: "docker" - output: 
"/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_csv" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_csv/join_csv" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/metadata/join_csv/join_csv b/target/docker/metadata/join_csv/join_csv deleted file mode 100755 index 5fac93215fd..00000000000 --- a/target/docker/metadata/join_csv/join_csv +++ /dev/null @@ -1,1119 +0,0 @@ -#!/usr/bin/env bash - -# join_csv 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="join_csv" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "join_csv 0.12.3" - echo "" - echo "Join a csv containing metadata to the .obs or .var field of a mudata file." - echo "" - echo "MuData Input:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --obs_key" - echo " type: string" - echo " Obs column name where the sample id can be found for each observation to" - echo " join on." - echo " Useful when adding metadata to concatenated samples." - echo " Mutually exclusive with \`--var_key\`.\"" - echo "" - echo " --var_key" - echo " type: string" - echo " Var column name where the sample id can be found for each variable to" - echo " join on." - echo " Mutually exclusive with \`--obs_key\`.\"" - echo "" - echo "MuData Output:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo "Metadata Input:" - echo " --input_csv" - echo " type: file, required parameter, file must exist" - echo " example: metadata.csv" - echo " .csv file containing metadata" - echo "" - echo " --csv_key" - echo " type: string" - echo " default: id" - echo " column of the the csv that corresponds to the sample id." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." 
- fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component metadata join_csv" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-join_csv-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "join_csv 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_key) - [ -n "$VIASH_PAR_OBS_KEY" ] && ViashError Bad arguments for option \'--obs_key\': \'$VIASH_PAR_OBS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBS_KEY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_key. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_key=*) - [ -n "$VIASH_PAR_OBS_KEY" ] && ViashError Bad arguments for option \'--obs_key=*\': \'$VIASH_PAR_OBS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_KEY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_key) - [ -n "$VIASH_PAR_VAR_KEY" ] && ViashError Bad arguments for option \'--var_key\': \'$VIASH_PAR_VAR_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_KEY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_key. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_key=*) - [ -n "$VIASH_PAR_VAR_KEY" ] && ViashError Bad arguments for option \'--var_key=*\': \'$VIASH_PAR_VAR_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_KEY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_csv) - [ -n "$VIASH_PAR_INPUT_CSV" ] && ViashError Bad arguments for option \'--input_csv\': \'$VIASH_PAR_INPUT_CSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_CSV="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_csv. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_csv=*) - [ -n "$VIASH_PAR_INPUT_CSV" ] && ViashError Bad arguments for option \'--input_csv=*\': \'$VIASH_PAR_INPUT_CSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_CSV=$(ViashRemoveFlags "$1") - shift 1 - ;; - --csv_key) - [ -n "$VIASH_PAR_CSV_KEY" ] && ViashError Bad arguments for option \'--csv_key\': \'$VIASH_PAR_CSV_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CSV_KEY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --csv_key. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --csv_key=*) - [ -n "$VIASH_PAR_CSV_KEY" ] && ViashError Bad arguments for option \'--csv_key=*\': \'$VIASH_PAR_CSV_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CSV_KEY=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT_CSV+x} ]; then - ViashError '--input_csv' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_CSV_KEY+x} ]; then - VIASH_PAR_CSV_KEY="id" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_INPUT_CSV" ] && [ ! -e "$VIASH_PAR_INPUT_CSV" ]; then - ViashError "Input file '$VIASH_PAR_INPUT_CSV' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_INPUT_CSV" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_CSV")" ) - VIASH_PAR_INPUT_CSV=$(ViashAutodetectMount "$VIASH_PAR_INPUT_CSV") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-join_csv-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import pandas as pd -from mudata import read_h5mu - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_key': $( if [ ! -z ${VIASH_PAR_OBS_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_key': $( if [ ! -z ${VIASH_PAR_VAR_KEY+x} ]; then echo "r'${VIASH_PAR_VAR_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_csv': $( if [ ! -z ${VIASH_PAR_INPUT_CSV+x} ]; then echo "r'${VIASH_PAR_INPUT_CSV//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'csv_key': $( if [ ! -z ${VIASH_PAR_CSV_KEY+x} ]; then echo "r'${VIASH_PAR_CSV_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -if par["obs_key"] and par["var_key"]: - raise ValueError("--obs_key can not be used in conjuction with --var_key.") -if not (par["obs_key"] or par["var_key"]): - raise ValueError("Must define either --obs_key or --var_key") - -logger.info("Read metadata csv from file") -metadata = pd.read_csv(par['input_csv'], sep=",", header=0, index_col=par["csv_key"]) -metadata.fillna('', inplace=True) - -logger.info("Read mudata from file") -mdata = read_h5mu(par['input']) -mod_data = mdata.mod[par['modality']] - -logger.info("Joining csv to mudata") -matrix 
= 'var' if par["var_key"] else 'obs' -matrix_sample_column_name = par["var_key"] if par["var_key"] else par["obs_key"] -original_matrix = getattr(mod_data, matrix) -sample_ids = original_matrix[matrix_sample_column_name] - -try: - new_columns = metadata.loc[sample_ids.tolist()] -except KeyError as e: - raise KeyError(f"Not all sample IDs selected from {matrix} " - "(using the column selected with --var_key or --obs_key) were found in " - "the csv file.") from e -new_matrix = pd.concat([original_matrix.reset_index(drop=True), - new_columns.reset_index(drop=True)], axis=1)\\ - .set_axis(original_matrix.index) -setattr(mod_data, matrix, new_matrix) - -logger.info("Write output to mudata file") -mdata.write_h5mu(par['output'], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_PAR_INPUT_CSV" ]; then - VIASH_PAR_INPUT_CSV=$(ViashStripAutomount "$VIASH_PAR_INPUT_CSV") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/metadata/join_csv/setup_logger.py b/target/docker/metadata/join_csv/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/metadata/join_csv/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/metadata/join_uns_to_obs/.config.vsh.yaml b/target/docker/metadata/join_uns_to_obs/.config.vsh.yaml deleted file mode 100644 index 6efbf4fd030..00000000000 --- a/target/docker/metadata/join_uns_to_obs/.config.vsh.yaml +++ /dev/null @@ -1,171 +0,0 @@ -functionality: - name: "join_uns_to_obs" - namespace: "metadata" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_key" - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Join a data frame of length 1 (1 row index value) in .uns containing\ - \ metadata to the .obs of a mudata file." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 
128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/config.vsh.yml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_uns_to_obs" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_uns_to_obs/join_uns_to_obs" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/metadata/join_uns_to_obs/join_uns_to_obs b/target/docker/metadata/join_uns_to_obs/join_uns_to_obs deleted file mode 100755 index e59ad3a0701..00000000000 --- a/target/docker/metadata/join_uns_to_obs/join_uns_to_obs +++ /dev/null @@ -1,1035 +0,0 @@ -#!/usr/bin/env bash - -# join_uns_to_obs 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! 
-# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="join_uns_to_obs" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "join_uns_to_obs 0.12.3" - echo "" - echo "Join a data frame of length 1 (1 row index value) in .uns containing metadata to" - echo "the .obs of a mudata file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --uns_key" - echo " type: string, required parameter" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. 
See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.description="Companion container for running component metadata join_uns_to_obs" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 
: image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-join_uns_to_obs-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "join_uns_to_obs 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --uns_key) - [ -n "$VIASH_PAR_UNS_KEY" ] && ViashError Bad arguments for option \'--uns_key\': \'$VIASH_PAR_UNS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_UNS_KEY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_key. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --uns_key=*) - [ -n "$VIASH_PAR_UNS_KEY" ] && ViashError Bad arguments for option \'--uns_key=*\': \'$VIASH_PAR_UNS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_KEY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_UNS_KEY+x} ]; then - ViashError '--uns_key' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-join_uns_to_obs-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import pandas as pd -from mudata import read_h5mu - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'uns_key': $( if [ ! -z ${VIASH_PAR_UNS_KEY+x} ]; then echo "r'${VIASH_PAR_UNS_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Read mudata from file") -mdata = read_h5mu(par['input']) -mod_data = mdata.mod[par['modality']] - -logger.info("Joining uns to obs") -# get data frame -uns_df = mod_data.uns[par['uns_key']] - -# check for overlapping colnames -intersect_keys = uns_df.keys().intersection(mod_data.obs.keys()) -obs_drop = mod_data.obs.drop(intersect_keys, axis=1) - -# create data frame to join -uns_df_rep = uns_df.loc[uns_df.index.repeat(mod_data.n_obs)] -uns_df_rep.index = mod_data.obs_names - -# create new obs -mod_data.obs = pd.concat([obs_drop, uns_df_rep], axis=1) - -logger.info("Write output to mudata file") -mdata.write_h5mu(par['output'], compression=par["output_compression"]) - - -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/metadata/join_uns_to_obs/setup_logger.py b/target/docker/metadata/join_uns_to_obs/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/metadata/join_uns_to_obs/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml b/target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml deleted file mode 100644 index 121582d4551..00000000000 --- a/target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml +++ /dev/null @@ -1,192 +0,0 @@ -functionality: - name: "move_obsm_to_obs" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - 
linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "MuData Input" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_key" - description: "Key of a data structure to move from `.obsm` to `.obs`." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "MuData Output" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Move a matrix from .obsm to .obs. 
Newly created columns in .obs will\ - \ \nbe created from the .obsm key suffixed with an underscore and the name of\ - \ the columns\nof the specified .obsm matrix.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - 
debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/move_obsm_to_obs" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs b/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs deleted file mode 100755 index 9a91bbbad0d..00000000000 --- a/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs +++ /dev/null @@ -1,1054 +0,0 @@ -#!/usr/bin/env bash - -# move_obsm_to_obs 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="move_obsm_to_obs" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "move_obsm_to_obs 0.12.3" - echo "" - echo "Move a matrix from .obsm to .obs. Newly created columns in .obs will" - echo "be created from the .obsm key suffixed with an underscore and the name of the" - echo "columns" - echo "of the specified .obsm matrix." - echo "" - echo "MuData Input:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --obsm_key" - echo " type: string, required parameter" - echo " Key of a data structure to move from \`.obsm\` to \`.obs\`." - echo "" - echo "MuData Output:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component metadata move_obsm_to_obs" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-move_obsm_to_obs-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "move_obsm_to_obs 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_key) - [ -n "$VIASH_PAR_OBSM_KEY" ] && ViashError Bad arguments for option \'--obsm_key\': \'$VIASH_PAR_OBSM_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSM_KEY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_key. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_key=*) - [ -n "$VIASH_PAR_OBSM_KEY" ] && ViashError Bad arguments for option \'--obsm_key=*\': \'$VIASH_PAR_OBSM_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_KEY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OBSM_KEY+x} ]; then - ViashError '--obsm_key' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-move_obsm_to_obs-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -from functools import partial -from pandas.errors import MergeError -from mudata import read_h5mu - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_key': $( if [ ! -z ${VIASH_PAR_OBSM_KEY+x} ]; then echo "r'${VIASH_PAR_OBSM_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Read mudata from file") -mdata = read_h5mu(par['input']) -try: - mod_data = mdata.mod[par['modality']] -except KeyError: - raise ValueError(f"Modality {par['modality']} does not exist.") - -logger.info("Moving .obm key %s", par["obsm_key"]) -try: - obsm_matrix = mod_data.obsm[par["obsm_key"]].copy() -except KeyError: - raise ValueError(f".obsm key {par['obsm_key']} was not found in " - f".obsm slot for modality {par['modality']}.") - - -obsm_matrix.rename(partial("{key}_{}".format, key=par["obsm_key"]), - axis="columns", copy=False, inplace=True) - -original_n_obs = len(mod_data.obs) -try: - logger.info(f".obs names: {mod_data.obs_names}") - logger.info(f".obsm index: 
{obsm_matrix.index}") - mod_data.obs = mod_data.obs.merge(obsm_matrix, how="left", - validate="one_to_one", - left_index=True, right_index=True) -except MergeError as e: - raise ValueError(f"Could not join .obsm matrix at {par['obsm_key']} to .obs because there " - "are some observation that are not overlapping between the two matrices " - "(indexes should overlap). This is either a bug or your mudata file is corrupt.") -del mod_data.obsm[par["obsm_key"]] - -logger.info("Write output to mudata file") -mdata.write_h5mu(par['output'], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/metadata/move_obsm_to_obs/setup_logger.py b/target/docker/metadata/move_obsm_to_obs/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/metadata/move_obsm_to_obs/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/neighbors/bbknn/.config.vsh.yaml b/target/docker/neighbors/bbknn/.config.vsh.yaml deleted file mode 100644 index 87dff641d9b..00000000000 --- a/target/docker/neighbors/bbknn/.config.vsh.yaml +++ /dev/null @@ -1,289 +0,0 @@ -functionality: - name: "bbknn" - namespace: "neighbors" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false 
- direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "The dimensionality reduction in `.obsm` to use for neighbour detection.\ - \ Defaults to X_pca." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: ".obs column name discriminating between your batches." - info: null - default: - - "batch" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output .h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_output" - description: "Mandatory .uns slot to store various neighbor output objects." - info: null - default: - - "neighbors" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_distances" - description: "In which .obsp slot to store the distance matrix between the resulting\ - \ neighbors." - info: null - default: - - "distances" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_connectivities" - description: "In which .obsp slot to store the connectivities matrix between the\ - \ resulting neighbors." 
- info: null - default: - - "connectivities" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_neighbors_within_batch" - description: "How many top neighbours to report for each batch; total number of\ - \ neighbours in the initial k-nearest-neighbours computation will be this number\ - \ times the number of batches." - info: null - default: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_pcs" - description: "How many dimensions (in case of PCA, principal components) to use\ - \ in the analysis." - info: null - default: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_trim" - description: "Trim the neighbours of each cell to these many top connectivities.\ - \ May help with population independence and improve the tidiness of clustering.\ - \ The lower the value the more independent the individual populations, at the\ - \ cost of more conserved batch effect. If `None` (default), sets the parameter\ - \ value automatically to 10 times `neighbors_within_batch` times the number\ - \ of batches. Set to 0 to skip." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "BBKNN network generation\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "bbknn" - - "scikit-learn~=1.2.2" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: 
"memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/bbknn" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/bbknn/bbknn" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/neighbors/bbknn/bbknn b/target/docker/neighbors/bbknn/bbknn deleted file mode 100755 index 12cada2b7c8..00000000000 --- a/target/docker/neighbors/bbknn/bbknn +++ /dev/null @@ -1,1184 +0,0 @@ -#!/usr/bin/env bash - -# bbknn 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (author) -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the 
verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="bbknn" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bbknn 0.12.3" - echo "" - echo "BBKNN network generation" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --obsm_input" - echo " type: string" - echo " default: X_pca" - echo " The dimensionality reduction in \`.obsm\` to use for neighbour detection." - echo " Defaults to X_pca." - echo "" - echo " --obs_batch" - echo " type: string" - echo " default: batch" - echo " .obs column name discriminating between your batches." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output .h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --uns_output" - echo " type: string" - echo " default: neighbors" - echo " Mandatory .uns slot to store various neighbor output objects." 
- echo "" - echo " --obsp_distances" - echo " type: string" - echo " default: distances" - echo " In which .obsp slot to store the distance matrix between the resulting" - echo " neighbors." - echo "" - echo " --obsp_connectivities" - echo " type: string" - echo " default: connectivities" - echo " In which .obsp slot to store the connectivities matrix between the" - echo " resulting neighbors." - echo "" - echo " --n_neighbors_within_batch" - echo " type: integer" - echo " default: 3" - echo " How many top neighbours to report for each batch; total number of" - echo " neighbours in the initial k-nearest-neighbours computation will be this" - echo " number times the number of batches." - echo "" - echo " --n_pcs" - echo " type: integer" - echo " default: 50" - echo " How many dimensions (in case of PCA, principal components) to use in the" - echo " analysis." - echo "" - echo " --n_trim" - echo " type: integer" - echo " Trim the neighbours of each cell to these many top connectivities. May" - echo " help with population independence and improve the tidiness of" - echo " clustering. The lower the value the more independent the individual" - echo " populations, at the cost of more conserved batch effect. If \`None\`" - echo " (default), sets the parameter value automatically to 10 times" - echo " \`neighbors_within_batch\` times the number of batches. Set to 0 to skip." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
- exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps build-essential && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "bbknn" "scikit-learn~=1.2.2" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component neighbors bbknn" -LABEL org.opencontainers.image.created="2024-01-25T10:14:00Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL 
org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bbknn-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) 
continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "bbknn 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_input) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSM_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_input=*) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_batch) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_batch=*) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --uns_output) - [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --uns_output=*) - [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output=*\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsp_distances) - [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_DISTANCES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_distances. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsp_distances=*) - [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances=*\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_DISTANCES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsp_connectivities) - [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_CONNECTIVITIES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_connectivities. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsp_connectivities=*) - [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities=*\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_CONNECTIVITIES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_neighbors_within_batch) - [ -n "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" ] && ViashError Bad arguments for option \'--n_neighbors_within_batch\': \'$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors_within_batch. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_neighbors_within_batch=*) - [ -n "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" ] && ViashError Bad arguments for option \'--n_neighbors_within_batch=*\': \'$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_pcs) - [ -n "$VIASH_PAR_N_PCS" ] && ViashError Bad arguments for option \'--n_pcs\': \'$VIASH_PAR_N_PCS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_PCS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_pcs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_pcs=*) - [ -n "$VIASH_PAR_N_PCS" ] && ViashError Bad arguments for option \'--n_pcs=*\': \'$VIASH_PAR_N_PCS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_PCS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_trim) - [ -n "$VIASH_PAR_N_TRIM" ] && ViashError Bad arguments for option \'--n_trim\': \'$VIASH_PAR_N_TRIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TRIM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_trim. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_trim=*) - [ -n "$VIASH_PAR_N_TRIM" ] && ViashError Bad arguments for option \'--n_trim=*\': \'$VIASH_PAR_N_TRIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TRIM=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then - VIASH_PAR_OBSM_INPUT="X_pca" -fi -if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then - VIASH_PAR_OBS_BATCH="batch" -fi -if [ -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then - VIASH_PAR_UNS_OUTPUT="neighbors" -fi -if [ -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then - VIASH_PAR_OBSP_DISTANCES="distances" -fi -if [ -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then - VIASH_PAR_OBSP_CONNECTIVITIES="connectivities" -fi -if [ -z ${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH+x} ]; then - VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH="3" -fi -if [ -z ${VIASH_PAR_N_PCS+x} ]; then - VIASH_PAR_N_PCS="50" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" ]]; then - if ! [[ "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_neighbors_within_batch' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_PCS" ]]; then - if ! [[ "$VIASH_PAR_N_PCS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_pcs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_TRIM" ]]; then - if ! [[ "$VIASH_PAR_N_TRIM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_trim' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bbknn-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from mudata import read_h5mu -import bbknn - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsp_connectivities': $( if [ ! 
-z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'n_neighbors_within_batch': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_pcs': $( if [ ! -z ${VIASH_PAR_N_PCS+x} ]; then echo "int(r'${VIASH_PAR_N_PCS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_trim': $( if [ ! -z ${VIASH_PAR_N_TRIM+x} ]; then echo "int(r'${VIASH_PAR_N_TRIM//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -mudata = read_h5mu(par["input"]) -adata = mudata.mod[par["modality"]] - -# copy data -tmp_adata = adata.copy() -bbknn.bbknn( - tmp_adata, - use_rep=par["obsm_input"], - batch_key = par["obs_batch"], - neighbors_within_batch=par["n_neighbors_within_batch"], - n_pcs=par["n_pcs"], - trim=par["n_trim"] -) - -# store output -adata.obsp[par["obsp_connectivities"]] = tmp_adata.obsp["connectivities"] -adata.obsp[par["obsp_distances"]] = tmp_adata.obsp["distances"] -adata.uns[par["uns_output"]] = tmp_adata.uns["neighbors"] -adata.uns[par["uns_output"]]["distances_key"] = par["obsp_distances"] -adata.uns[par["uns_output"]]["connectivities_key"] = par["obsp_connectivities"] - -# write to file -mudata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/neighbors/find_neighbors/.config.vsh.yaml b/target/docker/neighbors/find_neighbors/.config.vsh.yaml deleted file mode 100644 index 4aaa7d85d25..00000000000 --- a/target/docker/neighbors/find_neighbors/.config.vsh.yaml +++ /dev/null @@ -1,309 +0,0 @@ -functionality: - name: "find_neighbors" - namespace: "neighbors" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "Which .obsm slot to use as a starting PCA embedding." 
- info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file containing the found neighbors." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_output" - description: "Mandatory .uns slot to store various neighbor output objects." - info: null - default: - - "neighbors" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_distances" - description: "In which .obsp slot to store the distance matrix between the resulting\ - \ neighbors." - info: null - default: - - "distances" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_connectivities" - description: "In which .obsp slot to store the connectivities matrix between the\ - \ resulting neighbors." - info: null - default: - - "connectivities" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--metric" - description: "The distance metric to be used in the generation of the nearest\ - \ neighborhood network." 
- info: null - default: - - "euclidean" - required: false - choices: - - "cityblock" - - "cosine" - - "euclidean" - - "l1" - - "l2" - - "manhattan" - - "braycurtis" - - "canberra" - - "chebyshev" - - "correlation" - - "dice" - - "hamming" - - "jaccard" - - "kulsinski" - - "mahalanobis" - - "minkowski" - - "rogerstanimoto" - - "russellrao" - - "seuclidean" - - "sokalmichener" - - "sokalsneath" - - "sqeuclidean" - - "yule" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_neighbors" - description: "The size of local neighborhood (in terms of number of neighboring\ - \ data points) used for manifold approximation. Larger values result in more\ - \ global views of the manifold, while smaller values result in more local data\ - \ being preserved. In general values should be in the range 2 to 100. If knn\ - \ is True, number of nearest neighbors to be searched. If knn is False, a Gaussian\ - \ kernel width is set to the distance of the n_neighbors neighbor." - info: null - default: - - 15 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seed" - description: "A random seed." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor\ - \ search efficiency of this heavily relies on UMAP [McInnes18], which also provides\ - \ a method for estimating connectivities of data points - the connectivity of\ - \ the manifold (method=='umap'). 
If method=='gauss', connectivities are computed\ - \ according to [Coifman05], in the adaption of [Haghverdi16].\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" 
- cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/find_neighbors" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/find_neighbors/find_neighbors" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/neighbors/find_neighbors/find_neighbors b/target/docker/neighbors/find_neighbors/find_neighbors deleted file mode 100755 index 110a9f5995e..00000000000 --- a/target/docker/neighbors/find_neighbors/find_neighbors +++ /dev/null @@ -1,1208 +0,0 @@ -#!/usr/bin/env bash - -# find_neighbors 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (maintainer) -# * Robrecht Cannoodt (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the 
verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="find_neighbors" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "find_neighbors 0.12.3" - echo "" - echo "Compute a neighborhood graph of observations [McInnes18]." - echo "" - echo "The neighbor search efficiency of this heavily relies on UMAP [McInnes18], which" - echo "also provides a method for estimating connectivities of data points - the" - echo "connectivity of the manifold (method=='umap'). If method=='gauss'," - echo "connectivities are computed according to [Coifman05], in the adaption of" - echo "[Haghverdi16]." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --obsm_input" - echo " type: string" - echo " default: X_pca" - echo " Which .obsm slot to use as a starting PCA embedding." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file containing the found neighbors." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo " --uns_output" - echo " type: string" - echo " default: neighbors" - echo " Mandatory .uns slot to store various neighbor output objects." - echo "" - echo " --obsp_distances" - echo " type: string" - echo " default: distances" - echo " In which .obsp slot to store the distance matrix between the resulting" - echo " neighbors." - echo "" - echo " --obsp_connectivities" - echo " type: string" - echo " default: connectivities" - echo " In which .obsp slot to store the connectivities matrix between the" - echo " resulting neighbors." - echo "" - echo " --metric" - echo " type: string" - echo " default: euclidean" - echo " choices: [ cityblock, cosine, euclidean, l1, l2, manhattan, braycurtis," - echo "canberra, chebyshev, correlation, dice, hamming, jaccard, kulsinski," - echo "mahalanobis, minkowski, rogerstanimoto, russellrao, seuclidean, sokalmichener," - echo "sokalsneath, sqeuclidean, yule ]" - echo " The distance metric to be used in the generation of the nearest" - echo " neighborhood network." - echo "" - echo " --num_neighbors" - echo " type: integer" - echo " default: 15" - echo " The size of local neighborhood (in terms of number of neighboring data" - echo " points) used for manifold approximation. Larger values result in more" - echo " global views of the manifold, while smaller values result in more local" - echo " data being preserved. In general values should be in the range 2 to 100." - echo " If knn is True, number of nearest neighbors to be searched. If knn is" - echo " False, a Gaussian kernel width is set to the distance of the n_neighbors" - echo " neighbor." - echo "" - echo " --seed" - echo " type: integer" - echo " default: 0" - echo " A random seed." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component neighbors find_neighbors" -LABEL org.opencontainers.image.created="2024-01-25T10:14:00Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-find_neighbors-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "find_neighbors 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_input) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSM_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_input=*) - [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --uns_output) - [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --uns_output=*) - [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output=*\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNS_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsp_distances) - [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_DISTANCES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_distances. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsp_distances=*) - [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances=*\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_DISTANCES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsp_connectivities) - [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OBSP_CONNECTIVITIES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_connectivities. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsp_connectivities=*) - [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities=*\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSP_CONNECTIVITIES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --metric) - [ -n "$VIASH_PAR_METRIC" ] && ViashError Bad arguments for option \'--metric\': \'$VIASH_PAR_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_METRIC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --metric. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --metric=*) - [ -n "$VIASH_PAR_METRIC" ] && ViashError Bad arguments for option \'--metric=*\': \'$VIASH_PAR_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_METRIC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --num_neighbors) - [ -n "$VIASH_PAR_NUM_NEIGHBORS" ] && ViashError Bad arguments for option \'--num_neighbors\': \'$VIASH_PAR_NUM_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NUM_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_neighbors. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --num_neighbors=*) - [ -n "$VIASH_PAR_NUM_NEIGHBORS" ] && ViashError Bad arguments for option \'--num_neighbors=*\': \'$VIASH_PAR_NUM_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_NUM_NEIGHBORS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --seed) - [ -n "$VIASH_PAR_SEED" ] && ViashError Bad arguments for option \'--seed\': \'$VIASH_PAR_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --seed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --seed=*) - [ -n "$VIASH_PAR_SEED" ] && ViashError Bad arguments for option \'--seed=*\': \'$VIASH_PAR_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEED=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - 
kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then - VIASH_PAR_OBSM_INPUT="X_pca" -fi -if [ -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then - VIASH_PAR_UNS_OUTPUT="neighbors" -fi -if [ -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then - VIASH_PAR_OBSP_DISTANCES="distances" -fi -if [ -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then - VIASH_PAR_OBSP_CONNECTIVITIES="connectivities" -fi -if [ -z ${VIASH_PAR_METRIC+x} ]; then - VIASH_PAR_METRIC="euclidean" -fi -if [ -z ${VIASH_PAR_NUM_NEIGHBORS+x} ]; then - VIASH_PAR_NUM_NEIGHBORS="15" -fi -if [ -z ${VIASH_PAR_SEED+x} ]; then - VIASH_PAR_SEED="0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_NUM_NEIGHBORS" ]]; then - if ! [[ "$VIASH_PAR_NUM_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--num_neighbors' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SEED" ]]; then - if ! [[ "$VIASH_PAR_SEED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--seed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_METRIC" ]; then - VIASH_PAR_METRIC_CHOICES=("cityblock:cosine:euclidean:l1:l2:manhattan:braycurtis:canberra:chebyshev:correlation:dice:hamming:jaccard:kulsinski:mahalanobis:minkowski:rogerstanimoto:russellrao:seuclidean:sokalmichener:sokalsneath:sqeuclidean:yule") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_METRIC_CHOICES[*]}:" =~ ":$VIASH_PAR_METRIC:" ]]; then - ViashError '--metric' specified value of \'$VIASH_PAR_METRIC\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-find_neighbors-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -import scanpy as sc -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'metric': $( if [ ! -z ${VIASH_PAR_METRIC+x} ]; then echo "r'${VIASH_PAR_METRIC//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'num_neighbors': $( if [ ! -z ${VIASH_PAR_NUM_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_NUM_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Computing a neighborhood graph on modality %s", mod) -adata = mdata.mod[mod] -neighbors = sc.Neighbors(adata) -neighbors.compute_neighbors( - n_neighbors=par["num_neighbors"], - 
use_rep=par["obsm_input"], - metric=par["metric"], - random_state=par["seed"], - method="umap" -) - -adata.uns[par["uns_output"]] = { - 'connectivities_key': par["obsp_connectivities"], - 'distances_key': par["obsp_distances"], - 'params': { - 'n_neighbors': neighbors.n_neighbors, - 'method': "umap", - 'random_state': par["seed"], - 'metric': par["metric"], - 'use_rep': par["obsm_input"] - } -} - -adata.obsp[par["obsp_distances"]] = neighbors.distances -adata.obsp[par["obsp_connectivities"]] = neighbors.connectivities - -logger.info("Writing to %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/neighbors/find_neighbors/setup_logger.py b/target/docker/neighbors/find_neighbors/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/neighbors/find_neighbors/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml b/target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml deleted file mode 100644 index 44bf64da637..00000000000 --- a/target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml +++ /dev/null @@ -1,195 +0,0 @@ -functionality: - name: "filter_10xh5" - namespace: "process_10xh5" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "An h5 file from the 10x genomics website." - info: null - example: - - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5 file." 
- info: null - example: - - "pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_library_size" - description: "Minimum library size." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells_per_gene" - description: "Minimum number of cells per gene." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--keep_feature_types" - description: "Specify which feature types will never be filtered out" - info: null - example: - - "Antibody Capture" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--verbose" - description: "Increase verbosity" - info: null - direction: "input" - dest: "par" - resources: - - type: "r_script" - path: "script.R" - is_executable: true - description: "Filter a 10x h5 dataset.\n" - usage: "filter_10xh5 \\\n --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\ - \n --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\\n --min_library_size\ - \ 1000 --min_cells_per_gene 300\n" - test_resources: - - type: "r_script" - path: "run_test.R" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "eddelbuettel/r2u:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libhdf5-dev python3-pip python3-dev" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - - type: "r" - cran: - - "testthat" - - "anndata" - - "hdf5r" - bioc_force_install: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/process_10xh5/filter_10xh5" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/process_10xh5/filter_10xh5/filter_10xh5" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git 
a/target/docker/process_10xh5/filter_10xh5/filter_10xh5 b/target/docker/process_10xh5/filter_10xh5/filter_10xh5 deleted file mode 100755 index c8c56f847ba..00000000000 --- a/target/docker/process_10xh5/filter_10xh5/filter_10xh5 +++ /dev/null @@ -1,1089 +0,0 @@ -#!/usr/bin/env bash - -# filter_10xh5 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should 
always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. 
-function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="filter_10xh5" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "filter_10xh5 0.12.3" - echo "" - echo "Filter a 10x h5 dataset." - echo "" - echo "Usage:" - echo "filter_10xh5 \\" - echo " --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\" - echo " --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\" - echo " --min_library_size 1000 --min_cells_per_gene 300" - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - echo " An h5 file from the 10x genomics website." 
- echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" - echo " Output h5 file." - echo "" - echo " --min_library_size" - echo " type: integer" - echo " default: 0" - echo " Minimum library size." - echo "" - echo " --min_cells_per_gene" - echo " type: integer" - echo " default: 0" - echo " Minimum number of cells per gene." - echo "" - echo " --keep_feature_types" - echo " type: string, multiple values allowed" - echo " example: Antibody Capture" - echo " Specify which feature types will never be filtered out" - echo "" - echo " --verbose" - echo " type: boolean_true" - echo " Increase verbosity" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? 
- fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM eddelbuettel/r2u:22.04 - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev python3-pip python3-dev && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ - Rscript -e 'remotes::install_cran(c("testthat", "anndata", "hdf5r"), repos = "https://cran.rstudio.com")' - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component process_10xh5 filter_10xh5" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_10xh5-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "filter_10xh5 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_library_size) - [ -n "$VIASH_PAR_MIN_LIBRARY_SIZE" ] && ViashError Bad arguments for option \'--min_library_size\': \'$VIASH_PAR_MIN_LIBRARY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_LIBRARY_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_library_size. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --min_library_size=*) - [ -n "$VIASH_PAR_MIN_LIBRARY_SIZE" ] && ViashError Bad arguments for option \'--min_library_size=*\': \'$VIASH_PAR_MIN_LIBRARY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_LIBRARY_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells_per_gene) - [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_PER_GENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_per_gene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells_per_gene=*) - [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene=*\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_PER_GENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --keep_feature_types) - if [ -z "$VIASH_PAR_KEEP_FEATURE_TYPES" ]; then - VIASH_PAR_KEEP_FEATURE_TYPES="$2" - else - VIASH_PAR_KEEP_FEATURE_TYPES="$VIASH_PAR_KEEP_FEATURE_TYPES:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --keep_feature_types. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --keep_feature_types=*) - if [ -z "$VIASH_PAR_KEEP_FEATURE_TYPES" ]; then - VIASH_PAR_KEEP_FEATURE_TYPES=$(ViashRemoveFlags "$1") - else - VIASH_PAR_KEEP_FEATURE_TYPES="$VIASH_PAR_KEEP_FEATURE_TYPES:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --verbose) - [ -n "$VIASH_PAR_VERBOSE" ] && ViashError Bad arguments for option \'--verbose\': \'$VIASH_PAR_VERBOSE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_VERBOSE=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MIN_LIBRARY_SIZE+x} ]; then - VIASH_PAR_MIN_LIBRARY_SIZE="0" -fi -if [ -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then - VIASH_PAR_MIN_CELLS_PER_GENE="0" -fi -if [ -z ${VIASH_PAR_VERBOSE+x} ]; then - VIASH_PAR_VERBOSE="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MIN_LIBRARY_SIZE" ]]; then - if ! [[ "$VIASH_PAR_MIN_LIBRARY_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_library_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS_PER_GENE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells_per_gene' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_VERBOSE" ]]; then - if ! [[ "$VIASH_PAR_VERBOSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--verbose' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_10xh5-XXXXXX").R -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -# treat warnings as errors -.viash_orig_warn <- options(warn = 2) - -par <- list( - "input" = $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "min_library_size" = $( if [ ! -z ${VIASH_PAR_MIN_LIBRARY_SIZE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_LIBRARY_SIZE" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "min_cells_per_gene" = $( if [ ! -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_CELLS_PER_GENE" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "keep_feature_types" = $( if [ ! -z ${VIASH_PAR_KEEP_FEATURE_TYPES+x} ]; then echo -n "strsplit('"; echo -n "$VIASH_PAR_KEEP_FEATURE_TYPES" | sed "s#['\\]#\\\\&#g"; echo "', split = ':')[[1]]"; else echo NULL; fi ), - "verbose" = $( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo -n "as.logical(toupper('"; echo -n "$VIASH_PAR_VERBOSE" | sed "s#['\\]#\\\\&#g"; echo "'))"; else echo NULL; fi ) -) -meta <- list( - "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), - "cpus" = $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_pb" = $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) -) - - -# restore original warn setting -options(.viash_orig_warn) -rm(.viash_orig_warn) - -## VIASH END - -if (par\$verbose) cat("Loading dependencies\\n") -requireNamespace("hdf5r", quietly = TRUE) - -if (par\$verbose) cat("Opening h5 file\\n") -h5 <- hdf5r::H5File\$new(par\$input, mode = "r") - -if (par\$verbose) cat("Reading data in memory\\n") -features__all_tag_keys <- h5[["matrix/features/_all_tag_keys"]][] - -features <- data.frame( - feature_type = h5[["matrix/features/feature_type"]][], - genome = h5[["matrix/features/genome"]][], - id = h5[["matrix/features/id"]][], - name = h5[["matrix/features/name"]][] -) - -mat <- Matrix::sparseMatrix( - i = h5[["matrix/indices"]][], - p = h5[["matrix/indptr"]][], - x = h5[["matrix/data"]][], - dims = h5[["matrix/shape"]][], - index1 = FALSE, - dimnames = list( - features\$id, - h5[["matrix/barcodes"]][] - ) -) - -if (par\$verbose) cat("Filtering out cells with library size < ", par\$min_library_size, "\\n", sep = "") -library_size <- Matrix::colSums(mat) -mat2 <- mat[, library_size >= par\$min_library_size, drop = FALSE] - -if (par\$verbose) cat("Filtering genes with num cells < ", par\$min_cells_per_gene, "\\n", sep = "") -num_cells <- Matrix::rowSums(mat2 > 0) -mat3 <- mat2[num_cells >= par\$min_cells_per_gene | features\$feature_type %in% par\$keep_feature_types, , drop = FALSE] -features2 <- features[match(rownames(mat3), features\$id), , drop = FALSE] - -# helper fun -set_with_type <- function(path, value) { - orig_dtype <- h5[[path]]\$get_type() - orig_chunk <- h5[[path]]\$chunk_dims - if (is.na(orig_chunk)) orig_chunk <- "auto" - h5new\$create_dataset(path, value, dtype = orig_dtype, chunk_dims = orig_chunk) -} - -# create new file -if (par\$verbose) cat("Saving h5 file at '", par\$output, "'\\n", sep = "") -h5new <- hdf5r::H5File\$new(par\$output, mode 
= "w") -zz <- h5new\$create_group("matrix") -zz <- h5new\$create_group("matrix/features") - -set_with_type("matrix/features/feature_type", features2\$feature_type) -set_with_type("matrix/features/genome", features2\$genome) -set_with_type("matrix/features/id", features2\$id) -set_with_type("matrix/features/name", features2\$name) -set_with_type("matrix/features/_all_tag_keys", features__all_tag_keys) -set_with_type("matrix/indices", mat3@i) -set_with_type("matrix/indptr", mat3@p) -set_with_type("matrix/data", as.integer(mat3@x)) -set_with_type("matrix/shape", dim(mat3)) -set_with_type("matrix/barcodes", colnames(mat3)) - -for (attname in hdf5r::h5attr_names(h5)) { - h5new\$create_attr(attname, hdf5r::h5attr(h5, attname)) -} -h5new\$close_all() -h5\$close_all() -VIASHMAIN -Rscript "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/qc/calculate_qc_metrics/.config.vsh.yaml b/target/docker/qc/calculate_qc_metrics/.config.vsh.yaml deleted file mode 100644 index 8e31355e8d3..00000000000 --- a/target/docker/qc/calculate_qc_metrics/.config.vsh.yaml +++ /dev/null @@ -1,235 +0,0 @@ -functionality: - name: "calculate_qc_metrics" - namespace: "qc" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - info: null - example: - - "raw_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_qc_metrics" - description: "Keys to select a boolean (containing only True or False) column\ - \ from .var.\nFor each cell, calculate the proportion of total values for\ - \ genes which are labeled 'True', \ncompared to the total sum of the values\ - \ for all genes.\n" - info: null - example: - - "ercc,highly_variable,mitochondrial" - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "boolean" - name: "--var_qc_metrics_fill_na_value" - description: "Fill any 'NA' values found in the columns specified with --var_qc_metrics\ - \ to 'True' or 'False'.\nas False.\n" 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--top_n_vars" - description: "Number of top vars to be used to calculate cumulative proportions.\n\ - If not specified, proportions are not calculated. `--top_n_vars 20,50` finds\n\ - cumulative proportion to the 20th and 50th most expressed vars.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Add basic quality control metrics to an .h5mu file.\n\nThe metrics\ - \ are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they\ - \ have slightly different names:\n\nVar metrics (name in this component -> name\ - \ in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs\ - \ -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts\ - \ -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n\ - \ - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics}\ - \ -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars ->\ - \ pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n\ - \ \n" - test_resources: - - type: "python_script" - 
path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scikit-learn~=1.2.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - - "scanpy~=1.9.5" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: 
"/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/calculate_qc_metrics" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics b/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics deleted file mode 100755 index ac2dc69a960..00000000000 --- a/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics +++ /dev/null @@ -1,1211 +0,0 @@ -#!/usr/bin/env bash - -# calculate_qc_metrics 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="calculate_qc_metrics" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "calculate_qc_metrics 0.12.3" - echo "" - echo "Add basic quality control metrics to an .h5mu file." - echo "" - echo "The metrics are comparable to what scanpy.pp.calculate_qc_metrics output," - echo "although they have slightly different names:" - echo "" - echo "Var metrics (name in this component -> name in scanpy):" - echo " - pct_dropout -> pct_dropout_by_{expr_type}" - echo " - num_nonzero_obs -> n_cells_by_{expr_type}" - echo " - obs_mean -> mean_{expr_type}" - echo " - total_counts -> total_{expr_type}" - echo "" - echo "Obs metrics:" - echo " - num_nonzero_vars -> n_genes_by_{expr_type}" - echo " - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}" - echo " - total_counts_{var_qc_metrics} -> total_{expr_type}_{qc_var}" - echo " - pct_of_counts_in_top_{top_n_vars}_vars ->" - echo "pct_{expr_type}_in_top_{n}_{var_type}" - echo " - total_counts -> total_{expr_type}" - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --layer" - echo " type: string" - echo " example: raw_counts" - echo "" - echo " --var_qc_metrics" - echo " type: string, 
multiple values allowed" - echo " example: ercc,highly_variable,mitochondrial" - echo " Keys to select a boolean (containing only True or False) column from" - echo " .var." - echo " For each cell, calculate the proportion of total values for genes which" - echo " are labeled 'True'," - echo " compared to the total sum of the values for all genes." - echo "" - echo " --var_qc_metrics_fill_na_value" - echo " type: boolean" - echo " Fill any 'NA' values found in the columns specified with" - echo " --var_qc_metrics to 'True' or 'False'." - echo " as False." - echo "" - echo " --top_n_vars" - echo " type: integer, multiple values allowed" - echo " Number of top vars to be used to calculate cumulative proportions." - echo " If not specified, proportions are not calculated. \`--top_n_vars 20,50\`" - echo " finds" - echo " cumulative proportion to the 20th and 50th most expressed vars." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scikit-learn~=1.2.0" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component qc calculate_qc_metrics" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-calculate_qc_metrics-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "calculate_qc_metrics 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --layer=*) - [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_qc_metrics) - if [ -z "$VIASH_PAR_VAR_QC_METRICS" ]; then - VIASH_PAR_VAR_QC_METRICS="$2" - else - VIASH_PAR_VAR_QC_METRICS="$VIASH_PAR_VAR_QC_METRICS,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_qc_metrics. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_qc_metrics=*) - if [ -z "$VIASH_PAR_VAR_QC_METRICS" ]; then - VIASH_PAR_VAR_QC_METRICS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_VAR_QC_METRICS="$VIASH_PAR_VAR_QC_METRICS,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --var_qc_metrics_fill_na_value) - [ -n "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" ] && ViashError Bad arguments for option \'--var_qc_metrics_fill_na_value\': \'$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_qc_metrics_fill_na_value. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_qc_metrics_fill_na_value=*) - [ -n "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" ] && ViashError Bad arguments for option \'--var_qc_metrics_fill_na_value=*\': \'$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --top_n_vars) - if [ -z "$VIASH_PAR_TOP_N_VARS" ]; then - VIASH_PAR_TOP_N_VARS="$2" - else - VIASH_PAR_TOP_N_VARS="$VIASH_PAR_TOP_N_VARS,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --top_n_vars. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --top_n_vars=*) - if [ -z "$VIASH_PAR_TOP_N_VARS" ]; then - VIASH_PAR_TOP_N_VARS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_TOP_N_VARS="$VIASH_PAR_TOP_N_VARS,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" ]]; then - if ! [[ "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--var_qc_metrics_fill_na_value' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [ -n "$VIASH_PAR_TOP_N_VARS" ]; then - IFS=',' - set -f - for val in $VIASH_PAR_TOP_N_VARS; do - if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--top_n_vars' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - done - set +f - unset IFS -fi - -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-calculate_qc_metrics-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -from mudata import read_h5mu -from scipy.sparse import issparse, isspmatrix_coo, csr_matrix -from sklearn.utils.sparsefuncs import mean_variance_axis -import numpy as np - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_qc_metrics': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'var_qc_metrics_fill_na_value': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'top_n_vars': $( if [ ! -z ${VIASH_PAR_TOP_N_VARS+x} ]; then echo "list(map(int, r'${VIASH_PAR_TOP_N_VARS//\'/\'\"\'\"r\'}'.split(',')))"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - input_data = read_h5mu(par["input"]) - modality_data = input_data.mod[par["modality"]] - var = modality_data.var - layer = modality_data.X if not par['layer'] else modality_data.layers[par['layer']] - if not issparse(layer): - raise NotImplementedError("Expected layer to be in sparse format.") - if isspmatrix_coo(layer): - layer = csr_matrix(layer) - layer.eliminate_zeros() - - # var statistics - num_nonzero_obs = layer.getnnz(axis=0) - obs_mean, _ = mean_variance_axis(layer, axis=0) - pct_dropout = (1 - num_nonzero_obs / layer.shape[0]) * 100 - total_counts_obs = np.ravel(layer.sum(axis=0)) - - # obs statistics - num_nonzero_vars = layer.getnnz(axis=1) - total_counts_var = np.ravel(layer.sum(axis=1)) - - top_metrics = {} - if par["top_n_vars"]: - par["top_n_vars"] = sorted(par["top_n_vars"]) - distributions = get_top_from_csr_matrix(layer, par["top_n_vars"]) - top_metrics = {distribution_size: distribution * 100 - for distribution_size, distribution - in zip(par["top_n_vars"], distributions.T)} - - total_expr_qc = {} - pct_expr_qc = {} - if par["var_qc_metrics"]: - for qc_metric in par["var_qc_metrics"]: - if not qc_metric in var: - raise ValueError(f"Value for --var_qc_metrics, {qc_metric} " - f"not found in .var 
for modality {par['modality']}") - qc_column = var[qc_metric] - if qc_column.isna().any(): - if par["var_qc_metrics_fill_na_value"] is None: - raise ValueError(f"The .var column '{qc_metric}', selected by '--var_qc_metrics', contains NA values. " - "It is ambiguous whether or not to include these values in the static calulation. " - "You can explicitly map the NA values to 'False' or 'True using '--var_qc_metrics_fill_na_value'") - else: - qc_column = qc_column.fillna(par['var_qc_metrics_fill_na_value'], inplace=False) - qc_column = qc_column.values - if set(np.unique(qc_column)) - {True, False}: - raise ValueError(f"Column {qc_metric} in .var for modality {par['modality']} " - f"must only contain boolean values") - - total_expr_qc[qc_metric] = np.ravel(layer[:, qc_column].sum(axis=1)) - pct_expr_qc[qc_metric] = total_expr_qc[qc_metric] / total_counts_var * 100 - - # Write all of the calculated statistics - modality_data.var = modality_data.var.assign( - **{"pct_dropout": pct_dropout, - "num_nonzero_obs": num_nonzero_obs, - "obs_mean": obs_mean, - "total_counts": total_counts_obs}) - - modality_data.obs = modality_data.obs.assign( - **({"num_nonzero_vars": num_nonzero_vars, - "total_counts": total_counts_var} | \\ - {f"pct_{qc_metric}": col for qc_metric, col in pct_expr_qc.items()} | \\ - {f"total_counts_{qc_metrix}": col for qc_metrix, col in total_expr_qc.items()}) | \\ - {f"pct_of_counts_in_top_{n_top}_vars": col for n_top, col in top_metrics.items()}) - - input_data.write(par["output"], compression=par["output_compression"]) - -def get_top_from_csr_matrix(matrix, top_n_genes): - # csr matrices stores a 3D matrix in a format such that data for individual cells - # are stored in 1 array. Another array (indptr) here stores the ranges of indices - # to select from the data-array (.e.g. data[indptr[0]:indptr[1]] for row 0) for each row. 
- # Another array 'indices' maps each element of data to a column - # (data and indices arrays have the same length) - top_n_genes = np.array(top_n_genes).astype(np.int64) - assert np.all(top_n_genes[:-1] <= top_n_genes[1:]), "top_n_genes must be sorted" - row_indices, data = matrix.indptr, matrix.data - number_of_rows, max_genes_to_parse = row_indices.size-1, top_n_genes[-1] - top_data = np.zeros((number_of_rows, max_genes_to_parse), - dtype=data.dtype) - # Loop over each row to create a dense matrix without the 0 counts, - # but not for the whole matrix, only store the genes up until - # the largest number of top n genes. - for row_number in range(number_of_rows): - row_start_index, row_end_index = row_indices[row_number], row_indices[row_number+1] - row_data = data[row_start_index:row_end_index] # all non-zero counts for an row - try: - # There are less genes with counts in the row than the - # maximum number of genes we would like to select - # all these genes are in the top genes, just store them - top_data[row_number, :row_end_index-row_start_index] = row_data - except ValueError: - # Store the counts for the top genes - top_data[row_number, :] = np.partition(row_data, -max_genes_to_parse)[-max_genes_to_parse:] - - # Partition works from smallest to largest, but we want largest - # so do smallest to largest first (but with reversed indices) - top_data = np.partition(top_data, max_genes_to_parse - top_n_genes) - # And then switch the order around - top_data = np.flip(top_data, axis=1) - - cumulative = top_data.cumsum(axis=1, dtype=np.float64)[:,top_n_genes-1] - return cumulative / np.array(matrix.sum(axis=1)) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/qc/calculate_qc_metrics/setup_logger.py b/target/docker/qc/calculate_qc_metrics/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/qc/calculate_qc_metrics/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/qc/fastqc/.config.vsh.yaml b/target/docker/qc/fastqc/.config.vsh.yaml deleted file mode 100644 index 5b9bc7c206c..00000000000 --- a/target/docker/qc/fastqc/.config.vsh.yaml +++ /dev/null @@ -1,156 +0,0 @@ -functionality: - name: "fastqc" - namespace: "qc" - version: "0.12.3" - arguments: - - type: "string" - name: "--mode" - alternatives: - - "-m" - description: "The mode in which the component works. Can be either files or dir." 
- info: null - default: - - "files" - required: false - choices: - - "files" - - "dir" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Directory containing input fastq files." - info: null - example: - - "fastq_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory to write reports to." - info: null - example: - - "qc" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--threads" - alternatives: - - "-t" - description: "Specifies the number of files which can be processed simultaneously.\ - \ Each thread will be allocated 250MB of\nmemory.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.\ - \ This component can take one or more files (by means of shell globbing) or a\ - \ complete directory.\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - 
- "fastqc" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/fastqc" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/fastqc/fastqc" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/qc/fastqc/fastqc b/target/docker/qc/fastqc/fastqc deleted file mode 100755 index 7cfbcb08d90..00000000000 --- a/target/docker/qc/fastqc/fastqc +++ /dev/null @@ -1,994 +0,0 @@ -#!/usr/bin/env bash - -# fastqc 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. 
-# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 
-VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="fastqc" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "fastqc 0.12.3" - echo "" - echo "Fastqc component, please see" - echo "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can" - echo "take one or more files (by means of shell globbing) or a complete directory." - echo "" - echo "Arguments:" - echo " -m, --mode" - echo " type: string" - echo " default: files" - echo " choices: [ files, dir ]" - echo " The mode in which the component works. Can be either files or dir." - echo "" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: fastq_dir" - echo " Directory containing input fastq files." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: qc" - echo " Output directory to write reports to." - echo "" - echo " -t, --threads" - echo " type: integer" - echo " Specifies the number of files which can be processed simultaneously." - echo " Each thread will be allocated 250MB of" - echo " memory." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y fastqc && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.description="Companion container for running component qc fastqc" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-fastqc-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname 
"$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "fastqc 0.12.3" - exit - ;; - --mode) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --mode=*) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -m) - [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'-m\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -m. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --threads) - [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREADS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --threads. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --threads=*) - [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads=*\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREADS=$(ViashRemoveFlags "$1") - shift 1 - ;; - -t) - [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREADS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODE+x} ]; then - VIASH_PAR_MODE="files" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_THREADS" ]]; then - if ! [[ "$VIASH_PAR_THREADS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--threads' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_MODE" ]; then - VIASH_PAR_MODE_CHOICES=("files:dir") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then - ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/qc_fastqc:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_fastqc:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_fastqc:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-fastqc-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "${VIASH_PAR_MODE}" | sed "s#'#'\"'\"'#g;s#.*#par_mode='&'#" ; else echo "# par_mode="; fi ) -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\"'\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -mkdir -p "\$par_output" - -if [ "\$par_mode" == "dir" ]; then - par_input="\$par_input/*.fastq.gz" -fi - -eval fastqc \${par_threads:+--threads \$par_threads} -o "\$par_output" "\$par_input" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/qc/multiqc/.config.vsh.yaml b/target/docker/qc/multiqc/.config.vsh.yaml deleted file mode 100644 index 29ac6ccd722..00000000000 --- a/target/docker/qc/multiqc/.config.vsh.yaml +++ /dev/null @@ -1,140 +0,0 @@ -functionality: - name: "multiqc" - namespace: "qc" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Inputs for MultiQC." 
- info: null - example: - - "input.txt" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Create report in the specified output directory." - info: null - example: - - "report" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "MultiQC aggregates results from bioinformatics analyses across many\ - \ samples into a single report.\nIt searches a given directory for analysis logs\ - \ and compiles a HTML report. It's a general use tool, perfect for summarising\ - \ the output from numerous bioinformatics tools.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/10x_5k_anticmv/fastqc/" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "multiqc" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 
2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/multiqc" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/multiqc/multiqc" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/qc/multiqc/multiqc b/target/docker/qc/multiqc/multiqc deleted file mode 100755 index 78551b56046..00000000000 --- a/target/docker/qc/multiqc/multiqc +++ /dev/null @@ -1,959 +0,0 @@ -#!/usr/bin/env bash - -# multiqc 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! 
-# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="multiqc" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "multiqc 0.12.3" - echo "" - echo "MultiQC aggregates results from bioinformatics analyses across many samples into" - echo "a single report." - echo "It searches a given directory for analysis logs and compiles a HTML report. It's" - echo "a general use tool, perfect for summarising the output from numerous" - echo "bioinformatics tools." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: input.txt" - echo " Inputs for MultiQC." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: report" - echo " Create report in the specified output directory." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
- exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "multiqc" - -LABEL org.opencontainers.image.description="Companion container for running component qc multiqc" -LABEL org.opencontainers.image.created="2024-01-25T10:13:56Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format 
`[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-multiqc-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "multiqc 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - IFS=':' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=':' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) - var=$(ViashAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=':' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/qc_multiqc:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_multiqc:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_multiqc:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-multiqc-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import subprocess - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -# Run MultiQC -subprocess.run(["multiqc", "-o", par["output"]] + par["input"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=':' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT:""$(ViashStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/query/cellxgene_census/.config.vsh.yaml b/target/docker/query/cellxgene_census/.config.vsh.yaml deleted file mode 100644 index 5d70f14e279..00000000000 --- a/target/docker/query/cellxgene_census/.config.vsh.yaml +++ /dev/null @@ -1,260 +0,0 @@ -functionality: - name: "cellxgene_census" - namespace: "query" - version: "0.12.3" - authors: - - name: "Matthias Beyens" - info: - role: "Contributor" - links: - github: "MatthiasBeyens" - orcid: "0000-0003-3304-0706" - email: "matthias.beyens@gmail.com" - linkedin: "mbeyens" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Inputs" - description: "Arguments related to the input (aka query) dataset." - arguments: - - type: "string" - name: "--input_database" - description: "Full input database S3 prefix URL. Default: CellxGene Census" - info: null - example: - - "s3://" - default: - - "CellxGene" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to store the output in." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cellxgene_release" - description: "CellxGene Census release date. 
More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html" - info: null - default: - - "2023-05-15" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Query" - description: "Arguments related to the query." - arguments: - - type: "string" - name: "--species" - description: "Specie(s) of interest. If not specified, Homo Sapiens will be\ - \ queried." - info: null - example: - - "homo_sapiens" - default: - - "homo_sapiens" - required: false - choices: - - "homo_sapiens" - - "mus_musculus" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cell_query" - description: "The query for selecting the cells as defined by the cellxgene\ - \ census schema." - info: null - example: - - "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136',\ - \ 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cells_filter_columns" - description: "The query for selecting the cells as defined by the cellxgene\ - \ census schema." - info: null - example: - - "dataset_id" - - "tissue" - - "assay" - - "disease" - - "cell_type" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_cells_filter_columns" - description: "Minimum of amount of summed cells_filter_columns cells" - info: null - example: - - 100.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - description: "Output arguments." - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Query CellxGene Census or user-specified TileDBSoma object, and eventually\ - \ fetch cell and gene metadata or/and expression counts." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "cellxgene-census~=1.2.0" - - "obonet~=1.0.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: 
"memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/query/cellxgene_census" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/query/cellxgene_census/cellxgene_census" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/query/cellxgene_census/cellxgene_census b/target/docker/query/cellxgene_census/cellxgene_census deleted file mode 100755 index 216f6adccf7..00000000000 --- a/target/docker/query/cellxgene_census/cellxgene_census +++ /dev/null @@ -1,1223 +0,0 @@ -#!/usr/bin/env bash - -# cellxgene_census 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Matthias Beyens -# * Dries De Maeyer (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# 
usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="cellxgene_census" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "cellxgene_census 0.12.3" - echo "" - echo "Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch" - echo "cell and gene metadata or/and expression counts." - echo "" - echo "Inputs:" - echo " Arguments related to the input (aka query) dataset." - echo "" - echo " --input_database" - echo " type: string" - echo " default: CellxGene" - echo " example: s3://" - echo " Full input database S3 prefix URL. Default: CellxGene Census" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " Which modality to store the output in." - echo "" - echo " --cellxgene_release" - echo " type: string" - echo " default: 2023-05-15" - echo " CellxGene Census release date. More information:" - echo " " - echo "https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html" - echo "" - echo "Query:" - echo " Arguments related to the query." - echo "" - echo " --species" - echo " type: string" - echo " default: homo_sapiens" - echo " example: homo_sapiens" - echo " choices: [ homo_sapiens, mus_musculus ]" - echo " Specie(s) of interest. If not specified, Homo Sapiens will be queried." 
- echo "" - echo " --cell_query" - echo " type: string" - echo " example: is_primary_data == True and cell_type_ontology_term_id in" - echo "['CL:0000136', 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" - echo " The query for selecting the cells as defined by the cellxgene census" - echo " schema." - echo "" - echo " --cells_filter_columns" - echo " type: string, multiple values allowed" - echo " example: dataset_id:tissue:assay:disease:cell_type" - echo " The query for selecting the cells as defined by the cellxgene census" - echo " schema." - echo "" - echo " --min_cells_filter_columns" - echo " type: double" - echo " example: 100.0" - echo " Minimum of amount of summed cells_filter_columns cells" - echo "" - echo "Outputs:" - echo " Output arguments." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9 - -ENTRYPOINT [] - - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "cellxgene-census~=1.2.0" "obonet~=1.0.0" - -LABEL org.opencontainers.image.authors="Matthias Beyens, Dries De Maeyer" -LABEL org.opencontainers.image.description="Companion container for running component query cellxgene_census" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image 
identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellxgene_census-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "cellxgene_census 0.12.3" - exit - ;; - --input_database) - [ -n "$VIASH_PAR_INPUT_DATABASE" ] && ViashError Bad arguments for option \'--input_database\': \'$VIASH_PAR_INPUT_DATABASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_DATABASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_database. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_database=*) - [ -n "$VIASH_PAR_INPUT_DATABASE" ] && ViashError Bad arguments for option \'--input_database=*\': \'$VIASH_PAR_INPUT_DATABASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_DATABASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cellxgene_release) - [ -n "$VIASH_PAR_CELLXGENE_RELEASE" ] && ViashError Bad arguments for option \'--cellxgene_release\': \'$VIASH_PAR_CELLXGENE_RELEASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELLXGENE_RELEASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --cellxgene_release. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --cellxgene_release=*) - [ -n "$VIASH_PAR_CELLXGENE_RELEASE" ] && ViashError Bad arguments for option \'--cellxgene_release=*\': \'$VIASH_PAR_CELLXGENE_RELEASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELLXGENE_RELEASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --species) - [ -n "$VIASH_PAR_SPECIES" ] && ViashError Bad arguments for option \'--species\': \'$VIASH_PAR_SPECIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SPECIES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --species. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --species=*) - [ -n "$VIASH_PAR_SPECIES" ] && ViashError Bad arguments for option \'--species=*\': \'$VIASH_PAR_SPECIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SPECIES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cell_query) - [ -n "$VIASH_PAR_CELL_QUERY" ] && ViashError Bad arguments for option \'--cell_query\': \'$VIASH_PAR_CELL_QUERY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELL_QUERY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_query. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --cell_query=*) - [ -n "$VIASH_PAR_CELL_QUERY" ] && ViashError Bad arguments for option \'--cell_query=*\': \'$VIASH_PAR_CELL_QUERY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CELL_QUERY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cells_filter_columns) - if [ -z "$VIASH_PAR_CELLS_FILTER_COLUMNS" ]; then - VIASH_PAR_CELLS_FILTER_COLUMNS="$2" - else - VIASH_PAR_CELLS_FILTER_COLUMNS="$VIASH_PAR_CELLS_FILTER_COLUMNS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --cells_filter_columns. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --cells_filter_columns=*) - if [ -z "$VIASH_PAR_CELLS_FILTER_COLUMNS" ]; then - VIASH_PAR_CELLS_FILTER_COLUMNS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_CELLS_FILTER_COLUMNS="$VIASH_PAR_CELLS_FILTER_COLUMNS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --min_cells_filter_columns) - [ -n "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" ] && ViashError Bad arguments for option \'--min_cells_filter_columns\': \'$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_FILTER_COLUMNS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_filter_columns. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells_filter_columns=*) - [ -n "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" ] && ViashError Bad arguments for option \'--min_cells_filter_columns=*\': \'$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_FILTER_COLUMNS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number 
* 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_INPUT_DATABASE+x} ]; then - VIASH_PAR_INPUT_DATABASE="CellxGene" -fi -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_CELLXGENE_RELEASE+x} ]; then - VIASH_PAR_CELLXGENE_RELEASE="2023-05-15" -fi -if [ -z ${VIASH_PAR_SPECIES+x} ]; then - VIASH_PAR_SPECIES="homo_sapiens" -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_cells_filter_columns' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_SPECIES" ]; then - VIASH_PAR_SPECIES_CHOICES=("homo_sapiens:mus_musculus") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SPECIES_CHOICES[*]}:" =~ ":$VIASH_PAR_SPECIES:" ]]; then - ViashError '--species' specified value of \'$VIASH_PAR_SPECIES\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellxgene_census-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import os -import cellxgene_census -import mudata as mu -import anndata as ad - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_database': $( if [ ! -z ${VIASH_PAR_INPUT_DATABASE+x} ]; then echo "r'${VIASH_PAR_INPUT_DATABASE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cellxgene_release': $( if [ ! -z ${VIASH_PAR_CELLXGENE_RELEASE+x} ]; then echo "r'${VIASH_PAR_CELLXGENE_RELEASE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'species': $( if [ ! -z ${VIASH_PAR_SPECIES+x} ]; then echo "r'${VIASH_PAR_SPECIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cell_query': $( if [ ! -z ${VIASH_PAR_CELL_QUERY+x} ]; then echo "r'${VIASH_PAR_CELL_QUERY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cells_filter_columns': $( if [ ! -z ${VIASH_PAR_CELLS_FILTER_COLUMNS+x} ]; then echo "r'${VIASH_PAR_CELLS_FILTER_COLUMNS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'min_cells_filter_columns': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS+x} ]; then echo "float(r'${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def connect_census(input_database, release): - """ - Connect to CellxGene Census or user-provided TileDBSoma object - """ - if input_database != "CellxGene": - raise NotImplementedError( - "Custom census database is not implemented yet!" - ) - - logger.info( - "Initializing %s release %s", - input_database, release - ) - return cellxgene_census.open_soma( - census_version = release - ) - - -def get_anndata(census_connection, cell_query, species): - logger.info( - "Getting gene expression data based on %s query.", - cell_query - ) - return cellxgene_census.get_anndata( - census = census_connection, - obs_value_filter = cell_query, - organism = species - ) - - -def add_cellcensus_metadata_obs(census_connection, query_data): - logger.info( - "Adding extented metadata to gene expression data." 
- ) - census_datasets = census_connection["census_info"]["datasets"].read().concat().to_pandas() - - query_data.obs.dataset_id = query_data.obs.dataset_id.astype("category") - - dataset_info = census_datasets[census_datasets.dataset_id.isin(query_data.obs.dataset_id.cat.categories)]\\ - [['collection_id', 'collection_name', 'collection_doi', 'dataset_id', 'dataset_title']]\\ - .reset_index(drop=True)\\ - .apply(lambda x: x.astype('category')) - - return query_data.obs.merge( - dataset_info, on='dataset_id', how = 'left' - ) - - -def cellcensus_cell_filter(query_data, cells_filter_columns, min_cells_filter_columns): - t0 = query_data.shape - query_data = query_data[ - query_data.obs.groupby(cells_filter_columns)["soma_joinid"].transform('count') >= min_cells_filter_columns - ] - t1 = query_data.shape - logger.info( - 'Removed %s cells based on %s min_cells_filter_columns of %s cells_filter_columns.' - % ((t0[0] - t1[0]), min_cells_filter_columns, cells_filter_columns) - ) - return query_data - - -def write_mudata(mdata, output_location, compression): - logger.info("Writing %s", output_location) - mdata.write_h5mu( - output_location, - compression=compression - ) - - -def main(): - - # start dev - logger.info('cells_filter_columns: %s' % par["cells_filter_columns"]) - logger.info('min_cells_filter_columns: %s' % par["min_cells_filter_columns"]) - # end dev - - census_connection = connect_census( - par["input_database"], - par["cellxgene_release"] - ) - - query_data = get_anndata( - census_connection, - par["cell_query"], - par["species"] - ) - - query_data.obs = add_cellcensus_metadata_obs( - census_connection, - query_data - ) - - census_connection.close() - del census_connection - - if par["cells_filter_columns"]: - if not par["min_cells_filter_columns"]: - raise NotImplementedError( - "You specified cells_filter_columns, thus add min_cells_filter_columns!" 
- ) - query_data = cellcensus_cell_filter( - query_data, - par["cells_filter_columns"], - par["min_cells_filter_columns"] - ) - - query_data.var_names = query_data.var["feature_id"] - query_data.var["gene_symbol"] = query_data.var["feature_name"] - - # Create empty mudata file - mdata = mu.MuData({par["modality"]: ad.AnnData()}) - - write_mudata( - mdata, - par["output"], - par["output_compression"] - ) - - mu.write_h5ad(par["output"], data=query_data, mod=par["modality"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/query/cellxgene_census/setup_logger.py b/target/docker/query/cellxgene_census/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/query/cellxgene_census/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/reference/build_bdrhap_reference/.config.vsh.yaml b/target/docker/reference/build_bdrhap_reference/.config.vsh.yaml deleted file mode 100644 index b5a7481b59d..00000000000 --- a/target/docker/reference/build_bdrhap_reference/.config.vsh.yaml +++ /dev/null @@ -1,186 +0,0 @@ -functionality: - name: "build_bdrhap_reference" - namespace: "reference" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--genome_fasta" - description: "Reference genome fasta." 
- info: null - example: - - "genome_sequence.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - description: "Reference transcriptome annotation." - info: null - example: - - "transcriptome_annotation.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Star index" - info: null - example: - - "star_index.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Compile a reference into a STAR index compatible with the BD Rhapsody\ - \ pipeline." - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "bdgenomics/rhapsody:1.10.1" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "pigz" - interactive: false - test_setup: - - type: "docker" - env: - - "GOPATH /root/go" - - "GOBIN /root/go/bin" - - "PATH \"${PATH}:/root/go/bin\"" - - type: "apt" - packages: - - "golang" - interactive: false - - type: "docker" - run: - - "go get golang.org/dl/go1.20.6 && go1.20.6 download && \\\ngit clone --branch\ - \ v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/ &&\ - \ go1.20.6 build && cp 
seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_bdrhap_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference b/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference deleted file mode 100755 index e5be7c7e6c9..00000000000 --- a/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference +++ /dev/null @@ -1,972 +0,0 @@ -#!/usr/bin/env bash - -# 
build_bdrhap_reference 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - 
SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="build_bdrhap_reference" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "build_bdrhap_reference 0.12.3" - echo "" - echo "Compile a reference into a STAR index compatible with the BD Rhapsody pipeline." - echo "" - echo "Arguments:" - echo " --genome_fasta" - echo " type: file, required parameter, file must exist" - echo " example: genome_sequence.fa.gz" - echo " Reference genome fasta." - echo "" - echo " --transcriptome_gtf" - echo " type: file, required parameter, file must exist" - echo " example: transcriptome_annotation.gtf.gz" - echo " Reference transcriptome annotation." 
- echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: star_index.tar.gz" - echo " Star index" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM bdgenomics/rhapsody:1.10.1 - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y pigz && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component reference build_bdrhap_reference" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-build_bdrhap_reference-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "build_bdrhap_reference 0.12.3" - exit - ;; - --genome_fasta) - [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOME_FASTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genome_fasta=*) - [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta=*\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --transcriptome_gtf) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome_gtf=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units 
-if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then - ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then - ViashError '--transcriptome_gtf' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ] && [ ! -e "$VIASH_PAR_GENOME_FASTA" ]; then - ViashError "Input file '$VIASH_PAR_GENOME_FASTA' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GENOME_FASTA")" ) - VIASH_PAR_GENOME_FASTA=$(ViashAutodetectMount "$VIASH_PAR_GENOME_FASTA") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-build_bdrhap_reference-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) -$( if [ ! 
-z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -meta_cpus="\${meta_cpus:-1}" - -# process params -extra_params=( ) - -if [ ! -z "\$meta_cpus" ]; then - extra_params+=( "--runThreadN \$meta_cpus" ) -fi - -echo "> Unzipping input files" -unpigz -c "\$par_genome_fasta" > "\$tmpdir/genome.fa" -unpigz -c "\$par_transcriptome_gtf" > "\$tmpdir/transcriptome.gtf" - -echo "> Building star index" -mkdir "\$tmpdir/out" -STAR \\ - --runMode genomeGenerate \\ - --genomeDir "\$tmpdir/out" \\ - --genomeFastaFiles "\$tmpdir/genome.fa" \\ - --sjdbGTFfile "\$tmpdir/transcriptome.gtf" \\ - --sjdbOverhang 100 \\ - --genomeSAindexNbases 11 \\ - "\${extra_params[@]}" - -echo "> Creating archive" -tar --use-compress-program="pigz -k " -cf "\$par_output" -C "\$tmpdir/out" . -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_PAR_GENOME_FASTA=$(ViashStripAutomount "$VIASH_PAR_GENOME_FASTA") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/reference/build_cellranger_reference/.config.vsh.yaml b/target/docker/reference/build_cellranger_reference/.config.vsh.yaml deleted file mode 100644 index 37e084ee631..00000000000 --- a/target/docker/reference/build_cellranger_reference/.config.vsh.yaml +++ /dev/null @@ -1,187 +0,0 @@ -functionality: - name: "build_cellranger_reference" - namespace: "reference" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--genome_fasta" - description: "Reference genome fasta." 
- info: null - example: - - "genome_sequence.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - description: "Reference transcriptome annotation." - info: null - example: - - "transcriptome_annotation.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output folder" - info: null - example: - - "cellranger_reference" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Build a Cell Ranger-compatible reference folder from user-supplied\ - \ genome FASTA and gene GTF files. Creates a new folder named after the genome." - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/cellranger:7.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "pigz" - interactive: false - test_setup: - - type: "docker" - env: - - "GOPATH /root/go" - - "GOBIN /root/go/bin" - - "PATH \"${PATH}:/root/go/bin\"" - - type: "apt" - packages: - - "golang" - - "git" - interactive: false - - type: "docker" - run: - - "go install golang.org/dl/go1.20.6@latest && go1.20.6 download && \\\ngit clone\ - \ --branch 
v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/\ - \ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_cellranger_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_cellranger_reference/build_cellranger_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/reference/build_cellranger_reference/build_cellranger_reference b/target/docker/reference/build_cellranger_reference/build_cellranger_reference deleted file mode 100755 index b9488372256..00000000000 --- 
a/target/docker/reference/build_cellranger_reference/build_cellranger_reference +++ /dev/null @@ -1,977 +0,0 @@ -#!/usr/bin/env bash - -# build_cellranger_reference 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function 
ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. 
-function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="build_cellranger_reference" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "build_cellranger_reference 0.12.3" - echo "" - echo "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA" - echo "and gene GTF files. Creates a new folder named after the genome." - echo "" - echo "Arguments:" - echo " --genome_fasta" - echo " type: file, required parameter, file must exist" - echo " example: genome_sequence.fa.gz" - echo " Reference genome fasta." - echo "" - echo " --transcriptome_gtf" - echo " type: file, required parameter, file must exist" - echo " example: transcriptome_annotation.gtf.gz" - echo " Reference transcriptome annotation." 
- echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: cellranger_reference" - echo " Output folder" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ghcr.io/data-intuitive/cellranger:7.0 - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y pigz && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component reference build_cellranger_reference" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-build_cellranger_reference-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "build_cellranger_reference 0.12.3" - exit - ;; - --genome_fasta) - [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOME_FASTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genome_fasta=*) - [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta=*\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --transcriptome_gtf) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome_gtf=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in 
different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then - ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then - ViashError '--transcriptome_gtf' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ] && [ ! -e "$VIASH_PAR_GENOME_FASTA" ]; then - ViashError "Input file '$VIASH_PAR_GENOME_FASTA' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GENOME_FASTA")" ) - VIASH_PAR_GENOME_FASTA=$(ViashAutodetectMount "$VIASH_PAR_GENOME_FASTA") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-build_cellranger_reference-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) -$( if [ ! 
-z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -# just to make sure -par_genome_fasta=\`realpath \$par_genome_fasta\` -par_transcriptome_gtf=\`realpath \$par_transcriptome_gtf\` -par_output=\`realpath \$par_output\` - -# process params -extra_params=( ) - -if [ ! -z "\$meta_cpus" ]; then - extra_params+=( "--nthreads=\$meta_cpus" ) -fi -if [ ! -z "\$meta_memory_gb" ]; then - # always keep 2gb for the OS itself - memory_gb=\`python -c "print(int('\$meta_memory_gb') - 2)"\` - extra_params+=( "--memgb=\$memory_gb" ) -fi - -echo "> Unzipping input files" -unpigz -c "\$par_genome_fasta" > "\$tmpdir/genome.fa" - -echo "> Building star index" -cd "\$tmpdir" -cellranger mkref \\ - --fasta "\$tmpdir/genome.fa" \\ - --genes "\$par_transcriptome_gtf" \\ - --genome output \\ - "\${extra_params[@]}" - -echo "> Creating archive" -tar --use-compress-program="pigz -k " -cf "\$par_output" -C "\$tmpdir/output" . -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_PAR_GENOME_FASTA=$(ViashStripAutomount "$VIASH_PAR_GENOME_FASTA") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/reference/make_reference/.config.vsh.yaml b/target/docker/reference/make_reference/.config.vsh.yaml deleted file mode 100644 index 96b46117140..00000000000 --- a/target/docker/reference/make_reference/.config.vsh.yaml +++ /dev/null @@ -1,212 +0,0 @@ -functionality: - name: "make_reference" - namespace: "reference" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--genome_fasta" - description: "Reference genome fasta. 
Example: " - info: null - example: - - "genome_fasta.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - description: "Reference transcriptome annotation." - info: null - example: - - "transcriptome.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--ercc" - description: "ERCC sequence and annotation file." - info: null - example: - - "ercc.zip" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--subset_regex" - description: "Will subset the reference chromosomes using the given regex." - info: null - example: - - "(ERCC-00002|chr1)" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_fasta" - description: "Output genome sequence fasta." - info: null - example: - - "genome_sequence.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_gtf" - description: "Output transcriptome annotation gtf." 
- info: null - example: - - "transcriptome_annotation.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Preprocess and build a transcriptome reference.\n\nExample input files\ - \ are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n\ - \ - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n\ - \ - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "pigz" - - "seqkit" - - "curl" - - "wget" - - "unzip" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - 
mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/make_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/make_reference/make_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/reference/make_reference/make_reference b/target/docker/reference/make_reference/make_reference deleted file mode 100755 index e42d725f88d..00000000000 --- a/target/docker/reference/make_reference/make_reference +++ /dev/null @@ -1,1076 +0,0 @@ -#!/usr/bin/env bash - -# make_reference 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Angela Oliveira Pisco (author) -# * Robrecht Cannoodt (author, maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events 
depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="make_reference" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "make_reference 0.12.3" - echo "" - echo "Preprocess and build a transcriptome reference." - echo "" - echo "Example input files are:" - echo " - \`genome_fasta\`:" - echo "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz" - echo " - \`transcriptome_gtf\`:" - echo "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz" - echo " - \`ercc\`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip" - echo "" - echo "Arguments:" - echo " --genome_fasta" - echo " type: file, required parameter, file must exist" - echo " example: genome_fasta.fa.gz" - echo " Reference genome fasta. Example:" - echo "" - echo " --transcriptome_gtf" - echo " type: file, required parameter, file must exist" - echo " example: transcriptome.gtf.gz" - echo " Reference transcriptome annotation." - echo "" - echo " --ercc" - echo " type: file, file must exist" - echo " example: ercc.zip" - echo " ERCC sequence and annotation file." - echo "" - echo " --subset_regex" - echo " type: string" - echo " example: (ERCC-00002|chr1)" - echo " Will subset the reference chromosomes using the given regex." 
- echo "" - echo " --output_fasta" - echo " type: file, required parameter, output, file must exist" - echo " example: genome_sequence.fa.gz" - echo " Output genome sequence fasta." - echo "" - echo " --output_gtf" - echo " type: file, required parameter, output, file must exist" - echo " example: transcriptome_annotation.gtf.gz" - echo " Output transcriptome annotation gtf." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y pigz seqkit curl wget unzip && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component reference make_reference" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-make_reference-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "make_reference 0.12.3" - exit - ;; - --genome_fasta) - [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOME_FASTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --genome_fasta=*) - [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta=*\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --transcriptome_gtf) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome_gtf=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --ercc) - [ -n "$VIASH_PAR_ERCC" ] && ViashError Bad arguments for option \'--ercc\': \'$VIASH_PAR_ERCC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ERCC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --ercc. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --ercc=*) - [ -n "$VIASH_PAR_ERCC" ] && ViashError Bad arguments for option \'--ercc=*\': \'$VIASH_PAR_ERCC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ERCC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --subset_regex) - [ -n "$VIASH_PAR_SUBSET_REGEX" ] && ViashError Bad arguments for option \'--subset_regex\': \'$VIASH_PAR_SUBSET_REGEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSET_REGEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --subset_regex. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --subset_regex=*) - [ -n "$VIASH_PAR_SUBSET_REGEX" ] && ViashError Bad arguments for option \'--subset_regex=*\': \'$VIASH_PAR_SUBSET_REGEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSET_REGEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_fasta) - [ -n "$VIASH_PAR_OUTPUT_FASTA" ] && ViashError Bad arguments for option \'--output_fasta\': \'$VIASH_PAR_OUTPUT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FASTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fasta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_fasta=*) - [ -n "$VIASH_PAR_OUTPUT_FASTA" ] && ViashError Bad arguments for option \'--output_fasta=*\': \'$VIASH_PAR_OUTPUT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FASTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_gtf) - [ -n "$VIASH_PAR_OUTPUT_GTF" ] && ViashError Bad arguments for option \'--output_gtf\': \'$VIASH_PAR_OUTPUT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_gtf. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_gtf=*) - [ -n "$VIASH_PAR_OUTPUT_GTF" ] && ViashError Bad arguments for option \'--output_gtf=*\': \'$VIASH_PAR_OUTPUT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then - ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then - ViashError '--transcriptome_gtf' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT_FASTA+x} ]; then - ViashError '--output_fasta' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT_GTF+x} ]; then - ViashError '--output_gtf' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ] && [ ! -e "$VIASH_PAR_GENOME_FASTA" ]; then - ViashError "Input file '$VIASH_PAR_GENOME_FASTA' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_ERCC" ] && [ ! -e "$VIASH_PAR_ERCC" ]; then - ViashError "Input file '$VIASH_PAR_ERCC' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FASTA")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FASTA")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_GTF")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_GTF")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GENOME_FASTA")" ) - VIASH_PAR_GENOME_FASTA=$(ViashAutodetectMount "$VIASH_PAR_GENOME_FASTA") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_ERCC" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_ERCC")" ) - VIASH_PAR_ERCC=$(ViashAutodetectMount "$VIASH_PAR_ERCC") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_FASTA")" ) - VIASH_PAR_OUTPUT_FASTA=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_FASTA") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_FASTA" ) -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT_GTF" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_GTF")" ) - VIASH_PAR_OUTPUT_GTF=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_GTF") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_GTF" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/reference_make_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_make_reference:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_make_reference:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-make_reference-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_ERCC+x} ]; then echo "${VIASH_PAR_ERCC}" | sed "s#'#'\"'\"'#g;s#.*#par_ercc='&'#" ; else echo "# par_ercc="; fi ) -$( if [ ! -z ${VIASH_PAR_SUBSET_REGEX+x} ]; then echo "${VIASH_PAR_SUBSET_REGEX}" | sed "s#'#'\"'\"'#g;s#.*#par_subset_regex='&'#" ; else echo "# par_subset_regex="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_FASTA+x} ]; then echo "${VIASH_PAR_OUTPUT_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_output_fasta='&'#" ; else echo "# par_output_fasta="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_GTF+x} ]; then echo "${VIASH_PAR_OUTPUT_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_output_gtf='&'#" ; else echo "# par_output_gtf="; fi ) -$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -echo "> Processing genome sequence" -genome_fasta="\$tmpdir/genome_sequence.fa" -# curl "\$par_genome_fasta" | gunzip > "\$genome_fasta" -gunzip -c "\$par_genome_fasta" > "\$genome_fasta" - -echo "> Processing transcriptome annotation" -transcriptome_gtf="\$tmpdir/transcriptome_annotation.gtf" -# curl "\$par_transcriptome_gtf" | gunzip > "\$transcriptome_gtf" -gunzip -c "\$par_transcriptome_gtf"> "\$transcriptome_gtf" - -if [[ ! -z \$par_ercc ]]; then - echo "> Processing ERCC sequences" - # wget "\$par_ercc" -O "\$tmpdir/ercc.zip" - # unzip "\$tmpdir/ercc.zip" -d "\$tmpdir" - unzip "\$par_ercc" -d "\$tmpdir" - cat "\$tmpdir/ERCC92.fa" >> "\$genome_fasta" - cat "\$tmpdir/ERCC92.gtf" >> "\$transcriptome_gtf" -fi - -# create output & filter reference if so desired -if [[ ! 
-z \$par_subset_regex ]]; then - echo "> Subsetting reference with regex '\$par_subset_regex'" - awk '{print \$1}' "\$genome_fasta" | seqkit grep -r -p "^\$par_subset_regex\\\$" > "\$tmpdir/genome_sequence_filtered.fa" - genome_fasta="\$tmpdir/genome_sequence_filtered.fa" - grep -E "^\$par_subset_regex[^A-Za-z0-9]" "\$transcriptome_gtf" > "\$tmpdir/transcriptome_annotation_filtered.gtf" - transcriptome_gtf="\$tmpdir/transcriptome_annotation_filtered.gtf" - - echo - echo "Matched tags:" - cat "\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq - echo -fi - -echo "> Gzipping outputs" -pigz -c "\$genome_fasta" > "\$par_output_fasta" -pigz -c "\$transcriptome_gtf" > "\$par_output_gtf" - -# to do: re enable -# echo "> Sanity check of outputs" -# readarray -t fasta_tags < <( cat "\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq ) -# readarray -t transcriptome_tags < <( cat "\$transcriptome_gtf" | cut -d\$'\\t' -f1 | sort | uniq | grep '^[^#]' ) -# [ "\${fasta_tags[*]}" == "\${transcriptome_tags[*]}" ] || { echo "Warning: fasta tags differ from transcriptome tags"; exit 1; } -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then - VIASH_PAR_GENOME_FASTA=$(ViashStripAutomount "$VIASH_PAR_GENOME_FASTA") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then - VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") -fi -if [ ! -z "$VIASH_PAR_ERCC" ]; then - VIASH_PAR_ERCC=$(ViashStripAutomount "$VIASH_PAR_ERCC") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ]; then - VIASH_PAR_OUTPUT_FASTA=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_FASTA") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ]; then - VIASH_PAR_OUTPUT_GTF=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_GTF") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ] && [ ! -e "$VIASH_PAR_OUTPUT_FASTA" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_FASTA' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ] && [ ! -e "$VIASH_PAR_OUTPUT_GTF" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_GTF' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/report/mermaid/.config.vsh.yaml b/target/docker/report/mermaid/.config.vsh.yaml deleted file mode 100644 index 37e563fa2ed..00000000000 --- a/target/docker/report/mermaid/.config.vsh.yaml +++ /dev/null @@ -1,185 +0,0 @@ -functionality: - name: "mermaid" - namespace: "report" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input directory" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Generated network as output." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_format" - description: "Output format for the generated image. 
By default will be inferred\ - \ from the extension \nof the file specified with --output.\n" - info: null - required: false - choices: - - "svg" - - "png" - - "pdf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--width" - description: "Width of the page" - info: null - default: - - 800 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--height" - description: "Height of the page" - info: null - default: - - 600 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--background_color" - description: "Background color for pngs/svgs (not pdfs)" - info: null - example: - - "#F0F0F0" - default: - - "white" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - - type: "file" - path: "./puppeteer-config.json" - description: "Generates a network from mermaid code.\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "node:20-bullseye" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "javascript" - npm: - - "@mermaid-js/mermaid-cli" - - type: "apt" - packages: - - "chromium" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: 
"memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/report/mermaid/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/report/mermaid" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/report/mermaid/mermaid" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/report/mermaid/mermaid b/target/docker/report/mermaid/mermaid deleted file mode 100755 index 77fec013964..00000000000 --- a/target/docker/report/mermaid/mermaid +++ /dev/null @@ -1,1029 +0,0 @@ -#!/usr/bin/env bash - -# mermaid 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="mermaid" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "mermaid 0.12.3" - echo "" - echo "Generates a network from mermaid code." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input directory" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Generated network as output." - echo "" - echo " --output_format" - echo " type: string" - echo " choices: [ svg, png, pdf ]" - echo " Output format for the generated image. By default will be inferred from" - echo " the extension" - echo " of the file specified with --output." 
- echo "" - echo " --width" - echo " type: integer" - echo " default: 800" - echo " Width of the page" - echo "" - echo " --height" - echo " type: integer" - echo " default: 600" - echo " Height of the page" - echo "" - echo " --background_color" - echo " type: string" - echo " default: white" - echo " example: #F0F0F0" - echo " Background color for pngs/svgs (not pdfs)" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM node:20-bullseye - -ENTRYPOINT [] - - -RUN npm install -g "@mermaid-js/mermaid-cli" - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y chromium && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.authors="Dries De Maeyer" -LABEL org.opencontainers.image.description="Companion container for running component report mermaid" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-mermaid-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "mermaid 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_format) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_format. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_format=*) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format=*\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --width) - [ -n "$VIASH_PAR_WIDTH" ] && ViashError Bad arguments for option \'--width\': \'$VIASH_PAR_WIDTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WIDTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --width. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --width=*) - [ -n "$VIASH_PAR_WIDTH" ] && ViashError Bad arguments for option \'--width=*\': \'$VIASH_PAR_WIDTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WIDTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --height) - [ -n "$VIASH_PAR_HEIGHT" ] && ViashError Bad arguments for option \'--height\': \'$VIASH_PAR_HEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_HEIGHT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --height. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --height=*) - [ -n "$VIASH_PAR_HEIGHT" ] && ViashError Bad arguments for option \'--height=*\': \'$VIASH_PAR_HEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_HEIGHT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --background_color) - [ -n "$VIASH_PAR_BACKGROUND_COLOR" ] && ViashError Bad arguments for option \'--background_color\': \'$VIASH_PAR_BACKGROUND_COLOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BACKGROUND_COLOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --background_color. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --background_color=*) - [ -n "$VIASH_PAR_BACKGROUND_COLOR" ] && ViashError Bad arguments for option \'--background_color=*\': \'$VIASH_PAR_BACKGROUND_COLOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BACKGROUND_COLOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_WIDTH+x} ]; then - VIASH_PAR_WIDTH="800" -fi -if [ -z ${VIASH_PAR_HEIGHT+x} ]; then - VIASH_PAR_HEIGHT="600" -fi -if [ -z ${VIASH_PAR_BACKGROUND_COLOR+x} ]; then - VIASH_PAR_BACKGROUND_COLOR="white" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_WIDTH" ]]; then - if ! [[ "$VIASH_PAR_WIDTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--width' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_HEIGHT" ]]; then - if ! [[ "$VIASH_PAR_HEIGHT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--height' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_FORMAT" ]; then - VIASH_PAR_OUTPUT_FORMAT_CHOICES=("svg:png:pdf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_FORMAT_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_FORMAT:" ]]; then - ViashError '--output_format' specified value of \'$VIASH_PAR_OUTPUT_FORMAT\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/report_mermaid:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/report_mermaid:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/report_mermaid:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-mermaid-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi ) -$( if [ ! -z ${VIASH_PAR_WIDTH+x} ]; then echo "${VIASH_PAR_WIDTH}" | sed "s#'#'\"'\"'#g;s#.*#par_width='&'#" ; else echo "# par_width="; fi ) -$( if [ ! -z ${VIASH_PAR_HEIGHT+x} ]; then echo "${VIASH_PAR_HEIGHT}" | sed "s#'#'\"'\"'#g;s#.*#par_height='&'#" ; else echo "# par_height="; fi ) -$( if [ ! -z ${VIASH_PAR_BACKGROUND_COLOR+x} ]; then echo "${VIASH_PAR_BACKGROUND_COLOR}" | sed "s#'#'\"'\"'#g;s#.*#par_background_color='&'#" ; else echo "# par_background_color="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -mmdc -p "\$meta_resources_dir/puppeteer-config.json" \\ - -i "\$par_input" \\ - -o "\$par_output" \\ - --width "\$par_width" \\ - --height "\$par_height" \\ - \${par_background_color:+--backgroundColor \$par_background_color} \\ - \${output_format:+--outputFormat \$par_output_format} -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/report/mermaid/puppeteer-config.json b/target/docker/report/mermaid/puppeteer-config.json deleted file mode 100644 index 7b2851c2995..00000000000 --- a/target/docker/report/mermaid/puppeteer-config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "executablePath": "/usr/bin/chromium", - "args": [ - "--no-sandbox" - ] -} \ No newline at end of file diff --git a/target/docker/transfer/publish/.config.vsh.yaml b/target/docker/transfer/publish/.config.vsh.yaml deleted file mode 100644 index d52f774e80f..00000000000 --- a/target/docker/transfer/publish/.config.vsh.yaml +++ /dev/null @@ -1,125 +0,0 @@ -functionality: - name: "publish" - namespace: "transfer" - version: "0.12.3" - authors: - - name: "Toni Verbeiren" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - github: "tverbeiren" - linkedin: "verbeiren" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist and CEO" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input filename" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output filename" - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Publish an artifact and 
optionally rename with parameters" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transfer/publish/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/transfer/publish" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transfer/publish/publish" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: 
"https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/transfer/publish/publish b/target/docker/transfer/publish/publish deleted file mode 100755 index d6f5f305068..00000000000 --- a/target/docker/transfer/publish/publish +++ /dev/null @@ -1,919 +0,0 @@ -#!/usr/bin/env bash - -# publish 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Toni Verbeiren (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : 
ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. 
-# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="publish" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "publish 0.12.3" - echo "" - echo "Publish an artifact and optionally rename with parameters" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input filename" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output filename" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck 
-function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 - -ENTRYPOINT [] - - -RUN : -LABEL org.opencontainers.image.authors="Toni Verbeiren" -LABEL org.opencontainers.image.description="Companion container for running component transfer publish" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-publish-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "publish 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transfer_publish:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transfer_publish:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transfer_publish:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-publish-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -parent=\`dirname "\$par_output"\` -if [[ ! -d "\$parent" ]]; then - mkdir -p "\$parent" -fi - -cp -r "\$par_input" "\$par_output" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/transform/clr/.config.vsh.yaml b/target/docker/transform/clr/.config.vsh.yaml deleted file mode 100644 index 3943f5cddda..00000000000 --- a/target/docker/transform/clr/.config.vsh.yaml +++ /dev/null @@ -1,188 +0,0 @@ -functionality: - name: "clr" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "prot" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_layer" - description: "Output layer to use. By default, use X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "muon~=0.1.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - 
mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/clr/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/clr" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/clr/clr" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/transform/clr/clr b/target/docker/transform/clr/clr deleted file mode 100755 index 65f8f44ee05..00000000000 --- a/target/docker/transform/clr/clr +++ /dev/null @@ -1,1005 +0,0 @@ -#!/usr/bin/env bash - -# clr 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="clr" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "clr 0.12.3" - echo "" - echo "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017)." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: prot" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --output_layer" - echo " type: string" - echo " Output layer to use. By default, use X." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. 
See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "muon~=0.1.5" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component transform clr" -LABEL org.opencontainers.image.created="2024-01-25T10:13:54Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-clr-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "clr 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_layer) - [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_layer. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_layer=*) - [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer=*\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_clr:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_clr:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_clr:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_clr:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="prot" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_clr:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_clr:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_clr:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-clr-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from muon import prot as pt -from mudata import read_h5mu - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - - -def main(): - input_h5mu = read_h5mu(par['input']) - modality = input_h5mu[par['modality']] - normalized_counts = pt.pp.clr(modality, inplace=False if par['output_layer'] else True) - if par['output_layer'] and not normalized_counts: - raise RuntimeError("CLR failed to return the requested output layer") - if normalized_counts: - input_h5mu[par["modality"]].layers[par['output_layer']] = normalized_counts.X - input_h5mu.write_h5mu(par['output'], compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/transform/delete_layer/.config.vsh.yaml b/target/docker/transform/delete_layer/.config.vsh.yaml deleted file mode 100644 index 3ee51374380..00000000000 --- a/target/docker/transform/delete_layer/.config.vsh.yaml +++ /dev/null @@ -1,196 +0,0 @@ -functionality: - name: "delete_layer" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - description: "Input layer to remove" - info: null - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--missing_ok" - description: "Do not raise an error if the layer does not exist for all modalities." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/compress_h5mu.py" - - type: "file" - path: "src/utils/setup_logger.py" - description: "Delete an anndata layer from one or more modalities.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" 
- mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/delete_layer" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/delete_layer/delete_layer" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/transform/delete_layer/compress_h5mu.py b/target/docker/transform/delete_layer/compress_h5mu.py deleted file mode 100644 index 9d92395a573..00000000000 --- a/target/docker/transform/delete_layer/compress_h5mu.py +++ /dev/null @@ -1,49 +0,0 @@ -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = 
output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) diff --git a/target/docker/transform/delete_layer/delete_layer b/target/docker/transform/delete_layer/delete_layer deleted file mode 100755 index 326a2403ebb..00000000000 --- a/target/docker/transform/delete_layer/delete_layer +++ /dev/null @@ -1,1122 +0,0 @@ -#!/usr/bin/env bash - -# delete_layer 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. 
The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 
-VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="delete_layer" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "delete_layer 0.12.3" - echo "" - echo "Delete an anndata layer from one or more modalities." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --layer" - echo " type: string, required parameter, multiple values allowed" - echo " Input layer to remove" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --missing_ok" - echo " type: boolean_true" - echo " Do not raise an error if the layer does not exist for all modalities." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component transform delete_layer" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-delete_layer-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "delete_layer 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --layer) - if [ -z "$VIASH_PAR_LAYER" ]; then - VIASH_PAR_LAYER="$2" - else - VIASH_PAR_LAYER="$VIASH_PAR_LAYER:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --layer=*) - if [ -z "$VIASH_PAR_LAYER" ]; then - VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") - else - VIASH_PAR_LAYER="$VIASH_PAR_LAYER:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --missing_ok) - [ -n "$VIASH_PAR_MISSING_OK" ] && ViashError Bad arguments for option \'--missing_ok\': \'$VIASH_PAR_MISSING_OK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MISSING_OK=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_LAYER+x} ]; then - ViashError '--layer' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_MISSING_OK+x} ]; then - VIASH_PAR_MISSING_OK="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MISSING_OK" ]]; then - if ! [[ "$VIASH_PAR_MISSING_OK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--missing_ok' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-delete_layer-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -from mudata import read_h5ad, write_h5ad -import shutil -from pathlib import Path - -## VIASH START -# The following code has been auto-generated by Viash. 
-par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'missing_ok': $( if [ ! -z ${VIASH_PAR_MISSING_OK+x} ]; then echo "r'${VIASH_PAR_MISSING_OK//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = 
output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -def main(): - input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] - - logger.info('Reading input file %s, modality %s.', input_file, mod_name) - mod = read_h5ad(input_file, mod=mod_name) - for layer in par['layer']: - if layer not in mod.layers: - if par['missing_ok']: - continue - raise ValueError(f"Layer '{layer}' is not present in modality {mod_name}.") - logger.info('Deleting layer %s from modality %s.', layer, mod_name) - del mod.layers[layer] - - logger.info('Writing output to %s.', par['output']) - output_file_uncompressed = output_file.with_name(output_file.stem + 
"_uncompressed.h5mu") \\ - if par["output_compression"] else output_file - shutil.copyfile(par['input'], output_file_uncompressed) - write_h5ad(filename=output_file_uncompressed, mod=mod_name, data=mod) - if par["output_compression"]: - compress_h5mu(output_file_uncompressed, output_file, compression=par["output_compression"]) - output_file_uncompressed.unlink() - - logger.info('Finished.') - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/transform/delete_layer/setup_logger.py b/target/docker/transform/delete_layer/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/transform/delete_layer/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/transform/log1p/.config.vsh.yaml b/target/docker/transform/log1p/.config.vsh.yaml deleted file mode 100644 index 2399737e5c9..00000000000 --- a/target/docker/transform/log1p/.config.vsh.yaml +++ /dev/null @@ -1,225 +0,0 @@ -functionality: - name: "log1p" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - 
multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. If None, X is normalized" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_layer" - description: "Output layer to use. By default, use X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--base" - info: null - example: - - 2.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes\ - \ the natural logarithm unless a different base is given.\n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - 
cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/log1p/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/log1p" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/log1p/log1p" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/transform/log1p/log1p b/target/docker/transform/log1p/log1p deleted file mode 100755 index 545cf35f8bd..00000000000 --- a/target/docker/transform/log1p/log1p +++ /dev/null @@ -1,1068 +0,0 @@ -#!/usr/bin/env bash - -# log1p 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (maintainer) -# * Robrecht Cannoodt (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the 
verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="log1p" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "log1p 0.12.3" - echo "" - echo "Logarithmize the data matrix. Computes X = log(X + 1), where log denotes the" - echo "natural logarithm unless a different base is given." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --input_layer" - echo " type: string" - echo " Input layer to use. If None, X is normalized" - echo "" - echo " --output_layer" - echo " type: string" - echo " Output layer to use. By default, use X." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo " --base" - echo " type: double" - echo " example: 2.0" -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component transform log1p" -LABEL org.opencontainers.image.created="2024-01-25T10:13:55Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-log1p-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "log1p 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_layer) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_layer=*) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_layer) - [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_layer=*) - [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer=*\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --base) - [ -n "$VIASH_PAR_BASE" ] && ViashError Bad arguments for option \'--base\': \'$VIASH_PAR_BASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BASE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --base. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --base=*) - [ -n "$VIASH_PAR_BASE" ] && ViashError Bad arguments for option \'--base=*\': \'$VIASH_PAR_BASE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BASE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_BASE" ]]; then - if ! [[ "$VIASH_PAR_BASE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--base' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_log1p:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_log1p:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_log1p:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-log1p-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import scanpy as sc -import mudata as mu -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'base': $( if [ ! -z ${VIASH_PAR_BASE+x} ]; then echo "float(r'${VIASH_PAR_BASE//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -mod = par["modality"] -logger.info("Performing log transformation on modality %s", mod) -data = mdata.mod[mod] -new_layer = sc.pp.log1p(data, - base=par["base"], - copy=True if par['output_layer'] else False) -if new_layer: - data.layers[par['output_layer']] = new_layer.X - data.uns['log1p'] = new_layer.uns['log1p'] - -logger.info("Writing to file %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/transform/log1p/setup_logger.py b/target/docker/transform/log1p/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/transform/log1p/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/transform/normalize_total/.config.vsh.yaml b/target/docker/transform/normalize_total/.config.vsh.yaml deleted file mode 100644 index 251aa7c1ece..00000000000 --- a/target/docker/transform/normalize_total/.config.vsh.yaml +++ /dev/null @@ -1,242 +0,0 @@ -functionality: - name: "normalize_total" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: 
"Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. By default, X is normalized" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_layer" - description: "Output layer to use. By default, use X." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--target_sum" - description: "If None, after normalization, each observation (cell) has a total\ - \ count equal to the median of total counts for observations (cells) before\ - \ normalization." - info: null - default: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--exclude_highly_expressed" - description: "Exclude (very) highly expressed genes for the computation of the\ - \ normalization factor (size factor) for each cell. A gene is considered highly\ - \ expressed, if it has more than max_fraction of the total counts in at least\ - \ one cell. The not-excluded genes will sum up to target_sum." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Normalize counts per cell.\n\nNormalize each cell by total counts\ - \ over all genes, so that every cell has the same total count after normalization.\ - \ If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True,\ - \ very highly expressed genes are excluded from the computation of the normalization\ - \ factor (size factor) for each cell. 
This is meaningful as these can strongly\ - \ influence the resulting normalized values for all other genes [Weinreb17].\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim-bullseye" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libhdf5-dev" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: 
"cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/normalize_total" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/normalize_total/normalize_total" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/transform/normalize_total/normalize_total b/target/docker/transform/normalize_total/normalize_total deleted file mode 100755 index bb35c7c2a20..00000000000 --- a/target/docker/transform/normalize_total/normalize_total +++ /dev/null @@ -1,1108 +0,0 @@ -#!/usr/bin/env bash - -# normalize_total 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries De Maeyer (maintainer) -# * Robrecht Cannoodt (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the 
verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="normalize_total" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "normalize_total 0.12.3" - echo "" - echo "Normalize counts per cell." - echo "" - echo "Normalize each cell by total counts over all genes, so that every cell has the" - echo "same total count after normalization. If choosing target_sum=1e6, this is CPM" - echo "normalization." - echo "" - echo "If exclude_highly_expressed=True, very highly expressed genes are excluded from" - echo "the computation of the normalization factor (size factor) for each cell. This is" - echo "meaningful as these can strongly influence the resulting normalized values for" - echo "all other genes [Weinreb17]." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --input_layer" - echo " type: string" - echo " Input layer to use. By default, X is normalized" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5mu" - echo " Output h5mu file." 
- echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --output_layer" - echo " type: string" - echo " Output layer to use. By default, use X." - echo "" - echo " --target_sum" - echo " type: integer" - echo " default: 10000" - echo " If None, after normalization, each observation (cell) has a total count" - echo " equal to the median of total counts for observations (cells) before" - echo " normalization." - echo "" - echo " --exclude_highly_expressed" - echo " type: boolean_true" - echo " Exclude (very) highly expressed genes for the computation of the" - echo " normalization factor (size factor) for each cell. A gene is considered" - echo " highly expressed, if it has more than max_fraction of the total counts" - echo " in at least one cell. The not-excluded genes will sum up to target_sum." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim-bullseye - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component transform normalize_total" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-normalize_total-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "normalize_total 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_layer) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_layer=*) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_layer) - [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_layer=*) - [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer=*\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --target_sum) - [ -n "$VIASH_PAR_TARGET_SUM" ] && ViashError Bad arguments for option \'--target_sum\': \'$VIASH_PAR_TARGET_SUM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TARGET_SUM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --target_sum. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --target_sum=*) - [ -n "$VIASH_PAR_TARGET_SUM" ] && ViashError Bad arguments for option \'--target_sum=*\': \'$VIASH_PAR_TARGET_SUM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TARGET_SUM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --exclude_highly_expressed) - [ -n "$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED" ] && ViashError Bad arguments for option \'--exclude_highly_expressed\': \'$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED=true - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_TARGET_SUM+x} ]; then - VIASH_PAR_TARGET_SUM="10000" -fi -if [ -z ${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED+x} ]; then - VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED="false" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_TARGET_SUM" ]]; then - if ! [[ "$VIASH_PAR_TARGET_SUM" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--target_sum' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED" ]]; then - if ! [[ "$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--exclude_highly_expressed' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-normalize_total-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import scanpy as sc -import mudata as mu - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'target_sum': $( if [ ! -z ${VIASH_PAR_TARGET_SUM+x} ]; then echo "int(r'${VIASH_PAR_TARGET_SUM//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'exclude_highly_expressed': $( if [ ! -z ${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -logger.info(par) - -mod = par["modality"] -logger.info("Performing total normalization on modality %s", mod) -dat = mdata.mod[mod] -if par['input_layer'] and not par['input_layer'] in 
dat.layers.keys(): - raise ValueError(f"Input layer {par['input_layer']} not found in {mod}") -output_data = sc.pp.normalize_total(dat, - layer=par["input_layer"], - copy=True if par["output_layer"] else False) - -if output_data: - dat.layers[par["output_layer"]] = output_data.X - -logger.info("Writing to file") -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/transform/normalize_total/setup_logger.py b/target/docker/transform/normalize_total/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/transform/normalize_total/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/transform/regress_out/.config.vsh.yaml b/target/docker/transform/regress_out/.config.vsh.yaml deleted file mode 100644 index 855d304927a..00000000000 --- a/target/docker/transform/regress_out/.config.vsh.yaml +++ /dev/null @@ -1,195 +0,0 @@ -functionality: - name: "regress_out" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality (one or more) to run this component on." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_keys" - description: "Which .obs keys to regress on." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Regress out (mostly) unwanted sources of variation.\nUses simple linear\ - \ regression. 
This is inspired by Seurat's regressOut function in R [Satija15].\ - \ \nNote that this function tends to overcorrect in certain circumstances as described\ - \ in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - 
cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/regress_out" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/regress_out/regress_out" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/transform/regress_out/regress_out b/target/docker/transform/regress_out/regress_out deleted file mode 100755 index 847f0f0405f..00000000000 --- a/target/docker/transform/regress_out/regress_out +++ /dev/null @@ -1,1039 +0,0 @@ -#!/usr/bin/env bash - -# regress_out 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer, contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# 
usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="regress_out" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "regress_out 0.12.3" - echo "" - echo "Regress out (mostly) unwanted sources of variation." - echo "Uses simple linear regression. This is inspired by Seurat's regressOut function" - echo "in R [Satija15]." - echo "Note that this function tends to overcorrect in certain circumstances as" - echo "described in issue theislab/scanpy#526." - echo "See https://github.com/theislab/scanpy/issues/526." - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " Which modality (one or more) to run this component on." - echo "" - echo " --obs_keys" - echo " type: string, multiple values allowed" - echo " Which .obs keys to regress on." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component transform regress_out" -LABEL org.opencontainers.image.created="2024-01-25T10:13:59Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-regress_out-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "regress_out 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_keys) - if [ -z "$VIASH_PAR_OBS_KEYS" ]; then - VIASH_PAR_OBS_KEYS="$2" - else - VIASH_PAR_OBS_KEYS="$VIASH_PAR_OBS_KEYS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_keys. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obs_keys=*) - if [ -z "$VIASH_PAR_OBS_KEYS" ]; then - VIASH_PAR_OBS_KEYS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OBS_KEYS="$VIASH_PAR_OBS_KEYS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_regress_out:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_regress_out:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_regress_out:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-regress_out-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import scanpy as sc -import mudata as mu -import multiprocessing -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_keys': $( if [ ! -z ${VIASH_PAR_OBS_KEYS+x} ]; then echo "r'${VIASH_PAR_OBS_KEYS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -if ( - par["obs_keys"] is not None - and len(par["obs_keys"]) > 0 -): - mod = par["modality"] - logger.info("Regress out variables on modality %s", mod) - data = mdata.mod[mod] - - sc.pp.regress_out( - data, - keys=par["obs_keys"], - n_jobs=multiprocessing.cpu_count() - 1 - ) - -logger.info("Writing to file") -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/transform/regress_out/setup_logger.py b/target/docker/transform/regress_out/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/transform/regress_out/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/transform/scale/.config.vsh.yaml b/target/docker/transform/scale/.config.vsh.yaml deleted file mode 100644 index d41bde0466b..00000000000 --- a/target/docker/transform/scale/.config.vsh.yaml +++ /dev/null @@ -1,205 +0,0 @@ -functionality: - name: "scale" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file." 
- info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "List of modalities to process." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_value" - description: "Clip (truncate) to this value after scaling. Does not clip by default." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--zero_center" - description: "If False, omit zero-centering variables, which allows to handle\ - \ sparse input efficiently." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Scale data to unit variance and zero mean.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim-bullseye" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libhdf5-dev" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: 
"memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/scale" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/scale/scale" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/transform/scale/scale b/target/docker/transform/scale/scale deleted file mode 100755 index ffa4c94baef..00000000000 --- a/target/docker/transform/scale/scale +++ /dev/null @@ -1,1063 +0,0 @@ -#!/usr/bin/env bash - -# scale 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scale" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scale 0.12.3" - echo "" - echo "Scale data to unit variance and zero mean." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " List of modalities to process." - echo "" - echo " --max_value" - echo " type: double" - echo " Clip (truncate) to this value after scaling. Does not clip by default." - echo "" - echo " --zero_center" - echo " type: boolean" - echo " default: true" - echo " If False, omit zero-centering variables, which allows to handle sparse" - echo " input efficiently." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.10-slim-bullseye - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component transform scale" -LABEL org.opencontainers.image.created="2024-01-25T10:14:00Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scale-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scale 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_value) - [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MAX_VALUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_value. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_value=*) - [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value=*\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_VALUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --zero_center) - [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ZERO_CENTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --zero_center. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --zero_center=*) - [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center=*\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ZERO_CENTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_scale:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_scale:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_scale:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_scale:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_ZERO_CENTER+x} ]; then - VIASH_PAR_ZERO_CENTER="true" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MAX_VALUE" ]]; then - if ! [[ "$VIASH_PAR_MAX_VALUE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--max_value' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ZERO_CENTER" ]]; then - if ! [[ "$VIASH_PAR_ZERO_CENTER" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--zero_center' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_scale:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_scale:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_scale:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scale-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -from mudata import read_h5mu -import scanpy - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'max_value': $( if [ ! -z ${VIASH_PAR_MAX_VALUE+x} ]; then echo "float(r'${VIASH_PAR_MAX_VALUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'zero_center': $( if [ ! -z ${VIASH_PAR_ZERO_CENTER+x} ]; then echo "r'${VIASH_PAR_ZERO_CENTER//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - logger.info(f'Reading .h5mu file: {par["input"]}') - mudata = read_h5mu(par["input"]) - mod = par["modality"] - data = mudata.mod[mod] - - logger.info("Scaling modality: %s", mod) - scanpy.pp.scale(data, - zero_center=par["zero_center"], - max_value=par["max_value"]) - - logger.info("Writing to %s", par["output"]) - mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) - logger.info("Finished") - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/docker/transform/scale/setup_logger.py b/target/docker/transform/scale/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/transform/scale/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/velocity/scvelo/.config.vsh.yaml b/target/docker/velocity/scvelo/.config.vsh.yaml deleted file mode 100644 index 15437092c10..00000000000 --- a/target/docker/velocity/scvelo/.config.vsh.yaml +++ /dev/null @@ -1,276 +0,0 @@ -functionality: - name: "scvelo" - namespace: "velocity" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: 
"Inputs" - arguments: - - type: "file" - name: "--input" - description: "Velocyto loom file." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output directory. If it does not exist, will be created." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Filtering and normalization" - description: "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize\ - \ function)" - arguments: - - type: "integer" - name: "--min_counts" - description: "Minimum number of counts required for a gene to pass filtering\ - \ (spliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_counts_u" - description: "Minimum number of counts required for a gene to pass filtering\ - \ (unspliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells" - description: "Minimum number of cells expressed required to pass filtering (spliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells_u" - description: "Minimum number of cells expressed required to pass filtering (unspliced)." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_shared_counts" - description: "Minimum number of counts (both unspliced and spliced) required\ - \ for a gene." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_shared_cells" - description: "Minimum number of cells required to be expressed (both unspliced\ - \ and spliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_top_genes" - description: "Number of genes to keep." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--log_transform" - description: "Do not log transform counts." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Fitting parameters" - description: "Arguments for fitting the data" - arguments: - - type: "integer" - name: "--n_principal_components" - description: "Number of principal components to use for calculating moments." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_neighbors" - description: "Number of neighbors to use. 
First/second-order moments are computed\ - \ for each\ncell across its nearest neighbors, where the neighbor graph is\ - \ obtained from\neuclidean distances in PCA space.\n" - info: null - default: - - 30 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scvelo~=0.2.5" - - "numpy~=1.23.5" - - "matplotlib<3.8.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: 
"memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/scvelo" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/scvelo/scvelo" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/velocity/scvelo/scvelo b/target/docker/velocity/scvelo/scvelo deleted file mode 100755 index 21662a9f061..00000000000 --- a/target/docker/velocity/scvelo/scvelo +++ /dev/null @@ -1,1272 +0,0 @@ -#!/usr/bin/env bash - -# scvelo 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scvelo" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scvelo 0.12.3" - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Velocyto loom file." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Output directory. If it does not exist, will be created." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo "Filtering and normalization:" - echo " Arguments for filtering, normalization an log transform (see" - echo " scvelo.pp.filter_and_normalize function)" - echo "" - echo " --min_counts" - echo " type: integer" - echo " Minimum number of counts required for a gene to pass filtering" - echo " (spliced)." - echo "" - echo " --min_counts_u" - echo " type: integer" - echo " Minimum number of counts required for a gene to pass filtering" - echo " (unspliced)." - echo "" - echo " --min_cells" - echo " type: integer" - echo " Minimum number of cells expressed required to pass filtering (spliced)." 
- echo "" - echo " --min_cells_u" - echo " type: integer" - echo " Minimum number of cells expressed required to pass filtering" - echo " (unspliced)." - echo "" - echo " --min_shared_counts" - echo " type: integer" - echo " Minimum number of counts (both unspliced and spliced) required for a" - echo " gene." - echo "" - echo " --min_shared_cells" - echo " type: integer" - echo " Minimum number of cells required to be expressed (both unspliced and" - echo " spliced)." - echo "" - echo " --n_top_genes" - echo " type: integer" - echo " Number of genes to keep." - echo "" - echo " --log_transform" - echo " type: boolean" - echo " default: true" - echo " Do not log transform counts." - echo "" - echo "Fitting parameters:" - echo " Arguments for fitting the data" - echo "" - echo " --n_principal_components" - echo " type: integer" - echo " Number of principal components to use for calculating moments." - echo "" - echo " --n_neighbors" - echo " type: integer" - echo " default: 30" - echo " Number of neighbors to use. First/second-order moments are computed for" - echo " each" - echo " cell across its nearest neighbors, where the neighbor graph is obtained" - echo " from" - echo " euclidean distances in PCA space." -} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
- else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scvelo~=0.2.5" "numpy~=1.23.5" "matplotlib<3.8.0" - -LABEL org.opencontainers.image.authors="Dries Schaumont" -LABEL org.opencontainers.image.description="Companion container for running component velocity scvelo" -LABEL org.opencontainers.image.created="2024-01-25T10:13:57Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL 
org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scvelo-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scvelo 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_counts) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_counts=*) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_counts_u) - [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS_U="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts_u. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_counts_u=*) - [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u=*\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS_U=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MIN_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells=*) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells_u) - [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_U="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_u. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells_u=*) - [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u=*\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_U=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_shared_counts) - [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SHARED_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_counts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_shared_counts=*) - [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts=*\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MIN_SHARED_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_shared_cells) - [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SHARED_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_shared_cells=*) - [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells=*\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SHARED_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_top_genes) - [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TOP_GENES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_top_genes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_top_genes=*) - [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes=*\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TOP_GENES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --log_transform) - [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOG_TRANSFORM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --log_transform. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --log_transform=*) - [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform=*\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOG_TRANSFORM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_principal_components) - [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_PRINCIPAL_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_principal_components. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_principal_components=*) - [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components=*\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_PRINCIPAL_COMPONENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_neighbors) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_neighbors=*) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then - VIASH_PAR_LOG_TRANSFORM="true" -fi -if [ -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then - VIASH_PAR_N_NEIGHBORS="30" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_COUNTS_U" ]]; then - if ! [[ "$VIASH_PAR_MIN_COUNTS_U" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_counts_u' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS_U" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS_U" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells_u' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MIN_SHARED_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_shared_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_SHARED_CELLS" ]]; then - if ! [[ "$VIASH_PAR_MIN_SHARED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_shared_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_TOP_GENES" ]]; then - if ! 
[[ "$VIASH_PAR_N_TOP_GENES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_top_genes' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LOG_TRANSFORM" ]]; then - if ! [[ "$VIASH_PAR_LOG_TRANSFORM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--log_transform' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ]]; then - if ! [[ "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_principal_components' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then - if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scvelo-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import scvelo -import mudata -from contextlib import redirect_stdout -from pathlib import Path -import matplotlib as mpl - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_counts_u': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cells_u': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_shared_counts': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_shared_cells': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'log_transform': $( if [ ! -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then echo "r'${VIASH_PAR_LOG_TRANSFORM//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'n_principal_components': $( if [ ! 
-z ${VIASH_PAR_N_PRINCIPAL_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_N_PRINCIPAL_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -mpl.rcParams['savefig.dpi']=150 - -# Script must be wrapped into a main function because scvelo spawn subprocesses -# and this fails when the functions are not wrapped. -def main(): - # Create output directory - output_dir = Path(par['output']) - output_dir.mkdir(parents=True, exist_ok=True) - scvelo.settings.figdir = str(output_dir) - - - # Calculate the sample name - sample_name = par["output"].removesuffix(".loom") - sample_name = Path(sample_name).name - - # Read the input data - adata = scvelo.read(par['input']) - - # Save spliced vs unspliced proportions to file - with (output_dir / "proportions.txt").open('w') as target: - with redirect_stdout(target): - scvelo.utils.show_proportions(adata) - - # Plot piecharts of spliced vs unspliced proportions - scvelo.pl.proportions(adata, save=True, show=False) - - # Perform preprocessing - scvelo.pp.filter_and_normalize(adata, - min_counts=par["min_counts"], - min_counts_u=par["min_counts_u"], - min_cells=par["min_cells"], - min_cells_u=par["min_cells_u"], - min_shared_counts=par["min_shared_counts"], - min_shared_cells=par["min_shared_cells"], - n_top_genes=par["n_top_genes"], - log=par["log_transform"]) - - # Fitting - scvelo.pp.moments(adata, - n_pcs=par["n_principal_components"], 
- n_neighbors=par["n_neighbors"]) - - - # Second step in velocyto calculations - # Velocity calculation and visualization - # From the scvelo manual: - # The solution to the full dynamical model is obtained by setting mode='dynamical', - # which requires to run scv.tl.recover_dynamics(adata) beforehand - scvelo.tl.recover_dynamics(adata) - scvelo.tl.velocity(adata, mode="dynamical") - scvelo.tl.velocity_graph(adata) - scvelo.pl.velocity_graph(adata, save=str(output_dir / "scvelo_graph.pdf"), show=False) - - # Plotting - # TODO: add more here. - scvelo.pl.velocity_embedding_stream(adata, save=str(output_dir / "scvelo_embedding.pdf"), show=False) - - # Create output - ouput_data = mudata.MuData({'rna_velocity': adata}) - ouput_data.write_h5mu(output_dir / f"{sample_name}.h5mu", compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/docker/velocity/scvelo/setup_logger.py b/target/docker/velocity/scvelo/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/docker/velocity/scvelo/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/docker/velocity/velocyto/.config.vsh.yaml b/target/docker/velocity/velocyto/.config.vsh.yaml deleted file mode 100644 index ab3aaef9215..00000000000 --- a/target/docker/velocity/velocyto/.config.vsh.yaml +++ /dev/null @@ -1,225 +0,0 @@ -functionality: - name: "velocyto" - namespace: "velocity" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to BAM file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome" - alternatives: - - "-t" - description: "Path to GTF file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--barcode" - alternatives: - - "-b" - description: 
"Valid barcodes file, to filter the bam. If --bcfile is not specified\ - \ all the cell barcodes will be included.\nCell barcodes should be specified\ - \ in the bcfile as the 'CB' tag for each read\n" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--without_umi" - description: "foo" - info: null - direction: "input" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Velocyto loom file" - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--logic" - alternatives: - - "-l" - description: "The logic to use for the filtering." - info: null - default: - - "Default" - required: false - choices: - - "Default" - - "Permissive10X" - - "Intermediate10X" - - "ValidatedIntrons10X" - - "Stricter10X" - - "ObservedSpanning10X" - - "Discordant10X" - - "SmartSeq2" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Runs the velocity analysis on a BAM file, outputting a loom file." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - - type: "file" - path: "resources_test/rna_velocity" - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - - "file" - interactive: false - - type: "python" - user: false - pip: - - "numpy" - - "Cython" - upgrade: true - - type: "python" - user: false - pip: - - "velocyto" - upgrade: true - - type: "apt" - packages: - - "samtools" - interactive: false - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" 
- cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml" - platform: "docker" - output: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/velocyto" - executable: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/velocyto/velocyto" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/docker/velocity/velocyto/velocyto b/target/docker/velocity/velocyto/velocyto deleted file mode 100755 index d01f23cb3ed..00000000000 --- a/target/docker/velocity/velocyto/velocyto +++ /dev/null @@ -1,1097 +0,0 @@ -#!/usr/bin/env bash - -# velocyto 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 
1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="velocyto" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "velocyto 0.12.3" - echo "" - echo "Runs the velocity analysis on a BAM file, outputting a loom file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Path to BAM file" - echo "" - echo " -t, --transcriptome" - echo " type: file, required parameter, file must exist" - echo " Path to GTF file" - echo "" - echo " -b, --barcode" - echo " type: file, file must exist" - echo " Valid barcodes file, to filter the bam. If --bcfile is not specified all" - echo " the cell barcodes will be included." - echo " Cell barcodes should be specified in the bcfile as the 'CB' tag for each" - echo " read" - echo "" - echo " --without_umi" - echo " type: boolean_true" - echo " foo" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Velocyto loom file" - echo "" - echo " -l, --logic" - echo " type: string" - echo " default: Default" - echo " choices: [ Default, Permissive10X, Intermediate10X, ValidatedIntrons10X," - echo "Stricter10X, ObservedSpanning10X, Discordant10X, SmartSeq2 ]" - echo " The logic to use for the filtering." 
-} - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - save=$-; set +e - docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' push succeeded." - else - ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - save=$-; set +e - ViashDockerPull $1 - out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - VSHD_ID="$1" - VSHD_STRAT="$2" - if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then - ViashDockerBuild $VSHD_ID - elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then - save=$-; set +e - ViashDockerLocalTagCheck $VSHD_ID - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $VSHD_ID already exists" - elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then - ViashDockerBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then - ViashDockerPull $VSHD_ID - elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $VSHD_ID --no-cache - elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $VSHD_ID - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi - elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then - ViashDockerPush "$VSHD_ID" - elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then - save=$-; set +e - ViashDockerRemoteTagCheck $VSHD_ID - outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$VSHD_ID' exists, doing nothing." - else - ViashNotice "Container '$VSHD_ID' does not yet exist." - ViashDockerPush "$VSHD_ID" - fi - elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then - ViashNotice "Skipping setup." - else - ViashError "Unrecognised Docker strategy: $VSHD_STRAT" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - tag=$1 - shift 1 - commands="$@" - save=$-; set +e - missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$tag' does not contain command '$missing'." - exit 1 - fi -} - - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - cat << 'VIASHDOCKER' -FROM python:3.9-slim - -ENTRYPOINT [] - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y procps build-essential file && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "numpy" "Cython" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "velocyto" - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y samtools && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.authors="Robrecht Cannoodt" -LABEL org.opencontainers.image.description="Companion container for running component velocity velocyto" -LABEL org.opencontainers.image.created="2024-01-25T10:13:58Z" -LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" -LABEL org.opencontainers.image.revision="827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" -LABEL org.opencontainers.image.version="0.12.3" - -VIASHDOCKER -} - -# ViashDockerBuild: build a docker container -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was built -function ViashDockerBuild { - # create temporary directory to store dockerfile & optional resources in - tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-velocyto-XXXXXX") - dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile > $dockerfile - - # Build the container - ViashNotice "Building container '$1' with Dockerfile" - ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" - save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile - else - docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log - fi - out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$1'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi - ViashDockerCheckCommands "$1" 'ps' 'bash' -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker -# examples: -# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashAutodetectMount { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - echo "$mount_target/$base_name" -} -function ViashAutodetectMountArg { - abs_path=$(ViashAbsolutePath "$1") - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - mount_target="/viash_automount$mount_source" - ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashStripAutomount { - abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#/viash_automount}" -} -# ViashExtractFlags: Retain leading flag -# $1 : string with a possible leading flag -# return : leading flag -# examples: -# ViashExtractFlags --foo=bar # returns --foo -function ViashExtractFlags { - echo $1 | sed 's/=.*//' -} -# initialise variables -VIASH_EXTRA_MOUNTS=() - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "velocyto 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome) - [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome=*\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME=$(ViashRemoveFlags "$1") - shift 1 - ;; - -t) - [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_TRANSCRIPTOME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --barcode) - [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BARCODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --barcode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --barcode=*) - [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode=*\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BARCODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -b) - [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'-b\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BARCODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -b. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --without_umi) - [ -n "$VIASH_PAR_WITHOUT_UMI" ] && ViashError Bad arguments for option \'--without_umi\': \'$VIASH_PAR_WITHOUT_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WITHOUT_UMI=true - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --logic) - [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGIC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --logic. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --logic=*) - [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic=*\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGIC=$(ViashRemoveFlags "$1") - shift 1 - ;; - -l) - [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'-l\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGIC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -l. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---setup) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$2" - shift 1 - ;; - ---setup=*) - VIASH_MODE='docker_setup' - VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 2 - ;; - ---dockerfile) - ViashDockerfile - exit 0 - ;; - ---v|---volume) - VIASH_EXTRA_MOUNTS+=("--volume='$2'") - shift 2 - ;; - ---volume=*) - VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") - shift 1 - ;; - ---debug) - VIASH_MODE='docker_debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -ViashDockerInstallationCheck - -if [ $VIASH_MODE == "docker_setup" ]; then - ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" - exit 0 -fi -ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0' ifneedbepullelsecachedbuild - -if [ $VIASH_MODE == "docker_debug" ]; then - ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0'" - docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0' - exit 0 -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then - ViashError '--transcriptome' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then - VIASH_PAR_WITHOUT_UMI="false" -fi -if [ -z ${VIASH_PAR_LOGIC+x} ]; then - VIASH_PAR_LOGIC="Default" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BARCODE" ] && [ ! -e "$VIASH_PAR_BARCODE" ]; then - ViashError "Input file '$VIASH_PAR_BARCODE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_WITHOUT_UMI" ]]; then - if ! [[ "$VIASH_PAR_WITHOUT_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--without_umi' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_LOGIC" ]; then - VIASH_PAR_LOGIC_CHOICES=("Default:Permissive10X:Intermediate10X:ValidatedIntrons10X:Stricter10X:ObservedSpanning10X:Discordant10X:SmartSeq2") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_LOGIC_CHOICES[*]}:" =~ ":$VIASH_PAR_LOGIC:" ]]; then - ViashError '--logic' specified value of \'$VIASH_PAR_LOGIC\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -# detect volumes from file arguments -VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME")" ) - VIASH_PAR_TRANSCRIPTOME=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME") -fi -if [ ! -z "$VIASH_PAR_BARCODE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BARCODE")" ) - VIASH_PAR_BARCODE=$(ViashAutodetectMount "$VIASH_PAR_BARCODE") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") -fi - -# get unique mounts -VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) - -# change file ownership -function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} - set -e - fi -} -trap ViashPerformChown EXIT - -# helper function for filling in extra docker args -VIASH_EXTRA_DOCKER_ARGS="" -if [ ! -z "$VIASH_META_MEMORY_MB" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" -fi -if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" -fi - -ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0)" -cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0 -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-velocyto-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome='&'#" ; else echo "# par_transcriptome="; fi ) -$( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "${VIASH_PAR_BARCODE}" | sed "s#'#'\"'\"'#g;s#.*#par_barcode='&'#" ; else echo "# par_barcode="; fi ) -$( if [ ! -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then echo "${VIASH_PAR_WITHOUT_UMI}" | sed "s#'#'\"'\"'#g;s#.*#par_without_umi='&'#" ; else echo "# par_without_umi="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_LOGIC+x} ]; then echo "${VIASH_PAR_LOGIC}" | sed "s#'#'\"'\"'#g;s#.*#par_logic='&'#" ; else echo "# par_logic="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=( ) - -if [ ! -z "\$par_barcode" ]; then - extra_params+=( "--bcfile=\$par_barcode" ) -fi -if [ "\$par_without_umi" == "true" ]; then - extra_params+=( "--without-umi" ) -fi -if [ ! 
-z "\$meta_cpus" ]; then - extra_params+=( "--samtools-threads" "\$meta_cpus" ) -fi -if [ ! -z "\$meta_memory_mb" ]; then - extra_params+=( "--samtools-memory" "\$meta_memory_mb" ) -fi - -output_dir=\`dirname "\$par_output"\` -sample_id=\`basename "\$par_output" .loom\` - -if (file \`readlink -f "\$par_transcriptome"\` | grep -q compressed ) ; then - # create temporary directory - tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") - function clean_up { - rm -rf "\$tmpdir" - } - trap clean_up EXIT - - zcat "\$par_transcriptome" > "\$tmpdir/genes.gtf" - par_transcriptome="\$tmpdir/genes.gtf" -fi - -velocyto run \\ - "\$par_input" \\ - "\$par_transcriptome" \\ - "\${extra_params[@]}" \\ - --outputfolder "\$output_dir" \\ - --sampleid "\$sample_id" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# strip viash automount from file paths -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ]; then - VIASH_PAR_TRANSCRIPTOME=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME") -fi -if [ ! -z "$VIASH_PAR_BARCODE" ]; then - VIASH_PAR_BARCODE=$(ViashStripAutomount "$VIASH_PAR_BARCODE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/compression/compress_h5mu/.config.vsh.yaml b/target/native/compression/compress_h5mu/.config.vsh.yaml deleted file mode 100644 index 796957e4b21..00000000000 --- a/target/native/compression/compress_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,167 +0,0 @@ -functionality: - name: "compress_h5mu" - namespace: "compression" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "location of output file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--compression" - description: "Compression type." - info: null - default: - - "gzip" - required: false - choices: - - "lzf" - - "gzip" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../../utils/compress_h5mu.py" - description: "Compress a MuData file. 
\n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 
1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/compression/compress_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/compression/compress_h5mu/compress_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/compression/compress_h5mu/compress_h5mu b/target/native/compression/compress_h5mu/compress_h5mu deleted file mode 100755 index 71f693b7868..00000000000 --- a/target/native/compression/compress_h5mu/compress_h5mu +++ /dev/null @@ -1,537 +0,0 @@ -#!/usr/bin/env bash - -# compress_h5mu 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="compress_h5mu" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "compress_h5mu 0.12.3" - echo "" - echo "Compress a MuData file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: sample_path" - echo " Path to the input .h5mu." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " location of output file." - echo "" - echo " --compression" - echo " type: string" - echo " default: gzip" - echo " choices: [ lzf, gzip ]" - echo " Compression type." -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "compress_h5mu 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --compression) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --compression=*) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then - VIASH_PAR_COMPRESSION="gzip" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_COMPRESSION" ]; then - VIASH_PAR_COMPRESSION_CHOICES=("lzf:gzip") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_COMPRESSION:" ]]; then - ViashError '--compression' specified value of \'$VIASH_PAR_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-compress_h5mu-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - 
starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -if __name__ == "__main__": - compress_h5mu(par["input"], par["output"], compression=par["compression"]) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/native/compression/compress_h5mu/compress_h5mu.py b/target/native/compression/compress_h5mu/compress_h5mu.py deleted file mode 100644 index 9d92395a573..00000000000 --- a/target/native/compression/compress_h5mu/compress_h5mu.py +++ /dev/null @@ -1,49 +0,0 @@ -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = 
output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) diff --git a/target/native/compression/tar_extract/.config.vsh.yaml b/target/native/compression/tar_extract/.config.vsh.yaml deleted file mode 100644 index 54961584036..00000000000 --- a/target/native/compression/tar_extract/.config.vsh.yaml +++ /dev/null @@ -1,106 +0,0 @@ -functionality: - name: "tar_extract" - namespace: "compression" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input file" - info: null - example: - - "input.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Folder to restore file(s) to." 
- info: null - example: - - "output_folder" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--strip_components" - alternatives: - - "-s" - description: "Strip this amount of leading components from file names on extraction.\ - \ For example, to extract only 'myfile.txt' from an archive containing the structure\ - \ `this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--exclude" - alternatives: - - "-e" - description: "Prevents any file or member whose name matches the shell wildcard\ - \ (pattern) from being extracted." - info: null - example: - - "docs/figures" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Extract files from a tar archive" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "../../../LICENSE" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "native" - id: "native" -- type: "docker" - id: "docker" - image: "ubuntu:latest" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -info: - config: "/home/runner/work/openpipeline/openpipeline/src/compression/tar_extract/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/compression/tar_extract" - executable: 
"/home/runner/work/openpipeline/openpipeline/target/native/compression/tar_extract/tar_extract" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/compression/tar_extract/tar_extract b/target/native/compression/tar_extract/tar_extract deleted file mode 100755 index 166c374c0a7..00000000000 --- a/target/native/compression/tar_extract/tar_extract +++ /dev/null @@ -1,514 +0,0 @@ -#!/usr/bin/env bash - -# tar_extract 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! 
-# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="tar_extract" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "tar_extract 0.12.3" - echo "" - echo "Extract files from a tar archive" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.tar.gz" - echo " Input file" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output_folder" - echo " Folder to restore file(s) to." - echo "" - echo " -s, --strip_components" - echo " type: integer" - echo " example: 1" - echo " Strip this amount of leading components from file names on extraction." - echo " For example, to extract only 'myfile.txt' from an archive containing the" - echo " structure \`this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'." - echo "" - echo " -e, --exclude" - echo " type: string" - echo " example: docs/figures" - echo " Prevents any file or member whose name matches the shell wildcard" - echo " (pattern) from being extracted." 
-} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "tar_extract 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --strip_components) - [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRIP_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --strip_components. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --strip_components=*) - [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components=*\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRIP_COMPONENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - -s) - [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRIP_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --exclude) - [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXCLUDE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --exclude=*) - [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude=*\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -e) - [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'-e\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXCLUDE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -e. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_STRIP_COMPONENTS" ]]; then - if ! [[ "$VIASH_PAR_STRIP_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--strip_components' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-tar_extract-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/usr/bin/env bash - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_STRIP_COMPONENTS+x} ]; then echo "${VIASH_PAR_STRIP_COMPONENTS}" | sed "s#'#'\"'\"'#g;s#.*#par_strip_components='&'#" ; else echo "# par_strip_components="; fi ) -$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=() -mkdir -p \$par_output # Create output directory if it doesn't exist already - -if [ "\$par_strip_components" != "" ]; then - extra_params+=("--strip-components=\$par_strip_components") -fi - -if [ "\$par_exclude" != "" ]; then - extra_params+=("--exclude=\$par_exclude") -fi - -echo "Extracting \$par_input to \$par_output..." -echo "" -tar "\${extra_params[@]}" -xvf "\$par_input" -C "\$par_output" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/dataflow/concat/.config.vsh.yaml b/target/native/dataflow/concat/.config.vsh.yaml deleted file mode 100644 index a8c84537551..00000000000 --- a/target/native/dataflow/concat/.config.vsh.yaml +++ /dev/null @@ -1,222 +0,0 @@ -functionality: - name: "concat" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Paths to the different samples to be concatenated." - info: null - example: - - "sample_paths" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "string" - name: "--input_id" - description: "Names of the different samples that have to be concatenated. Must\ - \ be specified when using '--mode move'.\nIn this case, the ids will be used\ - \ for the columns names of the dataframes registring the conflicts.\nIf specified,\ - \ must be of same length as `--input`.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_sample_name" - description: "Name of the .obs key under which to add the sample names." - info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--other_axis_mode" - description: "How to handle the merging of other axis (var, obs, ...).\n\n - None:\ - \ keep no data\n - same: only keep elements of the matrices which are the same\ - \ in each of the samples\n - unique: only keep elements for which there is only\ - \ 1 possible value (1 value that can occur in multiple samples)\n - first: keep\ - \ the annotation from the first sample\n - only: keep elements that show up\ - \ in only one of the objects (1 unique element in only 1 sample)\n - move: identical\ - \ to 'same', but moving the conflicting values to .varm or .obsm\n" - info: null - default: - - "move" - required: false - choices: - - "same" - - "unique" - - "first" - - "only" - - "concat" - - "move" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Concatenates several uni-modal samples in .h5mu files into a single\ - \ file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - - type: "file" - path: "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - 
target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "pandas~=2.1.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - - "muon" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/concat" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/concat/concat" - viash_version: "0.7.5" - 
git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/dataflow/concat/concat b/target/native/dataflow/concat/concat deleted file mode 100755 index bf13d879091..00000000000 --- a/target/native/dataflow/concat/concat +++ /dev/null @@ -1,898 +0,0 @@ -#!/usr/bin/env bash - -# concat 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return 
the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. 
-function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="concat" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "concat 0.12.3" - echo "" - echo "Concatenates several uni-modal samples in .h5mu files into a single file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: sample_paths" - echo " Paths to the different samples to be concatenated." 
- echo "" - echo " --input_id" - echo " type: string, multiple values allowed" - echo " Names of the different samples that have to be concatenated. Must be" - echo " specified when using '--mode move'." - echo " In this case, the ids will be used for the columns names of the" - echo " dataframes registring the conflicts." - echo " If specified, must be of same length as \`--input\`." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --obs_sample_name" - echo " type: string" - echo " default: sample_id" - echo " Name of the .obs key under which to add the sample names." - echo "" - echo " --other_axis_mode" - echo " type: string" - echo " default: move" - echo " choices: [ same, unique, first, only, concat, move ]" - echo " How to handle the merging of other axis (var, obs, ...)." 
- echo " - None: keep no data" - echo " - same: only keep elements of the matrices which are the same in each" - echo " of the samples" - echo " - unique: only keep elements for which there is only 1 possible value" - echo " (1 value that can occur in multiple samples)" - echo " - first: keep the annotation from the first sample" - echo " - only: keep elements that show up in only one of the objects (1 unique" - echo " element in only 1 sample)" - echo " - move: identical to 'same', but moving the conflicting values to .varm" - echo " or .obsm" -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "concat 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID="$2" - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --input_id=*) - if [ -z "$VIASH_PAR_INPUT_ID" ]; then - VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_sample_name) - [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_SAMPLE_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_sample_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_sample_name=*) - [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name=*\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_SAMPLE_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --other_axis_mode) - [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OTHER_AXIS_MODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --other_axis_mode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --other_axis_mode=*) - [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode=*\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OTHER_AXIS_MODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then - VIASH_PAR_OBS_SAMPLE_NAME="sample_id" -fi -if [ -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then - VIASH_PAR_OTHER_AXIS_MODE="move" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_OTHER_AXIS_MODE" ]; then - VIASH_PAR_OTHER_AXIS_MODE_CHOICES=("same:unique:first:only:concat:move") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OTHER_AXIS_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_OTHER_AXIS_MODE:" ]]; then - ViashError '--other_axis_mode' specified value of \'$VIASH_PAR_OTHER_AXIS_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-concat-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -import anndata -import mudata as mu -import pandas as pd -import numpy as np -from collections.abc import Iterable -from multiprocessing import Pool -from pathlib import Path -from h5py import File as H5File -from typing import Literal -import shutil - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_sample_name': $( if [ ! -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then echo "r'${VIASH_PAR_OBS_SAMPLE_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'other_axis_mode': $( if [ ! -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then echo "r'${VIASH_PAR_OTHER_AXIS_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) - -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion - -# from compress_h5mu import compress_h5mu -from h5py import Group, Dataset -from typing import Union -from functools import partial - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded 
with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -# START TEMPORARY WORKAROUND setup_logger -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def indexes_unique(indices: Iterable[pd.Index]) -> bool: - combined_indices = indices[0].append(indices[1:]) - return combined_indices.is_unique - -def check_observations_unique(samples: Iterable[anndata.AnnData]) -> None: - observation_ids = [sample.obs.index for sample in samples] - if not indexes_unique(observation_ids): - raise ValueError("Observations are not unique across samples.") - - -def nunique(row): - unique = pd.unique(row) - unique_without_na = pd.core.dtypes.missing.remove_na_arraylike(unique) - return len(unique_without_na) > 1 - -def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) -> bool: - """ - Check if any row contains duplicate values, that are not NA. - """ - numpy_array = frame.to_numpy() - with Pool(n_processes) as pool: - is_duplicated = pool.map(nunique, iter(numpy_array)) - return any(is_duplicated) - -def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \\ - -> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]: - """ - Merge matrices by combining columns that have the same name. 
- Columns that contain conflicting values (e.i. the columns have different values), - are not merged, but instead moved to a new dataframe. - """ - column_names = set(column_name for var in matrices.values() for column_name in var) - logger.debug('Trying to concatenate columns: %s.', ",".join(column_names)) - if not column_names: - return {}, pd.DataFrame(index=align_to) - conflicts, concatenated_matrix = \\ - split_conflicts_and_concatenated_columns(n_processes, - matrices, - column_names, - align_to) - concatenated_matrix = cast_to_writeable_dtype(concatenated_matrix) - conflicts = {conflict_name: cast_to_writeable_dtype(conflict_df) - for conflict_name, conflict_df in conflicts.items()} - return conflicts, concatenated_matrix - -def get_first_non_na_value_vector(df): - numpy_arr = df.to_numpy() - n_rows, n_cols = numpy_arr.shape - col_index = pd.isna(numpy_arr).argmin(axis=1) - flat_index = n_cols * np.arange(n_rows) + col_index - return pd.Series(numpy_arr.ravel()[flat_index], index=df.index, name=df.columns[0]) - -def split_conflicts_and_concatenated_columns(n_processes: int, - matrices: dict[str, pd.DataFrame], - column_names: Iterable[str], - align_to: pd.Index | None = None) -> \\ - tuple[dict[str, pd.DataFrame], pd.DataFrame]: - """ - Retrieve columns with the same name from a list of dataframes which are - identical across all the frames (ignoring NA values). - Columns which are not the same are regarded as 'conflicts', - which are stored in seperate dataframes, one per columns - with the same name that store conflicting values. - """ - conflicts = {} - concatenated_matrix = [] - for column_name in column_names: - columns = {input_id: var[column_name] - for input_id, var in matrices.items() - if column_name in var} - assert columns, "Some columns should have been found." 
- concatenated_columns = pd.concat(columns.values(), axis=1, - join="outer", sort=False) - if any_row_contains_duplicate_values(n_processes, concatenated_columns): - concatenated_columns.columns = columns.keys() # Use the sample id as column name - if align_to is not None: - concatenated_columns = concatenated_columns.reindex(align_to, copy=False) - conflicts[f'conflict_{column_name}'] = concatenated_columns - else: - unique_values = get_first_non_na_value_vector(concatenated_columns) - concatenated_matrix.append(unique_values) - if not concatenated_matrix: - return conflicts, pd.DataFrame(index=align_to) - concatenated_matrix = pd.concat(concatenated_matrix, join="outer", - axis=1, sort=False) - if align_to is not None: - concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False) - return conflicts, concatenated_matrix - -def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame: - """ - Cast the dataframe to dtypes that can be written by mudata. - """ - # dtype inferral workfs better with np.nan - result = result.replace({pd.NA: np.nan}) - - # MuData supports nullable booleans and ints - # ie. \`IntegerArray\` and \`BooleanArray\` - result = result.convert_dtypes(infer_objects=True, - convert_integer=True, - convert_string=False, - convert_boolean=True, - convert_floating=False) - - # Convert leftover 'object' columns to string - # However, na values are supported, so convert all values except NA's to string - object_cols = result.select_dtypes(include='object').columns.values - for obj_col in object_cols: - result[obj_col] = result[obj_col].where(result[obj_col].isna(), result[obj_col].astype(str)).astype('category') - return result - -def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnData], output: anndata.AnnData) \\ - -> anndata.AnnData: - """ - Merge .var and .obs matrices of the anndata objects. Columns are merged - when the values (excl NA) are the same in each of the matrices. 
- Conflicting columns are moved to a separate dataframe (one dataframe for each column, - containing all the corresponding column from each sample). - """ - matrices_to_parse = ("var", "obs") - for matrix_name in matrices_to_parse: - matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()} - output_index = getattr(output, matrix_name).index - align_to = output_index if matrix_name == "var" else None - conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to) - if concatenated_matrix.empty: - concatenated_matrix.index = output_index - # Write the conflicts to the output - for conflict_name, conflict_data in conflicts.items(): - getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data - - # Set other annotation matrices in the output - setattr(output, matrix_name, concatenated_matrix) - - return output - - -def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str | Path], - other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData: - - concat_modes = { - "move": None, - } - other_axis_mode_to_apply = concat_modes.get(other_axis_mode, other_axis_mode) - - mod_data = {} - for input_id, input_file in zip(input_ids, input_files): - try: - mod_data[input_id] = mu.read_h5ad(input_file, mod=mod) - except KeyError as e: # Modality does not exist for this sample, skip it - if f"Unable to open object '{mod}' doesn't exist" not in str(e): - raise e - pass - check_observations_unique(mod_data.values()) - - concatenated_data = anndata.concat(mod_data.values(), join='outer', merge=other_axis_mode_to_apply) - - if other_axis_mode == "move": - concatenated_data = split_conflicts_modalities(n_processes, mod_data, concatenated_data) - - return concatenated_data - -def concatenate_modalities(n_processes: int, modalities: list[str], input_files: Path | str, - other_axis_mode: str, output_file: Path | str, - compression: Literal['gzip'] | Literal['lzf'], - input_ids: tuple[str] | None = 
None) -> None: - """ - Join the modalities together into a single multimodal sample. - """ - logger.info('Concatenating samples.') - output_file, input_files = Path(output_file), [Path(input_file) for input_file in input_files] - output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") - output_file_uncompressed.touch() - # Create empty mudata file - mdata = mu.MuData({modality: anndata.AnnData() for modality in modalities}) - mdata.write(output_file_uncompressed, compression=compression) - - for mod_name in modalities: - new_mod = concatenate_modality(n_processes, mod_name, - input_files, other_axis_mode, - input_ids) - logger.info("Writing out modality '%s' to '%s' with compression '%s'.", - mod_name, output_file_uncompressed, compression) - mu.write_h5ad(output_file_uncompressed, data=new_mod, mod=mod_name) - - if compression: - compress_h5mu(output_file_uncompressed, output_file, compression=compression) - output_file_uncompressed.unlink() - else: - shutil.move(output_file_uncompressed, output_file) - - logger.info("Concatenation successful.") - -def main() -> None: - # Get a list of all possible modalities - mods = set() - for path in par["input"]: - try: - with H5File(path, 'r') as f_root: - mods = mods | set(f_root["mod"].keys()) - except OSError: - raise OSError(f"Failed to load {path}. 
Is it a valid h5 file?") - - input_ids = None - if par["input_id"]: - input_ids: tuple[str] = tuple(i.strip() for i in par["input_id"]) - if len(input_ids) != len(par["input"]): - raise ValueError("The number of sample names must match the number of sample files.") - - if len(set(input_ids)) != len(input_ids): - raise ValueError("The sample names should be unique.") - - logger.info("\\nConcatenating data from paths:\\n\\t%s", - "\\n\\t".join(par["input"])) - - if par["other_axis_mode"] == "move" and not input_ids: - raise ValueError("--mode 'move' requires --input_ids.") - - n_processes = meta["cpus"] if meta["cpus"] else 1 - concatenate_modalities(n_processes, - list(mods), - par["input"], - par["other_axis_mode"], - par["output"], - par["output_compression"], - input_ids=input_ids) - - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/dataflow/concat/setup_logger.py b/target/native/dataflow/concat/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/dataflow/concat/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/dataflow/merge/.config.vsh.yaml b/target/native/dataflow/merge/.config.vsh.yaml deleted file mode 100644 index ae5c396225e..00000000000 --- a/target/native/dataflow/merge/.config.vsh.yaml +++ /dev/null @@ -1,175 +0,0 @@ -functionality: - name: "merge" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Paths to the single-modality .h5mu files that need to be combined" - info: null - default: - - "sample_paths" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Path to the output file." 
- info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Combine one or more single-modality .h5mu files together into one\ - \ .h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu" - - type: "file" - path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "pandas~=2.0.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false 
- transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/merge" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/merge/merge" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/dataflow/merge/merge b/target/native/dataflow/merge/merge deleted file mode 100755 index 19c8c65e64a..00000000000 --- a/target/native/dataflow/merge/merge +++ /dev/null @@ -1,563 +0,0 @@ -#!/usr/bin/env bash - -# merge 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. 
The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 
-VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. 
-function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="merge" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "merge 0.12.3" - echo "" - echo "Combine one or more single-modality .h5mu files together into one .h5mu file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " default: sample_paths" - echo " Paths to the single-modality .h5mu files that need to be combined" - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " default: output.h5mu" - echo " Path to the output file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
-} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "merge 0.12.3" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - -i) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="output.h5mu" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." 
- exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-merge-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -import mudata as md -import pandas as pd -import numpy as np - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - logger.info('Reading input files %s', ",".join(par["input"])) - input_samples = [md.read_h5mu(path) for path in par["input"]] - - logger.info('Merging into single object.') - sample_modalities = {} - for input_sample in input_samples: - for mod_name, mod_data in input_sample.mod.items(): - if mod_name in sample_modalities: - raise ValueError(f"Modality '{mod_name}' was found in more than 1 sample.") - sample_modalities[mod_name] = mod_data - - merged = md.MuData(sample_modalities) - merged.update() - for df_attr in ("var", "obs"): - df = getattr(merged, df_attr) - df = df.replace({pd.NA: np.nan}, inplace=False) - - # MuData supports nullable booleans and ints - # ie. 
\`IntegerArray\` and \`BooleanArray\` - df = df.convert_dtypes(infer_objects=True, - convert_integer=True, - convert_string=False, - convert_boolean=True, - convert_floating=False) - - # Convert leftover 'object' columns to string - object_cols = df.select_dtypes(include='object').columns.values - for obj_col in object_cols: - df[obj_col].astype(str).astype('category') - setattr(merged, df_attr, df) - - merged.write_h5mu(par["output"], compression=par["output_compression"]) - logger.info('Finished') - - -if __name__ == '__main__': - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/native/dataflow/merge/setup_logger.py b/target/native/dataflow/merge/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/dataflow/merge/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/dataflow/split_modalities/.config.vsh.yaml b/target/native/dataflow/split_modalities/.config.vsh.yaml deleted file mode 100644 index 9e246e14997..00000000000 --- a/target/native/dataflow/split_modalities/.config.vsh.yaml +++ /dev/null @@ -1,214 +0,0 @@ -functionality: - name: "split_modalities" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: 
"0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to a single .h5mu file." - info: null - default: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory containing multiple h5mu files." - info: null - example: - - "/path/to/output" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_types" - description: "A csv containing the base filename and modality type per output\ - \ file." - info: null - example: - - "types.csv" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--compression" - description: "The compression format to be used on the final h5mu object." 
- info: null - default: - - "gzip" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Split the modalities from a single .h5mu multimodal sample into seperate\ - \ .h5mu files. \n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: 
"memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/split_modalities" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/split_modalities/split_modalities" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/dataflow/split_modalities/setup_logger.py b/target/native/dataflow/split_modalities/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/dataflow/split_modalities/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/dataflow/split_modalities/split_modalities b/target/native/dataflow/split_modalities/split_modalities deleted file mode 100755 index 0f9a5adcc6d..00000000000 --- a/target/native/dataflow/split_modalities/split_modalities +++ /dev/null @@ -1,586 +0,0 @@ -#!/usr/bin/env bash - -# split_modalities 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work 
thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) -# * Robrecht Cannoodt (contributor) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} 
-# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="split_modalities" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "split_modalities 0.12.3" - echo "" - echo "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu" - echo "files." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " default: sample_path" - echo " Path to a single .h5mu file." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " example: /path/to/output" - echo " Output directory containing multiple h5mu files." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo " --output_types" - echo " type: file, required parameter, output, file must exist" - echo " example: types.csv" - echo " A csv containing the base filename and modality type per output file." - echo "" - echo " --compression" - echo " type: string" - echo " default: gzip" - echo " The compression format to be used on the final h5mu object." -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "split_modalities 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_types) - [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_TYPES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_types. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_types=*) - [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types=*\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_TYPES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --compression) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --compression=*) - [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then - ViashError '--output_types' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then - VIASH_PAR_COMPRESSION="gzip" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-split_modalities-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -import mudata as md -from sys import stdout -from pathlib import Path -import pandas as pd - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_types': $( if [ ! -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_TYPES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main() -> None: - output_dir = Path(par["output"]) - if not output_dir.is_dir(): - output_dir.mkdir(parents=True) - - logger.info('Reading input file %s', par['input']) - sample = md.read_h5mu(par["input"].strip()) - input_file = Path(par["input"]) - - logger.info('Creating output types csv') - - names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" - for mod_name in sample.mod.keys() } - df = pd.DataFrame({"name": list(names.keys()), "filename": list(names.values())}) - df.to_csv(par["output_types"], index=False) - - logger.info('Splitting up modalities %s', ", ".join(sample.mod.keys())) - for mod_name, mod in sample.mod.items(): - new_sample = md.MuData({mod_name: mod}) - logger.info('Writing to %s', names[mod_name]) - new_sample.write_h5mu(output_dir / names[mod_name], compression=par["output_compression"]) - - logger.info("Finished") - - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ ! 
-e "$VIASH_PAR_OUTPUT_TYPES" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_TYPES' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/native/download/sync_test_resources/.config.vsh.yaml b/target/native/download/sync_test_resources/.config.vsh.yaml deleted file mode 100644 index 00597bdad52..00000000000 --- a/target/native/download/sync_test_resources/.config.vsh.yaml +++ /dev/null @@ -1,170 +0,0 @@ -functionality: - name: "sync_test_resources" - namespace: "download" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "string" - name: "--input" - alternatives: - - "-i" - description: "Path to the S3 bucket to sync from." - info: null - default: - - "s3://openpipelines-data" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Path to the test resource directory." - info: null - default: - - "resources_test" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--quiet" - description: "Displays the operations that would be performed using the specified\ - \ command without actually running them." - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--dryrun" - description: "Does not display the operations performed from the specified command." 
- info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--delete" - description: "Files that exist in the destination but not in the source are deleted\ - \ during sync." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--exclude" - description: "Exclude all files or objects from the command that matches the specified\ - \ pattern." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Synchronise the test resources from s3://openpipelines-data to resources_test" - usage: "sync_test_resources\nsync_test_resources --input s3://openpipelines-data\ - \ --output resources_test\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "amazon/aws-cli:2.11.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "yum" - packages: - - "procps" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 
8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/download/sync_test_resources" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/download/sync_test_resources/sync_test_resources" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/download/sync_test_resources/sync_test_resources b/target/native/download/sync_test_resources/sync_test_resources deleted file mode 100755 index 519ee9a476f..00000000000 --- a/target/native/download/sync_test_resources/sync_test_resources +++ /dev/null @@ -1,557 +0,0 @@ -#!/usr/bin/env bash - -# sync_test_resources 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 
1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="sync_test_resources" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "sync_test_resources 0.12.3" - echo "" - echo "Synchronise the test resources from s3://openpipelines-data to resources_test" - echo "" - echo "Usage:" - echo "sync_test_resources" - echo "sync_test_resources --input s3://openpipelines-data --output resources_test" - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: string" - echo " default: s3://openpipelines-data" - echo " Path to the S3 bucket to sync from." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " default: resources_test" - echo " Path to the test resource directory." - echo "" - echo " --quiet" - echo " type: boolean_true" - echo " Displays the operations that would be performed using the specified" - echo " command without actually running them." - echo "" - echo " --dryrun" - echo " type: boolean_true" - echo " Does not display the operations performed from the specified command." - echo "" - echo " --delete" - echo " type: boolean_true" - echo " Files that exist in the destination but not in the source are deleted" - echo " during sync." - echo "" - echo " --exclude" - echo " type: string, multiple values allowed" - echo " Exclude all files or objects from the command that matches the specified" - echo " pattern." 
-} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "sync_test_resources 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quiet) - [ -n "$VIASH_PAR_QUIET" ] && ViashError Bad arguments for option \'--quiet\': \'$VIASH_PAR_QUIET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUIET=true - shift 1 - ;; - --dryrun) - [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DRYRUN=true - shift 1 - ;; - --delete) - [ -n "$VIASH_PAR_DELETE" ] && ViashError Bad arguments for option \'--delete\': \'$VIASH_PAR_DELETE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DELETE=true - shift 1 - ;; - --exclude) - if [ -z "$VIASH_PAR_EXCLUDE" ]; then - VIASH_PAR_EXCLUDE="$2" - else - VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --exclude=*) - if [ -z "$VIASH_PAR_EXCLUDE" ]; then - VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") - else - VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_INPUT+x} ]; then - VIASH_PAR_INPUT="s3://openpipelines-data" -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="resources_test" -fi -if [ -z ${VIASH_PAR_QUIET+x} ]; then - VIASH_PAR_QUIET="false" -fi -if [ -z ${VIASH_PAR_DRYRUN+x} ]; then - VIASH_PAR_DRYRUN="false" -fi -if [ -z ${VIASH_PAR_DELETE+x} ]; then - VIASH_PAR_DELETE="false" -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_QUIET" ]]; then - if ! [[ "$VIASH_PAR_QUIET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--quiet' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DRYRUN" ]]; then - if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DELETE" ]]; then - if ! [[ "$VIASH_PAR_DELETE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--delete' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-sync_test_resources-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\"'\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) -$( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "${VIASH_PAR_DRYRUN}" | sed "s#'#'\"'\"'#g;s#.*#par_dryrun='&'#" ; else echo "# par_dryrun="; fi ) -$( if [ ! -z ${VIASH_PAR_DELETE+x} ]; then echo "${VIASH_PAR_DELETE}" | sed "s#'#'\"'\"'#g;s#.*#par_delete='&'#" ; else echo "# par_delete="; fi ) -$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=( ) - -if [ "\$par_quiet" == "true" ]; then - extra_params+=( "--quiet" ) -fi -if [ "\$par_dryrun" == "true" ]; then - extra_params+=( "--dryrun" ) -fi -if [ "\$par_delete" == "true" ]; then - extra_params+=( "--delete" ) -fi - -if [ ! -z \${par_exclude+x} ]; then - IFS=":" - for var in \$par_exclude; do - unset IFS - extra_params+=( "--exclude" "\$var" ) - done -fi - - -# Disable the use of the Amazon EC2 instance metadata service (IMDS). -# see https://florian.ec/blog/github-actions-awscli-errors/ -# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 -export AWS_EC2_METADATA_DISABLED=true - -aws s3 sync "\$par_input" "\$par_output" --no-sign-request "\${extra_params[@]}" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/integrate/scarches/.config.vsh.yaml b/target/native/integrate/scarches/.config.vsh.yaml deleted file mode 100644 index 5cd42ef7185..00000000000 --- a/target/native/integrate/scarches/.config.vsh.yaml +++ /dev/null @@ -1,331 +0,0 @@ -functionality: - name: "scarches" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file to use as a query" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - description: "Path to the directory with reference model or a web link. For\ - \ HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--dataset_name" - description: "Name of query dataset to use as a batch name. If not set, name\ - \ of the input file is used" - info: null - default: - - "test_dataset" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--model_output" - description: "Output directory for model" - info: null - default: - - "model" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Early stopping arguments" - arguments: - - type: "boolean" - name: "--early_stopping" - description: "Whether to perform early stopping with respect to the validation\ - \ set." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--early_stopping_monitor" - description: "Metric logged during validation set epoch." - info: null - default: - - "elbo_validation" - required: false - choices: - - "elbo_validation" - - "reconstruction_loss_validation" - - "kl_local_validation" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--early_stopping_patience" - description: "Number of validation epochs with no improvement after which training\ - \ will be stopped." 
- info: null - default: - - 45 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--early_stopping_min_delta" - description: "Minimum change in the monitored quantity to qualify as an improvement,\ - \ i.e. an absolute change of less than min_delta, will count as no improvement." - info: null - default: - - 0.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset, defaults to (20000 / number\ - \ of cells) * 400 or 400; whichever is smallest." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--reduce_lr_on_plateau" - description: "Whether to monitor validation loss and reduce learning rate when\ - \ validation set `lr_scheduler_metric` plateaus." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_factor" - description: "Factor to reduce learning rate." - info: null - default: - - 0.6 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_patience" - description: "Number of epochs with no improvement after which learning rate\ - \ will be reduced." 
- info: null - default: - - 30.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs reference mapping with scArches" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - - type: "file" - path: "resources_test/HLCA_reference_model/HLCA_reference_model.zip" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/pytorch:23.09-py3" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scvi-tools~=1.0.3" - - "pandas~=2.1.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" 
- mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/integrate/scarches" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/integrate/scarches/scarches" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/integrate/scarches/scarches b/target/native/integrate/scarches/scarches deleted file mode 100755 index 50478f24076..00000000000 --- a/target/native/integrate/scarches/scarches +++ /dev/null @@ -1,1086 +0,0 @@ -#!/usr/bin/env bash - -# scarches 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Vladimir Shitov - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no 
something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scarches" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scarches 0.12.3" - echo "" - echo "Performs reference mapping with scArches" - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file to use as a query" - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo "" - echo " -r, --reference" - echo " type: file, required parameter, file must exist" - echo " Path to the directory with reference model or a web link. For HLCA use" - echo " https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" - echo "" - echo " --dataset_name" - echo " type: string" - echo " default: test_dataset" - echo " Name of query dataset to use as a batch name. If not set, name of the" - echo " input file is used" - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo " --model_output" - echo " type: file, output, file must exist" - echo " default: model" - echo " Output directory for model" - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_integrated_scanvi" - echo " In which .obsm slot to store the resulting integrated embedding." - echo "" - echo "Early stopping arguments:" - echo " --early_stopping" - echo " type: boolean" - echo " Whether to perform early stopping with respect to the validation set." - echo "" - echo " --early_stopping_monitor" - echo " type: string" - echo " default: elbo_validation" - echo " choices: [ elbo_validation, reconstruction_loss_validation," - echo "kl_local_validation ]" - echo " Metric logged during validation set epoch." - echo "" - echo " --early_stopping_patience" - echo " type: integer" - echo " default: 45" - echo " min: 1" - echo " Number of validation epochs with no improvement after which training" - echo " will be stopped." - echo "" - echo " --early_stopping_min_delta" - echo " type: double" - echo " default: 0.0" - echo " min: 0.0" - echo " Minimum change in the monitored quantity to qualify as an improvement," - echo " i.e. an absolute change of less than min_delta, will count as no" - echo " improvement." - echo "" - echo "Learning parameters:" - echo " --max_epochs" - echo " type: integer, required parameter" - echo " Number of passes through the dataset, defaults to (20000 / number of" - echo " cells) * 400 or 400; whichever is smallest." - echo "" - echo " --reduce_lr_on_plateau" - echo " type: boolean" - echo " default: true" - echo " Whether to monitor validation loss and reduce learning rate when" - echo " validation set \`lr_scheduler_metric\` plateaus." - echo "" - echo " --lr_factor" - echo " type: double" - echo " default: 0.6" - echo " min: 0.0" - echo " Factor to reduce learning rate." 
- echo "" - echo " --lr_patience" - echo " type: double" - echo " default: 30.0" - echo " min: 0.0" - echo " Number of epochs with no improvement after which learning rate will be" - echo " reduced." -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scarches 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -r) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --dataset_name) - [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DATASET_NAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --dataset_name. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --dataset_name=*) - [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name=*\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_DATASET_NAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --model_output) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --model_output=*) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping) - [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --early_stopping=*) - [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping=*\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_monitor) - [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MONITOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_monitor. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping_monitor=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor=*\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MONITOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_patience) - [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_PATIENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_patience. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping_patience=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience=*\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EARLY_STOPPING_PATIENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --early_stopping_min_delta) - [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MIN_DELTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_min_delta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --early_stopping_min_delta=*) - [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta=*\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EARLY_STOPPING_MIN_DELTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_epochs) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_epochs=*) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reduce_lr_on_plateau) - [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REDUCE_LR_ON_PLATEAU="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reduce_lr_on_plateau. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reduce_lr_on_plateau=*) - [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau=*\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REDUCE_LR_ON_PLATEAU=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lr_factor) - [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_FACTOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_factor. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --lr_factor=*) - [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor=*\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_FACTOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lr_patience) - [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_PATIENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_patience. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --lr_patience=*) - [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience=*\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LR_PATIENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then - ViashError '--max_epochs' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_DATASET_NAME+x} ]; then - VIASH_PAR_DATASET_NAME="test_dataset" -fi -if [ -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then - VIASH_PAR_MODEL_OUTPUT="model" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_integrated_scanvi" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then - VIASH_PAR_EARLY_STOPPING_MONITOR="elbo_validation" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then - VIASH_PAR_EARLY_STOPPING_PATIENCE="45" -fi -if [ -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then - VIASH_PAR_EARLY_STOPPING_MIN_DELTA="0.0" -fi -if [ -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then - VIASH_PAR_REDUCE_LR_ON_PLATEAU="true" -fi -if [ -z ${VIASH_PAR_LR_FACTOR+x} ]; then - VIASH_PAR_LR_FACTOR="0.6" -fi -if [ -z ${VIASH_PAR_LR_PATIENCE+x} ]; then - VIASH_PAR_LR_PATIENCE="30.0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_EARLY_STOPPING" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--early_stopping' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING_PATIENCE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--early_stopping_patience' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi - if [[ $VIASH_PAR_EARLY_STOPPING_PATIENCE -lt 1 ]]; then - ViashError '--early_stopping_patience' has be more than or equal to 1. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ]]; then - if ! [[ "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--early_stopping_min_delta' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_EARLY_STOPPING_MIN_DELTA '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_EARLY_STOPPING_MIN_DELTA -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--early_stopping_min_delta' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ]]; then - if ! [[ "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--reduce_lr_on_plateau' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LR_FACTOR" ]]; then - if ! [[ "$VIASH_PAR_LR_FACTOR" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--lr_factor' has to be a double. 
Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_LR_FACTOR '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_LR_FACTOR -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--lr_factor' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_PAR_LR_PATIENCE" ]]; then - if ! [[ "$VIASH_PAR_LR_PATIENCE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--lr_patience' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi - if command -v bc &> /dev/null; then - if ! [[ `echo $VIASH_PAR_LR_PATIENCE '>=' 0.0 | bc` -eq 1 ]]; then - ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - elif command -v awk &> /dev/null; then - if ! [[ `awk -v n1=$VIASH_PAR_LR_PATIENCE -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then - ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. - exit 1 - fi - else - ViashWarning '--lr_patience' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_EARLY_STOPPING_MONITOR" ]; then - VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES=("elbo_validation:reconstruction_loss_validation:kl_local_validation") - IFS=':' - set -f - if ! 
[[ ":${VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES[*]}:" =~ ":$VIASH_PAR_EARLY_STOPPING_MONITOR:" ]]; then - ViashError '--early_stopping_monitor' specified value of \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scarches-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import mudata -import scvi -from torch.cuda import is_available as cuda_is_available -try: - from torch.backends.mps import is_available as mps_is_available -except ModuleNotFoundError: - # Older pytorch versions - # MacOS GPUs - def mps_is_available(): - return False - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'dataset_name': $( if [ ! -z ${VIASH_PAR_DATASET_NAME+x} ]; then echo "r'${VIASH_PAR_DATASET_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def _read_model_name_from_registry(model_path) -> str: - """Read registry with information about the model, return the model name""" - registry = scvi.model.base.BaseModelClass.load_registry(model_path) - return registry["model_name"] - - -def _detect_base_model(model_path): - """Read from the model's file which scvi_tools model it contains""" - - names_to_models_map = { - "AUTOZI": scvi.model.AUTOZI, - "CondSCVI": scvi.model.CondSCVI, - "DestVI": scvi.model.DestVI, - "LinearSCVI": scvi.model.LinearSCVI, - "PEAKVI": scvi.model.PEAKVI, - "SCANVI": scvi.model.SCANVI, - "SCVI": scvi.model.SCVI, - "TOTALVI": scvi.model.TOTALVI, - "MULTIVI": scvi.model.MULTIVI, - "AmortizedLDA": scvi.model.AmortizedLDA, - "JaxSCVI": scvi.model.JaxSCVI, - } - - return names_to_models_map[_read_model_name_from_registry(model_path)] - - -def extract_file_name(file_path): - """Return the name of the file from path to this file - - Examples - -------- - >>> extract_file_name("resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu") - pbmc_1k_protein_v3_mms - """ - slash_position = file_path.rfind("/") - dot_position = file_path.rfind(".") - - return file_path[slash_position + 1: dot_position] - - -def map_to_existing_reference(adata_query, 
model_path, check_val_every_n_epoch=1): - """ - A function to map the query data to the reference atlas - - Input: - * adata_query: An AnnData object with the query - * model_path: The reference model directory - - Output: - * vae_query: the trained scvi_tools model - * adata_query: The AnnData object with the query preprocessed for the mapping to the reference - """ - model = _detect_base_model(model_path) - - try: - model.prepare_query_anndata(adata_query, model_path) - except ValueError: - logger.warning("ValueError thrown when preparing adata for mapping. Clearing .varm field to prevent it") - adata_query.varm.clear() - model.prepare_query_anndata(adata_query, model_path) - - # Load query data into the model - vae_query = model.load_query_data( - adata_query, - model_path, - freeze_dropout=True - ) - - # Train scArches model for query mapping - vae_query.train( - max_epochs=par["max_epochs"], - early_stopping=par['early_stopping'], - early_stopping_monitor=par['early_stopping_monitor'], - early_stopping_patience=par['early_stopping_patience'], - early_stopping_min_delta=par['early_stopping_min_delta'], - check_val_every_n_epoch=check_val_every_n_epoch, - use_gpu=(cuda_is_available() or mps_is_available()) - ) - - return vae_query, adata_query - - -def _convert_object_dtypes_to_strings(adata): - """Convert object dtypes in .var and .obs to string to prevent error when saving file""" - def convert_cols(df): - object_cols = df.columns[df.dtypes == "object"] - for col in object_cols: - df[col] = df[col].astype(str) - return df - - adata.var = convert_cols(adata.var) - adata.obs = convert_cols(adata.obs) - - return adata - - -def _get_model_path(model_path: str): - """Obtain path to the directory with reference model. If the proposed \`model_path\` is a .zip archive, unzip it. 
If nesessary, convert model to the new format - - Parameters - ---------- - model_path : str - Path to a directory, where to search for the model or to a zip file containing the model - - Returns - ------- - Path to a directory with reference model in format of scvi-tools>=0.15 - """ - import os - import zipfile - import tempfile - from pathlib import Path - - if os.path.isdir(model_path) and "model.pt" in os.listdir(model_path): - # Probably, the \`model_path\` already contains model in the output format of scvi-tools>=0.15 - return model_path - - # The model either has old format or is a zip file downloaded from Zenodo - new_directory = Path(tempfile.TemporaryDirectory().name) - - if zipfile.is_zipfile(model_path): - with zipfile.ZipFile(model_path) as archive: - archive.extractall(new_directory) - model_dir = next(new_directory.glob("**/*.pt")).parent - - else: - model_dir = next(Path(model_path).glob("**/*.pt")).parent - - if "model_params.pt" in os.listdir(model_dir): - # The model is in the \`directory\`, but it was generated with scvi-tools<0.15 - # TODO: for new references (that could not be SCANVI based), we need to check the base class somehow. Reading registry does not work with models generated by scvi-tools<0.15 - # Here I assume that the reference model is for HLCA and thus is SCANVI based - converted_model_path = os.path.join(model_dir, "converted") - scvi.model.SCANVI.convert_legacy_save(model_dir, converted_model_path) - return converted_model_path - - elif "model.pt" in os.listdir(model_dir): - # Archive contained model in the new format, so just return the directory - return model_dir - - else: - raise ValueError("Cannot find model in the provided reference path. Please, provide a path or a link to the directory with reference model. 
For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip") - - -def main(): - - mdata_query = mudata.read(par["input"].strip()) - adata_query = mdata_query.mod[par["modality"]].copy() - - if "dataset" not in adata_query.obs.columns: - # Write name of the dataset as batch variable - if par["dataset_name"] is None: - logger.info("Detecting dataset name") - par["dataset_name"] = extract_file_name(par["input"]) - logger.info(f"Detected {par['dataset_name']}") - - adata_query.obs["dataset"] = par["dataset_name"] - - model_path = _get_model_path(par["reference"]) - vae_query, adata_query = map_to_existing_reference(adata_query, model_path=model_path) - model_name = _read_model_name_from_registry(model_path) - - # Save info about the used model - mdata_query.mod[par["modality"]].uns["integration_method"] = model_name - - logger.info("Trying to write latent representation") - output_key = par["obsm_output"].format(model_name=model_name) - mdata_query.mod[par["modality"]].obsm[output_key] = vae_query.get_latent_representation() - - logger.info("Converting dtypes") - mdata_query.mod[par["modality"]] = _convert_object_dtypes_to_strings(mdata_query.mod[par["modality"]]) - - logger.info("Updating mudata") - try: - mdata_query.update() # Without that error might be thrown during file saving - except KeyError: - # Sometimes this error is thrown, but then everything is magically fixed, and the file gets saved normally - # This is discussed here a bit: https://github.com/scverse/mudata/issues/27 - logger.warning("KeyError was thrown during updating mudata. Probably, the file is fixed after that, but be careful") - - logger.info("Saving h5mu file") - mdata_query.write_h5mu(par["output"].strip(), compression=par["output_compression"]) - - logger.info("Saving model") - vae_query.save(par["model_output"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" 
- -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/native/integrate/scarches/setup_logger.py b/target/native/integrate/scarches/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/integrate/scarches/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/integrate/totalvi/.config.vsh.yaml b/target/native/integrate/totalvi/.config.vsh.yaml deleted file mode 100644 index 54eeb4e7f9b..00000000000 --- a/target/native/integrate/totalvi/.config.vsh.yaml +++ /dev/null @@ -1,348 +0,0 @@ -functionality: - name: "totalvi" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file with query data to integrate with reference." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - description: "Input h5mu file with reference data to train the TOTALVI model." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--force_retrain" - alternatives: - - "-f" - description: "If true, retrain the model and save it to reference_model_path" - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--query_modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--query_proteins_modality" - description: "Name of the modality in the input (query) h5mu file containing\ - \ protein data" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_proteins_modality" - description: "Name of the modality containing proteins in the reference" - info: null - default: - - "prot" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. If None, X is used" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: "Column name discriminating between your batches." 
- info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_input" - description: ".var column containing highly variable genes. By default, do not\ - \ subset genes." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_integrated_totalvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_normalized_rna_output" - description: "In which .obsm slot to store the normalized RNA from TOTALVI." - info: null - default: - - "X_totalvi_normalized_rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_normalized_protein_output" - description: "In which .obsm slot to store the normalized protein data from\ - \ TOTALVI." - info: null - default: - - "X_totalvi_normalized_protein" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference_model_path" - description: "Directory with the reference model. 
If not exists, trained model\ - \ will be saved there" - info: null - default: - - "totalvi_model_reference" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--query_model_path" - description: "Directory, where the query model will be saved" - info: null - default: - - "totalvi_model_query" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset" - info: null - default: - - 400 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_query_epochs" - description: "Number of passes through the dataset, when fine-tuning model for\ - \ query" - info: null - default: - - 200 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--weight_decay" - description: "Weight decay, when fine-tuning model for query" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9" - target_organization: "openpipelines-bio" 
- target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "torchmetrics~=0.11.0" - - "scvi-tools~=1.0.3" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/integrate/totalvi" - executable: 
"/home/runner/work/openpipeline/openpipeline/target/native/integrate/totalvi/totalvi" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/integrate/totalvi/setup_logger.py b/target/native/integrate/totalvi/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/integrate/totalvi/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/integrate/totalvi/totalvi b/target/native/integrate/totalvi/totalvi deleted file mode 100755 index a6e4b85ef67..00000000000 --- a/target/native/integrate/totalvi/totalvi +++ /dev/null @@ -1,985 +0,0 @@ -#!/usr/bin/env bash - -# totalvi 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Vladimir Shitov - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no 
something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="totalvi" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "totalvi 0.12.3" - echo "" - echo "Performs mapping to the reference by totalvi model:" - echo "https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" - echo "" - echo "Inputs:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file with query data to integrate with reference." - echo "" - echo " -r, --reference" - echo " type: file, required parameter, file must exist" - echo " Input h5mu file with reference data to train the TOTALVI model." 
- echo "" - echo " -f, --force_retrain" - echo " type: boolean_true" - echo " If true, retrain the model and save it to reference_model_path" - echo "" - echo " --query_modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --query_proteins_modality" - echo " type: string" - echo " Name of the modality in the input (query) h5mu file containing protein" - echo " data" - echo "" - echo " --reference_modality" - echo " type: string" - echo " default: rna" - echo "" - echo " --reference_proteins_modality" - echo " type: string" - echo " default: prot" - echo " Name of the modality containing proteins in the reference" - echo "" - echo " --input_layer" - echo " type: string" - echo " Input layer to use. If None, X is used" - echo "" - echo " --obs_batch" - echo " type: string" - echo " default: sample_id" - echo " Column name discriminating between your batches." - echo "" - echo " --var_input" - echo " type: string" - echo " .var column containing highly variable genes. By default, do not subset" - echo " genes." - echo "" - echo "Outputs:" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Output h5mu file." - echo "" - echo " --obsm_output" - echo " type: string" - echo " default: X_integrated_totalvi" - echo " In which .obsm slot to store the resulting integrated embedding." - echo "" - echo " --obsm_normalized_rna_output" - echo " type: string" - echo " default: X_totalvi_normalized_rna" - echo " In which .obsm slot to store the normalized RNA from TOTALVI." - echo "" - echo " --obsm_normalized_protein_output" - echo " type: string" - echo " default: X_totalvi_normalized_protein" - echo " In which .obsm slot to store the normalized protein data from TOTALVI." - echo "" - echo " --reference_model_path" - echo " type: file, output, file must exist" - echo " default: totalvi_model_reference" - echo " Directory with the reference model. 
If not exists, trained model will be" - echo " saved there" - echo "" - echo " --query_model_path" - echo " type: file, output, file must exist" - echo " default: totalvi_model_query" - echo " Directory, where the query model will be saved" - echo "" - echo "Learning parameters:" - echo " --max_epochs" - echo " type: integer" - echo " default: 400" - echo " Number of passes through the dataset" - echo "" - echo " --max_query_epochs" - echo " type: integer" - echo " default: 200" - echo " Number of passes through the dataset, when fine-tuning model for query" - echo "" - echo " --weight_decay" - echo " type: double" - echo " default: 0.0" - echo " Weight decay, when fine-tuning model for query" -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "totalvi 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -r) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --force_retrain) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - -f) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - --query_modality) - [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_modality. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --query_modality=*) - [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality=*\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --query_proteins_modality) - [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_PROTEINS_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_proteins_modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --query_proteins_modality=*) - [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality=*\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_modality) - [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_modality=*) - [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality=*\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_proteins_modality) - [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_PROTEINS_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_proteins_modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_proteins_modality=*) - [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality=*\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_layer) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_layer=*) - [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_batch) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obs_batch=*) - [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --var_input) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --var_input=*) - [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --obsm_output) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_output=*) - [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_normalized_rna_output) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_rna_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_normalized_rna_output=*) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obsm_normalized_protein_output) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_protein_output. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obsm_normalized_protein_output=*) - [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_model_path) - [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_MODEL_PATH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_model_path. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_model_path=*) - [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path=*\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --query_model_path) - [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUERY_MODEL_PATH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_model_path. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --query_model_path=*) - [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path=*\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_QUERY_MODEL_PATH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_epochs) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_epochs=*) - [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_query_epochs) - [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_QUERY_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_query_epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_query_epochs=*) - [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs=*\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_QUERY_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --weight_decay) - [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WEIGHT_DECAY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --weight_decay. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --weight_decay=*) - [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay=*\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_WEIGHT_DECAY=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE+x} ]; then - ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then - VIASH_PAR_FORCE_RETRAIN="false" -fi -if [ -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then - VIASH_PAR_QUERY_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then - VIASH_PAR_REFERENCE_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then - VIASH_PAR_REFERENCE_PROTEINS_MODALITY="prot" -fi -if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then - VIASH_PAR_OBS_BATCH="sample_id" -fi -if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then - VIASH_PAR_OBSM_OUTPUT="X_integrated_totalvi" -fi -if [ -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then - VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="X_totalvi_normalized_rna" -fi -if [ -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then - VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="X_totalvi_normalized_protein" -fi -if [ -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then - VIASH_PAR_REFERENCE_MODEL_PATH="totalvi_model_reference" -fi -if [ -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then 
- VIASH_PAR_QUERY_MODEL_PATH="totalvi_model_query" -fi -if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then - VIASH_PAR_MAX_EPOCHS="400" -fi -if [ -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then - VIASH_PAR_MAX_QUERY_EPOCHS="200" -fi -if [ -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then - VIASH_PAR_WEIGHT_DECAY="0.0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then - if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--force_retrain' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MAX_QUERY_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_query_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_WEIGHT_DECAY" ]]; then - if ! [[ "$VIASH_PAR_WEIGHT_DECAY" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--weight_decay' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && [ ! -d "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" -fi -if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ] && [ ! 
-d "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-totalvi-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from typing import Tuple - -import os -import sys -import mudata -from anndata import AnnData # For type hints -from mudata import MuData # For type hints -import numpy as np -import scvi -from scipy.sparse import issparse - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'query_modality': $( if [ ! -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'query_proteins_modality': $( if [ ! -z ${VIASH_PAR_QUERY_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_proteins_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! 
-z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_normalized_rna_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obsm_normalized_protein_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_model_path': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'query_model_path': $( if [ ! -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_QUERY_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_query_epochs': $( if [ ! -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_QUERY_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'weight_decay': $( if [ ! -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then echo "float(r'${VIASH_PAR_WEIGHT_DECAY//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def align_proteins_names(adata_reference: AnnData, mdata_query: MuData, adata_query: AnnData, reference_proteins_key: str, query_proteins_key: str) -> AnnData: - """Make sure that proteins are located in the same .obsm slot in reference and query. Pad query proteins with zeros if they are absent""" - proteins_reference = adata_reference.obsm[reference_proteins_key] - - # If query has no protein data, put matrix of zeros - if not query_proteins_key or query_proteins_key not in mdata_query.mod: - adata_query.obsm[reference_proteins_key] = np.zeros((adata_query.n_obs, proteins_reference.shape[1])) - else: - # Make sure that proteins expression has the same key in query and reference - adata_query.obsm[reference_proteins_key] = adata_query.obsm[query_proteins_key] - - return adata_query - - -def extract_proteins_to_anndata(mdata: MuData, rna_modality_key, protein_modality_key, input_layer, hvg_var_key=None) -> AnnData: - """TOTALVI requires data to be stored in AnnData format with protein counts in .obsm slot. 
This function performs the conversion""" - adata: AnnData = mdata.mod[rna_modality_key].copy() - - if hvg_var_key: - selected_genes = adata.var_names[adata.var[hvg_var_key]] - adata = adata[:, selected_genes].copy() - - if protein_modality_key in mdata.mod: - # Put the proteins modality into .obsm slot - proteins_reference_adata = mdata.mod[protein_modality_key].copy() - - if input_layer is None: - proteins = proteins_reference_adata.X - else: - proteins = proteins_reference_adata.obsm[input_layer] - - if issparse(proteins): - proteins = proteins.toarray() - - adata.obsm[protein_modality_key] = proteins - - return adata - - -def build_reference_model(adata_reference: AnnData, max_train_epochs: int = 400) -> scvi.model.TOTALVI: - - vae_reference = scvi.model.TOTALVI(adata_reference, use_layer_norm="both", use_batch_norm="none") - vae_reference.train(max_train_epochs) - - vae_reference.save(par["reference_model_path"]) - - return vae_reference - -def is_retraining_model() -> bool: - """Decide, whether reference model should be trained. 
It happens when no model exists or force_retrain flag is on""" - - trained_model_exists = os.path.isdir(par["reference_model_path"]) and ("model.pt" in os.listdir(par["reference_model_path"])) - return not trained_model_exists or par["force_retrain"] - - -def map_query_to_reference(mdata_reference: MuData, mdata_query: MuData, adata_query: AnnData) -> Tuple[scvi.model.TOTALVI, AnnData]: - """Build model on the provided reference if necessary, and map query to the reference""" - - adata_reference: AnnData = extract_proteins_to_anndata(mdata_reference, rna_modality_key=par["reference_modality"], protein_modality_key=par["reference_proteins_modality"], - input_layer=par["input_layer"], hvg_var_key=par["var_input"]) - - scvi.model.TOTALVI.setup_anndata( - adata_reference, - batch_key=par["obs_batch"], - protein_expression_obsm_key=par["reference_proteins_modality"] - ) - - if is_retraining_model(): - vae_reference = build_reference_model(adata_reference, max_train_epochs=par["max_epochs"]) - else: - vae_reference = scvi.model.TOTALVI.load(dir_path=par["reference_model_path"], adata=adata_reference) - - adata_query: AnnData = align_proteins_names(adata_reference, mdata_query, adata_query, reference_proteins_key=par["reference_proteins_modality"], - query_proteins_key=par["query_proteins_modality"]) - - # Reorder genes and pad missing genes with 0s - scvi.model.TOTALVI.prepare_query_anndata(adata_query, vae_reference) - - # Train the model for query - vae_query = scvi.model.TOTALVI.load_query_data( - adata_query, - vae_reference - ) - vae_query.train(par["max_query_epochs"], plan_kwargs=dict(weight_decay=par["weight_decay"])) - - return vae_query, adata_query - -def main(): - mdata_query = mudata.read(par["input"].strip()) - adata_query = extract_proteins_to_anndata(mdata_query, - rna_modality_key=par["query_modality"], - protein_modality_key=par["query_proteins_modality"], - input_layer=par["input_layer"], - hvg_var_key=par["var_input"]) - - if 
par["reference"].endswith(".h5mu"): - logger.info("Reading reference") - mdata_reference = mudata.read(par["reference"].strip()) - - logger.info("Mapping query to the reference") - vae_query, adata_query = map_query_to_reference(mdata_reference, mdata_query, adata_query) - else: - raise ValueError("Incorrect format of reference, please provide a .h5mu file") - - adata_query.uns["integration_method"] = "totalvi" - - logger.info("Getting the latent representation of query") - mdata_query.mod[par["query_modality"]].obsm[par["obsm_output"]] = vae_query.get_latent_representation() - - norm_rna, norm_protein = vae_query.get_normalized_expression() - mdata_query.mod[par["query_modality"]].obsm[par["obsm_normalized_rna_output"]] = norm_rna.to_numpy() - - if par["query_proteins_modality"] in mdata_query.mod: - mdata_query.mod[par["query_proteins_modality"]].obsm[par["obsm_normalized_protein_output"]] = norm_protein.to_numpy() - - logger.info("Updating mdata") - mdata_query.update() - - logger.info("Saving updated query data") - mdata_query.write_h5mu(par["output"].strip()) - - logger.info("Saving query model") - vae_query.save(par["query_model_path"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && [ ! -e "$VIASH_PAR_REFERENCE_MODEL_PATH" ]; then - ViashError "Output file '$VIASH_PAR_REFERENCE_MODEL_PATH' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ] && [ ! -e "$VIASH_PAR_QUERY_MODEL_PATH" ]; then - ViashError "Output file '$VIASH_PAR_QUERY_MODEL_PATH' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/labels_transfer/knn/.config.vsh.yaml b/target/native/labels_transfer/knn/.config.vsh.yaml deleted file mode 100644 index 0b6913a03af..00000000000 --- a/target/native/labels_transfer/knn/.config.vsh.yaml +++ /dev/null @@ -1,379 +0,0 @@ -functionality: - name: "knn" - namespace: "labels_transfer" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - roles: - - "author" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Input dataset (query) arguments" - arguments: - - type: "file" - name: "--input" - description: "The query data to transfer the labels to. Should be a .h5mu file." - info: - label: "Query" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - slots: - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's inference,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - required: false - description: "The embedding to use for the classifier's inference.\ - \ Override using the `--input_obsm_features` argument. If not\ - \ provided, the `.X` slot will be used instead.\nMake sure that\ - \ embedding was obtained in the same way as the reference embedding\ - \ (e.g. by the same model or preprocessing).\n" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to use." 
- info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's inference.\ - \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ - \ was obtained in the same way as the reference embedding (e.g. by the same\ - \ model or preprocessing).\n" - info: null - example: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference dataset arguments" - arguments: - - type: "file" - name: "--reference" - description: "The reference data to train classifiers on." - info: - label: "Reference" - file_format: - type: "h5ad" - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's training,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - description: "The embedding to use for the classifier's training. Override\ - \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ - \ was obtained in the same way as the query embedding (e.g. by the same\ - \ model or preprocessing).\n" - required: true - obs: - - type: "string" - name: "targets" - multiple: true - example: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - description: "The target labels to transfer. Override using the `--reference_obs_targets`\ - \ argument." 
- required: true - example: - - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ - Make sure that embedding was obtained in the same way as the query embedding\ - \ (e.g. by the same model or preprocessing).\n" - info: null - default: - - "X_integrated_scanvi" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_targets" - description: "The `.obs` key of the target labels to tranfer." - info: null - default: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The query data in .h5mu format with predicted labels transfered\ - \ from the reference." - info: - label: "Output data" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - obs: - - type: "string" - name: "predictions" - description: "The predicted labels. Override using the `--output_obs_predictions`\ - \ argument." - required: true - - type: "double" - name: "uncertainty" - description: "The uncertainty of the predicted labels. Override using\ - \ the `--output_obs_uncertainty` argument." - required: false - obsm: - - type: "double" - name: "X_integrated_scanvi" - description: "The embedding used for the classifier's inference. 
Could\ - \ have any name, specified by `input_obsm_features` argument.\"" - required: false - uns: - - type: "string" - name: "parameters" - example: "labels_tranfer" - description: "Additional information about the parameters used for\ - \ the label transfer." - required: true - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_predictions" - description: "In which `.obs` slots to store the predicted information.\nIf\ - \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_uncertainty" - description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ - If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_uns_parameters" - description: "The `.uns` key to store additional information about the parameters\ - \ used for the label transfer." 
- info: null - default: - - "labels_transfer" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--n_neighbors" - alternatives: - - "-k" - description: "Number of nearest neighbors to use for classification" - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../utils/helper.py" - - type: "file" - path: "../../utils/setup_logger.py" - description: "Performs label transfer from reference to query using KNN classifier" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: - method_id: "KNN_pynndescent" - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "pynndescent~=0.5.8" - - "numba~=0.56.4" - - "numpy~=1.23.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - 
directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/labels_transfer/knn" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/labels_transfer/knn/knn" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/labels_transfer/knn/helper.py b/target/native/labels_transfer/knn/helper.py deleted file mode 100644 index a90bf59efdb..00000000000 --- a/target/native/labels_transfer/knn/helper.py +++ /dev/null @@ -1,32 +0,0 @@ -def check_arguments(par): - # check output .obs predictions - if not par["output_obs_predictions"]: - par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] - assert len(par["output_obs_predictions"]) == 
len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" - - # check output .obs uncertainty - if not par["output_obs_uncertainty"]: - par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] - assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" - - return par - -def get_reference_features(adata_reference, par, logger): - if par["reference_obsm_features"] is None: - logger.info("Using .X of reference data") - train_data = adata_reference.X - else: - logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") - train_data = adata_reference.obsm[par["reference_obsm_features"]] - - return train_data - -def get_query_features(adata, par, logger): - if par["input_obsm_features"] is None: - logger.info("Using .X of query data") - query_data = adata.X - else: - logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") - query_data = adata.obsm[par["input_obsm_features"]] - - return query_data \ No newline at end of file diff --git a/target/native/labels_transfer/knn/knn b/target/native/labels_transfer/knn/knn deleted file mode 100755 index ac701e326a7..00000000000 --- a/target/native/labels_transfer/knn/knn +++ /dev/null @@ -1,773 +0,0 @@ -#!/usr/bin/env bash - -# knn 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Vladimir Shitov (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="knn" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "knn 0.12.3" - echo "" - echo "Performs label transfer from reference to query using KNN classifier" - echo "" - echo "Input dataset (query) arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " The query data to transfer the labels to. Should be a .h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " Which modality to use." - echo "" - echo " --input_obsm_features" - echo " type: string" - echo " example: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's inference." - echo " If not provided, the \`.X\` slot will be used instead." - echo " Make sure that embedding was obtained in the same way as the reference" - echo " embedding (e.g. by the same model or preprocessing)." - echo "" - echo "Reference dataset arguments:" - echo " --reference" - echo " type: file, file must exist" - echo " example:" - echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - echo " The reference data to train classifiers on." - echo "" - echo " --reference_obsm_features" - echo " type: string, required parameter" - echo " default: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's training." 
- echo " Make sure that embedding was obtained in the same way as the query" - echo " embedding (e.g. by the same model or preprocessing)." - echo "" - echo " --reference_obs_targets" - echo " type: string, multiple values allowed" - echo " default:" - echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" - echo " The \`.obs\` key of the target labels to tranfer." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " The query data in .h5mu format with predicted labels transfered from the" - echo " reference." - echo "" - echo " --output_obs_predictions" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the predicted information." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_pred\"\` suffix." - echo "" - echo " --output_obs_uncertainty" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the uncertainty of the predictions." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_uncertainty\"\` suffix." - echo "" - echo " --output_uns_parameters" - echo " type: string" - echo " default: labels_transfer" - echo " The \`.uns\` key to store additional information about the parameters used" - echo " for the label transfer." 
- echo "" - echo "Learning parameters:" - echo " -k, --n_neighbors" - echo " type: integer, required parameter" - echo " Number of nearest neighbors to use for classification" -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "knn 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_obsm_features) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_obsm_features=*) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obsm_features) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --reference_obsm_features=*) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obs_targets) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="$2" - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obs_targets=*) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_obs_predictions) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_obs_predictions=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_obs_uncertainty) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_obs_uncertainty=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_uns_parameters) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_uns_parameters=*) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_neighbors) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_neighbors=*) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1") - shift 1 - ;; - -k) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'-k\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -k. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then - ViashError '--reference_obsm_features' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then - ViashError '--n_neighbors' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" -fi -if [ -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then - VIASH_PAR_OUTPUT_UNS_PARAMETERS="labels_transfer" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then - if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-knn-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import warnings - -import mudata -import numpy as np -import scanpy as sc -from scipy.sparse import issparse -import pynndescent -import numba - - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! 
-z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -from helper import check_arguments, get_reference_features, get_query_features -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger - -@numba.njit -def weighted_prediction(weights, ref_cats): - """Get highest weight category.""" - N = len(weights) - predictions = np.zeros((N,), dtype=ref_cats.dtype) - uncertainty = np.zeros((N,)) - for i in range(N): - obs_weights = 
weights[i] - obs_cats = ref_cats[i] - best_prob = 0 - for c in np.unique(obs_cats): - cand_prob = np.sum(obs_weights[obs_cats == c]) - if cand_prob > best_prob: - best_prob = cand_prob - predictions[i] = c - uncertainty[i] = max(1 - best_prob, 0) - - return predictions, uncertainty - -def distances_to_affinities(distances): - stds = np.std(distances, axis=1) - stds = (2.0 / stds) ** 2 - stds = stds.reshape(-1, 1) - distances_tilda = np.exp(-np.true_divide(distances, stds)) - - return distances_tilda / np.sum(distances_tilda, axis=1, keepdims=True) - -def main(par): - logger = setup_logger() - - logger.info("Checking arguments") - par = check_arguments(par) - - logger.info("Reading input (query) data") - mdata = mudata.read(par["input"]) - adata = mdata.mod[par["modality"]] - - logger.info("Reading reference data") - adata_reference = sc.read(par["reference"], backup_url=par["reference"]) - - # fetch feature data - train_data = get_reference_features(adata_reference, par, logger) - query_data = get_query_features(adata, par, logger) - - # pynndescent does not support sparse matrices - if issparse(train_data): - warnings.warn("Converting sparse matrix to dense. 
This may consume a lot of memory.") - train_data = train_data.toarray() - - logger.debug(f"Shape of train data: {train_data.shape}") - - logger.info("Building NN index") - ref_nn_index = pynndescent.NNDescent(train_data, n_neighbors=par["n_neighbors"]) - ref_nn_index.prepare() - - ref_neighbors, ref_distances = ref_nn_index.query(query_data, k=par["n_neighbors"]) - - weights = distances_to_affinities(ref_distances) - - output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) - - # for each annotation level, get prediction and uncertainty - - for obs_tar, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): - logger.info(f"Predicting labels for {obs_tar}") - ref_cats = adata_reference.obs[obs_tar].cat.codes.to_numpy()[ref_neighbors] - prediction, uncertainty = weighted_prediction(weights, ref_cats) - prediction = np.asarray(adata_reference.obs[obs_tar].cat.categories)[prediction] - - adata.obs[obs_pred], adata.obs[obs_unc] = prediction, uncertainty - - # Write information about labels transfer to uns - output_uns_parameters[obs_tar] = { - "method": "KNN_pynndescent", - "n_neighbors": par["n_neighbors"], - "reference": par["reference"] - } - - adata.uns[par["output_uns_parameters"]] = output_uns_parameters - - mdata.mod[par['modality']] = adata - mdata.update() - mdata.write_h5mu(par['output'].strip()) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/labels_transfer/knn/setup_logger.py b/target/native/labels_transfer/knn/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/labels_transfer/knn/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/labels_transfer/xgboost/.config.vsh.yaml b/target/native/labels_transfer/xgboost/.config.vsh.yaml deleted file mode 100644 index 73880630f3e..00000000000 --- a/target/native/labels_transfer/xgboost/.config.vsh.yaml +++ /dev/null @@ -1,594 +0,0 @@ -functionality: - name: "xgboost" - namespace: "labels_transfer" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - roles: - - "author" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Input dataset (query) arguments" - arguments: - - type: "file" - name: "--input" - description: "The query data to transfer the labels to. Should be a .h5mu file." - info: - label: "Query" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." 
- required: true - slots: - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's inference,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - required: false - description: "The embedding to use for the classifier's inference.\ - \ Override using the `--input_obsm_features` argument. If not\ - \ provided, the `.X` slot will be used instead.\nMake sure that\ - \ embedding was obtained in the same way as the reference embedding\ - \ (e.g. by the same model or preprocessing).\n" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to use." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's inference.\ - \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ - \ was obtained in the same way as the reference embedding (e.g. by the same\ - \ model or preprocessing).\n" - info: null - example: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference dataset arguments" - arguments: - - type: "file" - name: "--reference" - description: "The reference data to train classifiers on." 
- info: - label: "Reference" - file_format: - type: "h5ad" - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's training,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - description: "The embedding to use for the classifier's training. Override\ - \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ - \ was obtained in the same way as the query embedding (e.g. by the same\ - \ model or preprocessing).\n" - required: true - obs: - - type: "string" - name: "targets" - multiple: true - example: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - description: "The target labels to transfer. Override using the `--reference_obs_targets`\ - \ argument." - required: true - example: - - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ - Make sure that embedding was obtained in the same way as the query embedding\ - \ (e.g. by the same model or preprocessing).\n" - info: null - default: - - "X_integrated_scanvi" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_targets" - description: "The `.obs` key of the target labels to tranfer." 
- info: null - default: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The query data in .h5mu format with predicted labels transfered\ - \ from the reference." - info: - label: "Output data" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - obs: - - type: "string" - name: "predictions" - description: "The predicted labels. Override using the `--output_obs_predictions`\ - \ argument." - required: true - - type: "double" - name: "uncertainty" - description: "The uncertainty of the predicted labels. Override using\ - \ the `--output_obs_uncertainty` argument." - required: false - obsm: - - type: "double" - name: "X_integrated_scanvi" - description: "The embedding used for the classifier's inference. Could\ - \ have any name, specified by `input_obsm_features` argument.\"" - required: false - uns: - - type: "string" - name: "parameters" - example: "labels_tranfer" - description: "Additional information about the parameters used for\ - \ the label transfer." 
- required: true - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_predictions" - description: "In which `.obs` slots to store the predicted information.\nIf\ - \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_uncertainty" - description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ - If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_uns_parameters" - description: "The `.uns` key to store additional information about the parameters\ - \ used for the label transfer." - info: null - default: - - "labels_transfer" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Execution arguments" - arguments: - - type: "boolean_true" - name: "--force_retrain" - alternatives: - - "-f" - description: "Retrain models on the reference even if model_output directory\ - \ already has trained classifiers. WARNING! It will rewrite existing classifiers\ - \ for targets in the model_output directory!" - info: null - direction: "input" - dest: "par" - - type: "boolean" - name: "--use_gpu" - description: "Use GPU during models training and inference (recommended)." 
- info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--verbosity" - alternatives: - - "-v" - description: "The verbosity level for evaluation of the classifier from the\ - \ range [0,2]" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--model_output" - description: "Output directory for model" - info: null - default: - - "model" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "double" - name: "--learning_rate" - alternatives: - - "--eta" - description: "Step size shrinkage used in update to prevents overfitting. Range:\ - \ [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_split_loss" - alternatives: - - "--gamma" - description: "Minimum loss reduction required to make a further partition on\ - \ a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_depth" - alternatives: - - "-d" - description: "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 6 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_child_weight" - description: "Minimum sum of instance weight (hessian) needed in a child. 
See\ - \ https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_delta_step" - description: "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--subsample" - description: "Subsample ratio of the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sampling_method" - description: "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - "uniform" - required: false - choices: - - "uniform" - - "gradient_based" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bytree" - description: "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bylevel" - description: "Subsample ratio of columns for each level. Range (0, 1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bynode" - description: "Subsample ratio of columns for each node (split). Range (0, 1].\ - \ See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--reg_lambda" - alternatives: - - "--lambda" - description: "L2 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--reg_alpha" - alternatives: - - "--alpha" - description: "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--scale_pos_weight" - description: "Control the balance of positive and negative weights, useful for\ - \ unbalanced classes. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../utils/helper.py" - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs label transfer from reference to query using XGBoost classifier" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: - method_id: "XGBClassifier" - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "xgboost~=1.7.1" - - "scikit-learn~=1.1.1" - - "numpy~=1.23.5" - - "pandas~=1.4.4" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - 
labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/labels_transfer/xgboost" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/labels_transfer/xgboost/xgboost" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/labels_transfer/xgboost/helper.py b/target/native/labels_transfer/xgboost/helper.py deleted file mode 100644 index a90bf59efdb..00000000000 --- a/target/native/labels_transfer/xgboost/helper.py +++ /dev/null @@ -1,32 +0,0 @@ -def check_arguments(par): - # check output .obs predictions - if not par["output_obs_predictions"]: - par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] - assert len(par["output_obs_predictions"]) == len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" - - # check output .obs 
uncertainty - if not par["output_obs_uncertainty"]: - par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] - assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" - - return par - -def get_reference_features(adata_reference, par, logger): - if par["reference_obsm_features"] is None: - logger.info("Using .X of reference data") - train_data = adata_reference.X - else: - logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") - train_data = adata_reference.obsm[par["reference_obsm_features"]] - - return train_data - -def get_query_features(adata, par, logger): - if par["input_obsm_features"] is None: - logger.info("Using .X of query data") - query_data = adata.X - else: - logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") - query_data = adata.obsm[par["input_obsm_features"]] - - return query_data \ No newline at end of file diff --git a/target/native/labels_transfer/xgboost/setup_logger.py b/target/native/labels_transfer/xgboost/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/labels_transfer/xgboost/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/labels_transfer/xgboost/xgboost b/target/native/labels_transfer/xgboost/xgboost deleted file mode 100755 index b71872e8cca..00000000000 --- a/target/native/labels_transfer/xgboost/xgboost +++ /dev/null @@ -1,1520 +0,0 @@ -#!/usr/bin/env bash - -# xgboost 0.12.3 -# -# This wrapper 
script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Vladimir Shitov (author) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname 
"$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="xgboost" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "xgboost 0.12.3" - echo "" - echo "Performs label transfer from reference to query using XGBoost classifier" - echo "" - echo "Input dataset (query) arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " The query data to transfer the labels to. Should be a .h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " Which modality to use." - echo "" - echo " --input_obsm_features" - echo " type: string" - echo " example: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's inference." - echo " If not provided, the \`.X\` slot will be used instead." - echo " Make sure that embedding was obtained in the same way as the reference" - echo " embedding (e.g. by the same model or preprocessing)." 
- echo "" - echo "Reference dataset arguments:" - echo " --reference" - echo " type: file, file must exist" - echo " example:" - echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - echo " The reference data to train classifiers on." - echo "" - echo " --reference_obsm_features" - echo " type: string, required parameter" - echo " default: X_integrated_scanvi" - echo " The \`.obsm\` key of the embedding to use for the classifier's training." - echo " Make sure that embedding was obtained in the same way as the query" - echo " embedding (e.g. by the same model or preprocessing)." - echo "" - echo " --reference_obs_targets" - echo " type: string, multiple values allowed" - echo " default:" - echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" - echo " The \`.obs\` key of the target labels to tranfer." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " The query data in .h5mu format with predicted labels transfered from the" - echo " reference." - echo "" - echo " --output_obs_predictions" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the predicted information." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_pred\"\` suffix." - echo "" - echo " --output_obs_uncertainty" - echo " type: string, multiple values allowed" - echo " In which \`.obs\` slots to store the uncertainty of the predictions." - echo " If provided, must have the same length as \`--reference_obs_targets\`." - echo " If empty, will default to the \`reference_obs_targets\` combined with the" - echo " \`\"_uncertainty\"\` suffix." 
- echo "" - echo " --output_uns_parameters" - echo " type: string" - echo " default: labels_transfer" - echo " The \`.uns\` key to store additional information about the parameters used" - echo " for the label transfer." - echo "" - echo "Execution arguments:" - echo " -f, --force_retrain" - echo " type: boolean_true" - echo " Retrain models on the reference even if model_output directory already" - echo " has trained classifiers. WARNING! It will rewrite existing classifiers" - echo " for targets in the model_output directory!" - echo "" - echo " --use_gpu" - echo " type: boolean" - echo " default: false" - echo " Use GPU during models training and inference (recommended)." - echo "" - echo " -v, --verbosity" - echo " type: integer" - echo " default: 1" - echo " The verbosity level for evaluation of the classifier from the range" - echo " [0,2]" - echo "" - echo " --model_output" - echo " type: file, output, file must exist" - echo " default: model" - echo " Output directory for model" - echo "" - echo "Learning parameters:" - echo " --eta, --learning_rate" - echo " type: double" - echo " default: 0.3" - echo " Step size shrinkage used in update to prevents overfitting. Range:" - echo " [0,1]. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --gamma, --min_split_loss" - echo " type: double" - echo " default: 0.0" - echo " Minimum loss reduction required to make a further partition on a leaf" - echo " node of the tree. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " -d, --max_depth" - echo " type: integer" - echo " default: 6" - echo " Maximum depth of a tree. 
See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --min_child_weight" - echo " type: integer" - echo " default: 1" - echo " Minimum sum of instance weight (hessian) needed in a child. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --max_delta_step" - echo " type: double" - echo " default: 0.0" - echo " Maximum delta step we allow each leaf output to be. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --subsample" - echo " type: double" - echo " default: 1.0" - echo " Subsample ratio of the training instances. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --sampling_method" - echo " type: string" - echo " default: uniform" - echo " choices: [ uniform, gradient_based ]" - echo " The method to use to sample the training instances. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --colsample_bytree" - echo " type: double" - echo " default: 1.0" - echo " Fraction of columns to be subsampled. Range (0, 1]. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --colsample_bylevel" - echo " type: double" - echo " default: 1.0" - echo " Subsample ratio of columns for each level. Range (0, 1]. 
See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --colsample_bynode" - echo " type: double" - echo " default: 1.0" - echo " Subsample ratio of columns for each node (split). Range (0, 1]. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --lambda, --reg_lambda" - echo " type: double" - echo " default: 1.0" - echo " L2 regularization term on weights. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --alpha, --reg_alpha" - echo " type: double" - echo " default: 0.0" - echo " L1 regularization term on weights. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" - echo "" - echo " --scale_pos_weight" - echo " type: double" - echo " default: 1.0" - echo " Control the balance of positive and negative weights, useful for" - echo " unbalanced classes. See" - echo " " - echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" - echo " for the reference" -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "xgboost 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input_obsm_features) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_obsm_features=*) - [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_REFERENCE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference=*) - [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obsm_features) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reference_obsm_features=*) - [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reference_obs_targets) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="$2" - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --reference_obs_targets=*) - if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_obs_predictions) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_obs_predictions=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then - VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_obs_uncertainty) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_obs_uncertainty=*) - if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") - else - VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --output_uns_parameters) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_uns_parameters=*) - [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --force_retrain) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - -f) - [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FORCE_RETRAIN=true - shift 1 - ;; - --use_gpu) - [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_GPU="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_gpu. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --use_gpu=*) - [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu=*\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_USE_GPU=$(ViashRemoveFlags "$1") - shift 1 - ;; - --verbosity) - [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VERBOSITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --verbosity. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --verbosity=*) - [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity=*\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VERBOSITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - -v) - [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'-v\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_VERBOSITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -v. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --model_output) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --model_output=*) - [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --learning_rate) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --learning_rate=*) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --eta) - [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--eta\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LEARNING_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --eta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_split_loss) - [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SPLIT_LOSS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_split_loss. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_split_loss=*) - [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss=*\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MIN_SPLIT_LOSS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gamma) - [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--gamma\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SPLIT_LOSS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gamma. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_depth) - [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DEPTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_depth. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_depth=*) - [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth=*\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DEPTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - -d) - [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'-d\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DEPTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -d. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_child_weight) - [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CHILD_WEIGHT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_child_weight. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --min_child_weight=*) - [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight=*\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CHILD_WEIGHT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_delta_step) - [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DELTA_STEP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_delta_step. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_delta_step=*) - [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step=*\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_DELTA_STEP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --subsample) - [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --subsample=*) - [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample=*\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUBSAMPLE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sampling_method) - [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SAMPLING_METHOD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_method. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sampling_method=*) - [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method=*\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLING_METHOD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --colsample_bytree) - [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYTREE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bytree. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --colsample_bytree=*) - [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree=*\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYTREE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --colsample_bylevel) - [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYLEVEL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bylevel. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --colsample_bylevel=*) - [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel=*\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_COLSAMPLE_BYLEVEL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --colsample_bynode) - [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYNODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bynode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --colsample_bynode=*) - [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode=*\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COLSAMPLE_BYNODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --reg_lambda) - [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_LAMBDA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_lambda. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reg_lambda=*) - [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda=*\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_LAMBDA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lambda) - [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_LAMBDA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lambda. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --reg_alpha) - [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_ALPHA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_alpha. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --reg_alpha=*) - [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha=*\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_ALPHA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --alpha) - [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REG_ALPHA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scale_pos_weight) - [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCALE_POS_WEIGHT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --scale_pos_weight. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --scale_pos_weight=*) - [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight=*\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SCALE_POS_WEIGHT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then - ViashError '--reference_obsm_features' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then - VIASH_PAR_REFERENCE_OBS_TARGETS="ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" -fi -if [ -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then - VIASH_PAR_OUTPUT_UNS_PARAMETERS="labels_transfer" -fi -if [ -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then - VIASH_PAR_FORCE_RETRAIN="false" -fi -if [ -z ${VIASH_PAR_USE_GPU+x} ]; then - VIASH_PAR_USE_GPU="false" -fi -if [ -z ${VIASH_PAR_VERBOSITY+x} ]; then - VIASH_PAR_VERBOSITY="1" -fi -if [ -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then - VIASH_PAR_MODEL_OUTPUT="model" -fi -if [ -z ${VIASH_PAR_LEARNING_RATE+x} ]; then - VIASH_PAR_LEARNING_RATE="0.3" -fi -if [ -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then - VIASH_PAR_MIN_SPLIT_LOSS="0.0" -fi -if [ -z ${VIASH_PAR_MAX_DEPTH+x} ]; then - VIASH_PAR_MAX_DEPTH="6" -fi -if [ -z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then - 
VIASH_PAR_MIN_CHILD_WEIGHT="1" -fi -if [ -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then - VIASH_PAR_MAX_DELTA_STEP="0.0" -fi -if [ -z ${VIASH_PAR_SUBSAMPLE+x} ]; then - VIASH_PAR_SUBSAMPLE="1.0" -fi -if [ -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then - VIASH_PAR_SAMPLING_METHOD="uniform" -fi -if [ -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then - VIASH_PAR_COLSAMPLE_BYTREE="1.0" -fi -if [ -z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then - VIASH_PAR_COLSAMPLE_BYLEVEL="1.0" -fi -if [ -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then - VIASH_PAR_COLSAMPLE_BYNODE="1.0" -fi -if [ -z ${VIASH_PAR_REG_LAMBDA+x} ]; then - VIASH_PAR_REG_LAMBDA="1.0" -fi -if [ -z ${VIASH_PAR_REG_ALPHA+x} ]; then - VIASH_PAR_REG_ALPHA="0.0" -fi -if [ -z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then - VIASH_PAR_SCALE_POS_WEIGHT="1.0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then - ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then - if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--force_retrain' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_USE_GPU" ]]; then - if ! [[ "$VIASH_PAR_USE_GPU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--use_gpu' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_VERBOSITY" ]]; then - if ! [[ "$VIASH_PAR_VERBOSITY" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--verbosity' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then - if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ]]; then - if ! [[ "$VIASH_PAR_MIN_SPLIT_LOSS" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--min_split_loss' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_DEPTH" ]]; then - if ! [[ "$VIASH_PAR_MAX_DEPTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_depth' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ]]; then - if ! [[ "$VIASH_PAR_MIN_CHILD_WEIGHT" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_child_weight' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_DELTA_STEP" ]]; then - if ! [[ "$VIASH_PAR_MAX_DELTA_STEP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--max_delta_step' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SUBSAMPLE" ]]; then - if ! [[ "$VIASH_PAR_SUBSAMPLE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--subsample' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ]]; then - if ! [[ "$VIASH_PAR_COLSAMPLE_BYTREE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--colsample_bytree' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ]]; then - if ! 
[[ "$VIASH_PAR_COLSAMPLE_BYLEVEL" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--colsample_bylevel' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ]]; then - if ! [[ "$VIASH_PAR_COLSAMPLE_BYNODE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--colsample_bynode' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_REG_LAMBDA" ]]; then - if ! [[ "$VIASH_PAR_REG_LAMBDA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--reg_lambda' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_REG_ALPHA" ]]; then - if ! [[ "$VIASH_PAR_REG_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--reg_alpha' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ]]; then - if ! [[ "$VIASH_PAR_SCALE_POS_WEIGHT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--scale_pos_weight' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_SAMPLING_METHOD" ]; then - VIASH_PAR_SAMPLING_METHOD_CHOICES=("uniform:gradient_based") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_SAMPLING_METHOD_CHOICES[*]}:" =~ ":$VIASH_PAR_SAMPLING_METHOD:" ]]; then - ViashError '--sampling_method' specified value of \'$VIASH_PAR_SAMPLING_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-xgboost-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." 
- exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import json -import os -from typing import Optional -import yaml -from pathlib import Path - -import mudata -import numpy as np -import scanpy as sc -import pandas as pd -import xgboost as xgb -from sklearn.model_selection import train_test_split -from sklearn.metrics import classification_report -from sklearn.preprocessing import LabelEncoder - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), - 'output_uns_parameters': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'use_gpu': $( if [ ! -z ${VIASH_PAR_USE_GPU+x} ]; then echo "r'${VIASH_PAR_USE_GPU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'verbosity': $( if [ ! -z ${VIASH_PAR_VERBOSITY+x} ]; then echo "int(r'${VIASH_PAR_VERBOSITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_split_loss': $( if [ ! -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then echo "float(r'${VIASH_PAR_MIN_SPLIT_LOSS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_depth': $( if [ ! -z ${VIASH_PAR_MAX_DEPTH+x} ]; then echo "int(r'${VIASH_PAR_MAX_DEPTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_child_weight': $( if [ ! -z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then echo "int(r'${VIASH_PAR_MIN_CHILD_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_delta_step': $( if [ ! -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DELTA_STEP//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'sampling_method': $( if [ ! -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then echo "r'${VIASH_PAR_SAMPLING_METHOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'colsample_bytree': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYTREE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'colsample_bylevel': $( if [ ! 
-z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYLEVEL//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'colsample_bynode': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYNODE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'reg_lambda': $( if [ ! -z ${VIASH_PAR_REG_LAMBDA+x} ]; then echo "float(r'${VIASH_PAR_REG_LAMBDA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'reg_alpha': $( if [ ! -z ${VIASH_PAR_REG_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_REG_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'scale_pos_weight': $( if [ ! -z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then echo "float(r'${VIASH_PAR_SCALE_POS_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -from helper import check_arguments, get_reference_features, get_query_features -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -# read config arguments -config = yaml.safe_load(Path(meta["config"]).read_text()) - -# look for training params for method -argument_groups = { grp["name"]: grp["arguments"] for grp in config["functionality"]["argument_groups"] } -training_arg_names = [ arg["name"].replace("--", "") for arg in argument_groups["Learning parameters"] ] -training_params = { arg_name: par[arg_name] for arg_name in training_arg_names } - -def encode_labels(y): - labels_encoder = LabelEncoder() - labels_encoder.fit(y) - - return labels_encoder.transform(y), labels_encoder - - -def get_model_eval(xgb_model, X_test, y_test, labels_encoder): - preds = xgb_model.predict(X_test) - - cr = classification_report(labels_encoder.inverse_transform(y_test), - 
labels_encoder.inverse_transform(preds), - output_dict=True) - cr_df = pd.DataFrame(cr).transpose() - - return cr_df - - -def train_test_split_adata(adata, labels): - train_data = pd.DataFrame(data=adata.X, index=adata.obs_names) - - X_train, X_test, y_train, y_test = train_test_split( - train_data, labels, test_size=0.2, random_state=42, stratify=labels) - - return X_train, X_test, y_train, y_test - - -def train_xgb_model(X_train, y_train, gpu=True) -> xgb.XGBClassifier: - n_classes = len(np.unique(y_train)) - objective = "binary:logistic" if n_classes == 2 else "multi:softprob" - - tree_method = "gpu_hist" if gpu else "hist" - xgbc = xgb.XGBClassifier(tree_method=tree_method, objective=objective, **training_params) - xgbc.fit(X_train, y_train) - - return xgbc - - -def build_classifier(X, y, labels_encoder, label_key, eval_verbosity: Optional[int] = 1, gpu=True) -> xgb.XGBClassifier: - # Adata prep - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) - #Note: Do we need a new train-test split for each classifier? - - # Model training - xgb_model = train_xgb_model(X_train, y_train, gpu=gpu) - - # Model eval - if eval_verbosity != 0: - cr_df = get_model_eval(xgb_model, X_test, y_test, labels_encoder) - - if eval_verbosity == 2: - print(cr_df) - - else: - overall_accuracy = cr_df["support"]["accuracy"] - low_prec_key = cr_df.precision.idxmin() - low_prec_val = cr_df.precision.min() - low_rec_key = cr_df.recall.idxmin() - low_rec_val = cr_df.recall.min() - low_f1_key = cr_df["f1-score"].idxmin() - low_f1_val = cr_df["f1-score"].min() - - print("") - print(f"Summary stats for {label_key} model:") - print(f"Overall accuracy: {overall_accuracy}") - print(f"Min. precision: {low_prec_key}: {low_prec_val}") - print(f"Min. Recall: {low_rec_key}: {low_rec_val}") - print(f"Min. 
F1-score: {low_f1_key}: {low_f1_val}") - print("") - - return xgb_model - - -def build_ref_classifiers(adata_reference, targets, model_path, - eval_verbosity: Optional[int] = 1, gpu: Optional[bool] = True) -> None: - """ - This function builds xgboost classifiers on a reference embedding for a designated number of - adata_reference.obs columns. Classifier .xgb files and a model_info.json file is written to the \`model_path\` - directory. Model evaluation is printed to stdout. - - Inputs: - * \`adata_reference\`: The AnnData object that was used to train the reference model - * \`model_path\`: The reference model directory where the classifiers will also be stored - * \`eval_verbosity\`: The verbosity level for evaluation of the classifier from the range [0;2]. - * \`gpu\`: Boolean indicating whether a gpu is available for classifier training - - - Example: - \`\`\` - >>> adata - AnnData object with n_obs x n_vars = 700 x 765 - obs: "ann_finest_level", "ann_level_1" - - >>> os.listdir("/path/to/model") - model_params.pt* - - >>> build_ref_classifiers(adata, "path/to/model", eval_verbosity=1, gpu=True) - >>> os.listdir("/path/to/model") - classifier_ann_finest_level.xgb* model_info.json* - classifier_ann_level_1.xgb* model_params.pt* - \`\`\` - """ - - # Check inputs - if not isinstance(eval_verbosity, int): - raise TypeError("\`eval_verbosity\` should be an integer between 0 and 2.") - - if eval_verbosity < 0 or eval_verbosity > 2: - raise ValueError("\`eval_verbosity\` should be an integer between 0 and 2.") - - train_data = get_reference_features(adata_reference, par, logger) - - if not os.path.exists(model_path): - os.makedirs(model_path, exist_ok=True) - - # Map from name of classifier to file names - classifiers = dict() - - for label, obs_pred in zip(targets, par["output_obs_predictions"]): - if label not in adata_reference.obs: - raise ValueError(f"{label} is not in the \`adata\` object passed!") - - filename = "classifier_" + label + ".xgb" - - labels, 
labels_encoder = encode_labels(adata_reference.obs[label]) - logger.info(f"Classes: {labels_encoder.classes_}") - - logger.info(f"Building classifier for {label}...") - xgb_model = build_classifier( - X=train_data, - y=labels, - labels_encoder=labels_encoder, - label_key=label, - eval_verbosity=eval_verbosity, - gpu=gpu - ) - - # Save classifier - logger.info("Saving model") - xgb_model.save_model(os.path.join(model_path, filename)) - - # Store classifier info - classifiers[label] = { - "filename": filename, - "labels": labels_encoder.classes_.tolist(), - "obs_column": obs_pred, - "model_params": training_params, - } - - # Store model_info.json file - model_info = { - "classifier_info": classifiers - } - - logger.info("Writing model_info to the file") - # Read previous file if it exists - if os.path.exists(model_path + "/model_info.json"): - logger.info("Old model_info file found, updating") - with open(model_path + "/model_info.json", "r") as f: - old_model_info = json.loads(f.read()) - - for key in old_model_info: - if key in model_info: - old_model_info[key].update(model_info[key]) - json_string = json.dumps(old_model_info, indent=4) - - else: - logger.info("Creating a new file") - json_string = json.dumps(model_info, indent=4) - - with open(model_path + "/model_info.json", "w") as f: - f.write(json_string) - - -def project_labels( - query_dataset, - cell_type_classifier_model: xgb.XGBClassifier, - annotation_column_name='label_pred', - uncertainty_column_name='label_uncertainty', - uncertainty_thresh=None # Note: currently not passed to predict function -): - """ - A function that projects predicted labels onto the query dataset, along with uncertainty scores. - Performs in-place update of the adata object, adding columns to the \`obs\` DataFrame. 
- - Input: - * \`query_dataset\`: The query \`AnnData\` object - * \`model_file\`: Path to the classification model file - * \`prediction_key\`: Column name in \`adata.obs\` where to store the predicted labels - * \`uncertainty_key\`: Column name in \`adata.obs\` where to store the uncertainty scores - * \`uncertainty_thresh\`: The uncertainty threshold above which we call a cell 'Unknown' - - Output: - Nothing is output, the passed anndata is modified inplace - - """ - - if (uncertainty_thresh is not None) and (uncertainty_thresh < 0 or uncertainty_thresh > 1): - raise ValueError(f'\`uncertainty_thresh\` must be \`None\` or between 0 and 1.') - - query_data = get_query_features(query_dataset, par, logger) - - # Predict labels and probabilities - query_dataset.obs[annotation_column_name] = cell_type_classifier_model.predict(query_data) - - logger.info("Predicting probabilities") - probs = cell_type_classifier_model.predict_proba(query_data) - - # Format probabilities - df_probs = pd.DataFrame(probs, columns=cell_type_classifier_model.classes_, index=query_dataset.obs_names) - query_dataset.obs[uncertainty_column_name] = 1 - df_probs.max(1) - - # Note: this is here in case we want to propose a set of values for the user to accept to seed the - # manual curation of predicted labels - if uncertainty_thresh is not None: - logger.info("Marking uncertain predictions") - query_dataset.obs[annotation_column_name + "_filtered"] = [ - val if query_dataset.obs[uncertainty_column_name][i] < uncertainty_thresh - else "Unknown" for i, val in enumerate(query_dataset.obs[annotation_column_name])] - - return query_dataset - - -def predict( - query_dataset, - cell_type_classifier_model_path, - annotation_column_name: str, - prediction_column_name: str, - uncertainty_column_name: str, - models_info, - use_gpu: bool = False -) -> pd.DataFrame: - """ - Returns \`obs\` DataFrame with prediction columns appended - """ - - tree_method = "gpu_hist" if use_gpu else "hist" - - labels = 
models_info["classifier_info"][annotation_column_name]["labels"] - - objective = "binary:logistic" if len(labels) == 2 else "multi:softprob" - cell_type_classifier_model = xgb.XGBClassifier(tree_method=tree_method, objective=objective) - - logger.info("Loading model") - cell_type_classifier_model.load_model(fname=cell_type_classifier_model_path) - - logger.info("Predicting labels") - project_labels(query_dataset, - cell_type_classifier_model, - annotation_column_name=prediction_column_name, - uncertainty_column_name=uncertainty_column_name) - - logger.info("Converting labels from numbers to classes") - labels_encoder = LabelEncoder() - labels_encoder.classes_ = np.array(labels) - query_dataset.obs[prediction_column_name] = labels_encoder.inverse_transform(query_dataset.obs[prediction_column_name]) - - return query_dataset - - -def main(par): - logger.info("Checking arguments") - par = check_arguments(par) - - mdata = mudata.read(par["input"].strip()) - adata = mdata.mod[par["modality"]] - - adata_reference = sc.read(par["reference"], backup_url=par["reference"]) - - # If classifiers for targets are in the model_output directory, simply open them and run (unless \`retrain\` != True) - # If some classifiers are missing, train and save them first - # Predict and save the query data - - targets_to_train = [] - - for obs_target in par["reference_obs_targets"]: - if not os.path.exists(par["model_output"]) or f"classifier_{obs_target}.xgb" not in os.listdir(par["model_output"]) or par["force_retrain"]: - logger.info(f"Classifier for {obs_target} added to a training schedule") - targets_to_train.append(obs_target) - else: - logger.info(f"Found classifier for {obs_target}, no retraining required") - - build_ref_classifiers(adata_reference, targets_to_train, model_path=par["model_output"], - gpu=par["use_gpu"], eval_verbosity=par["verbosity"]) - - output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) - - with open(par["model_output"] + "/model_info.json", 
"r") as f: - models_info = json.loads(f.read()) - - for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): - logger.info(f"Predicting {obs_target}") - - adata = predict(query_dataset=adata, - cell_type_classifier_model_path=os.path.join(par["model_output"], "classifier_" + obs_target + ".xgb"), - annotation_column_name=obs_target, - prediction_column_name=obs_pred, - uncertainty_column_name=obs_unc, - models_info=models_info, - use_gpu=par["use_gpu"]) - - if obs_target in targets_to_train: - # Save information about the transfer to .uns - output_uns_parameters[obs_target] = { - "method": "XGBClassifier", - **training_params - } - - adata.uns[par["output_uns_parameters"]] = output_uns_parameters - - logger.info("Updating mdata") - mdata.mod[par['modality']] = adata - mdata.update() - - logger.info("Writing output") - mdata.write_h5mu(par['output'].strip()) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/metadata/add_id/.config.vsh.yaml b/target/native/metadata/add_id/.config.vsh.yaml deleted file mode 100644 index 5f6d540e16b..00000000000 --- a/target/native/metadata/add_id/.config.vsh.yaml +++ /dev/null @@ -1,197 +0,0 @@ -functionality: - name: "add_id" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_id" - description: "The input id." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_output" - description: "Name of the .obs column where to store the id." - info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--make_observation_keys_unique" - description: "Join the id to the .obs index (.obs_names)." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Add id of .obs. Also allows to make .obs_names (the .obs index) unique\ - \ \nby prefixing the values with an unique id per .h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory 
= 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/add_id" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/add_id/add_id" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/metadata/add_id/add_id b/target/native/metadata/add_id/add_id deleted file mode 100755 index 7f1a1c3f839..00000000000 --- a/target/native/metadata/add_id/add_id +++ /dev/null @@ -1,593 +0,0 @@ -#!/usr/bin/env bash - -# add_id 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="add_id" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "add_id 0.12.3" - echo "" - echo "Add id of .obs. Also allows to make .obs_names (the .obs index) unique" - echo "by prefixing the values with an unique id per .h5mu file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: sample_path" - echo " Path to the input .h5mu." - echo "" - echo " --input_id" - echo " type: string, required parameter" - echo " The input id." - echo "" - echo " --obs_output" - echo " type: string" - echo " default: sample_id" - echo " Name of the .obs column where to store the id." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --make_observation_keys_unique" - echo " type: boolean_true" - echo " Join the id to the .obs index (.obs_names)." 
-} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "add_id 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id) - [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_ID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_id=*) - [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id=*\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --obs_output) - [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --obs_output=*) - [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output=*\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OBS_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --make_observation_keys_unique) - [ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ] && ViashError Bad arguments for option \'--make_observation_keys_unique\': \'$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE=true - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then - ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then - VIASH_PAR_OBS_OUTPUT="sample_id" -fi -if [ -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then - VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE="false" -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ]]; then - if ! [[ "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--make_observation_keys_unique' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-add_id-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -from __future__ import annotations -import sys -from mudata import read_h5mu, MuData - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'obs_output': $( if [ ! -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'make_observation_keys_unique': $( if [ ! 
-z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then echo "r'${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: - """ - Make the observation keys unique across all samples. At input, - the observation keys are unique within a sample. By adding the sample name - (unique for a sample) to each observation key, the observation key is made - unique across all samples as well. - """ - logger.info('Making observation keys unique across all samples.') - sample.obs.index = f"{sample_id}_" + sample.obs.index - make_observation_keys_unique_per_mod(sample_id, sample) - - -def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: - """ - Updating MuData.obs_names is not allowed (it is read-only). - So the observation keys for each modality has to be updated manually. 
- """ - for mod in sample.mod.values(): - mod.obs_names = f"{sample_id}_" + mod.obs_names - -def main(): - input_data = read_h5mu(par["input"]) - input_data.obs[par["obs_output"]] = par["input_id"] - for mod_data in input_data.mod.values(): - mod_data.obs[par["obs_output"]] = par["input_id"] - if par["make_observation_keys_unique"]: - make_observation_keys_unique(par["input_id"], input_data) - logger.info("Writing out data to '%s'.", par["output"]) - input_data.write_h5mu(par["output"], compression=par["output_compression"]) - -if __name__ == '__main__': - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/native/metadata/add_id/setup_logger.py b/target/native/metadata/add_id/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/metadata/add_id/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/metadata/grep_annotation_column/.config.vsh.yaml b/target/native/metadata/grep_annotation_column/.config.vsh.yaml deleted file mode 100644 index 68ce018021c..00000000000 --- a/target/native/metadata/grep_annotation_column/.config.vsh.yaml +++ /dev/null @@ -1,244 +0,0 @@ -functionality: - name: "grep_annotation_column" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" 
- github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - description: "Arguments related to the input dataset." - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_column" - description: "Column to query. If not specified, use .var_names or .obs_names,\ - \ depending on the value of --matrix" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to get the annotation matrix from.\n" - info: null - example: - - "rna" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--matrix" - description: "Matrix to fetch the column from that will be searched." - info: null - example: - - "var" - required: false - choices: - - "var" - - "obs" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - description: "Arguments related to how the output will be written." - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_match_column" - description: "Name of the column to write the result to." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_fraction_column" - description: "For the opposite axis, name of the column to write the fraction\ - \ of \nobservations that matches to the pattern.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Query options" - description: "Options related to the query" - arguments: - - type: "string" - name: "--regex_pattern" - description: "Regex to use to match with the input column." - info: null - example: - - "^[mM][tT]-" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Perform a regex lookup on a column from the annotation matrices .obs\ - \ or .var.\nThe annotation matrix can originate from either a modality, or all\ - \ modalities (global .var or .obs).\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - 
- "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/grep_annotation_column" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/grep_annotation_column/grep_annotation_column" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/metadata/grep_annotation_column/grep_annotation_column 
b/target/native/metadata/grep_annotation_column/grep_annotation_column deleted file mode 100755 index 804df779336..00000000000 --- a/target/native/metadata/grep_annotation_column/grep_annotation_column +++ /dev/null @@ -1,677 +0,0 @@ -#!/usr/bin/env bash - -# grep_annotation_column 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to 
${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. 
-function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="grep_annotation_column" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "grep_annotation_column 0.12.3" - echo "" - echo "Perform a regex lookup on a column from the annotation matrices .obs or .var." - echo "The annotation matrix can originate from either a modality, or all modalities" - echo "(global .var or .obs)." - echo "" - echo "Inputs:" - echo " Arguments related to the input dataset." - echo "" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: sample_path" - echo " Path to the input .h5mu." - echo "" - echo " --input_column" - echo " type: string" - echo " Column to query. 
If not specified, use .var_names or .obs_names," - echo " depending on the value of --matrix" - echo "" - echo " --modality" - echo " type: string, required parameter" - echo " example: rna" - echo " Which modality to get the annotation matrix from." - echo "" - echo " --matrix" - echo " type: string" - echo " example: var" - echo " choices: [ var, obs ]" - echo " Matrix to fetch the column from that will be searched." - echo "" - echo "Outputs:" - echo " Arguments related to how the output will be written." - echo "" - echo " -o, --output" - echo " type: file, output, file must exist" - echo " example: output.h5mu" - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." - echo "" - echo " --output_match_column" - echo " type: string, required parameter" - echo " Name of the column to write the result to." - echo "" - echo " --output_fraction_column" - echo " type: string" - echo " For the opposite axis, name of the column to write the fraction of" - echo " observations that matches to the pattern." - echo "" - echo "Query options:" - echo " Options related to the query" - echo "" - echo " --regex_pattern" - echo " type: string, required parameter" - echo " example: ^[mM][tT]-" - echo " Regex to use to match with the input column." -} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "grep_annotation_column 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_column) - [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_COLUMN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_column. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_column=*) - [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column=*\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT_COLUMN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --matrix) - [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MATRIX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --matrix. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --matrix=*) - [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix=*\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MATRIX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_match_column) - [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_MATCH_COLUMN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_match_column. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_match_column=*) - [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column=*\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_MATCH_COLUMN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_fraction_column) - [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FRACTION_COLUMN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fraction_column. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_fraction_column=*) - [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column=*\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FRACTION_COLUMN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --regex_pattern) - [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REGEX_PATTERN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --regex_pattern. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --regex_pattern=*) - [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern=*\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REGEX_PATTERN=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - ViashError '--modality' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then - ViashError '--output_match_column' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then - ViashError '--regex_pattern' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_MATRIX" ]; then - VIASH_PAR_MATRIX_CHOICES=("var:obs") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_MATRIX_CHOICES[*]}:" =~ ":$VIASH_PAR_MATRIX:" ]]; then - ViashError '--matrix' specified value of \'$VIASH_PAR_MATRIX\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-grep_annotation_column-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import mudata as mu -from pathlib import Path -from operator import attrgetter -import re -import numpy as np - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'input_column': $( if [ ! -z ${VIASH_PAR_INPUT_COLUMN+x} ]; then echo "r'${VIASH_PAR_INPUT_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'matrix': $( if [ ! -z ${VIASH_PAR_MATRIX+x} ]; then echo "r'${VIASH_PAR_MATRIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_match_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MATCH_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_fraction_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FRACTION_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'regex_pattern': $( if [ ! -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then echo "r'${VIASH_PAR_REGEX_PATTERN//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -### VIASH END - -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(par): - input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] - try: - compiled_regex = re.compile(par["regex_pattern"]) - except (TypeError, re.error) as e: - raise ValueError(f"{par['regex_pattern']} is not a valid regular expression pattern.") from e - else: - if compiled_regex.groups: - raise NotImplementedError("Using match groups is not supported by this component.") - logger.info('Reading input file %s, modality %s.', input_file, mod_name) - - mudata = mu.read_h5mu(input_file) - modality_data = mudata[mod_name] - annotation_matrix = getattr(modality_data, par['matrix']) - default_column = { - "var": attrgetter("var_names"), - "obs": attrgetter("obs_names") - } - if 
par["input_column"]: - try: - annotation_column = annotation_matrix[par["input_column"]] - except KeyError as e: - raise ValueError(f"Column {par['input_column']} could not be found for modality " - f"{par['modality']}. Available columns: {','.join(annotation_matrix.columns.to_list())}") from e - else: - annotation_column = default_column[par['matrix']](modality_data) - grep_result = annotation_column.str.contains(par["regex_pattern"], regex=True) - - other_axis_attribute = { - "var": "obs", - "obs": "var" - } - if par['output_fraction_column']: - pct_matching = np.ravel(np.sum(modality_data[:, grep_result].X, axis=1) / np.sum(modality_data.X, axis=1)) - getattr(modality_data, other_axis_attribute[par['matrix']])[par['output_fraction_column']] = pct_matching - getattr(modality_data, par['matrix'])[par["output_match_column"]] = grep_result - mudata.write(output_file, compression=par["output_compression"]) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/transform/scale/.config.vsh.yaml b/target/native/transform/scale/.config.vsh.yaml deleted file mode 100644 index fb77d3602b9..00000000000 --- a/target/native/transform/scale/.config.vsh.yaml +++ /dev/null @@ -1,205 +0,0 @@ -functionality: - name: "scale" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file." - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "List of modalities to process." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_value" - description: "Clip (truncate) to this value after scaling. Does not clip by default." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--zero_center" - description: "If False, omit zero-centering variables, which allows to handle\ - \ sparse input efficiently." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Scale data to unit variance and zero mean.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim-bullseye" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libhdf5-dev" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory 
= 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/transform/scale" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/transform/scale/scale" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/transform/scale/scale b/target/native/transform/scale/scale deleted file mode 100755 index 3d52ae6be83..00000000000 --- a/target/native/transform/scale/scale +++ /dev/null @@ -1,592 +0,0 @@ -#!/usr/bin/env bash - -# scale 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 
alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scale" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scale 0.12.3" - echo "" - echo "Scale data to unit variance and zero mean." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " example: input.h5mu" - echo " Input h5mu file." - echo "" - echo " --modality" - echo " type: string" - echo " default: rna" - echo " List of modalities to process." - echo "" - echo " --max_value" - echo " type: double" - echo " Clip (truncate) to this value after scaling. Does not clip by default." - echo "" - echo " --zero_center" - echo " type: boolean" - echo " default: true" - echo " If False, omit zero-centering variables, which allows to handle sparse" - echo " input efficiently." - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " default: output.h5mu" - echo " Output h5mu file." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
-} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scale 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --modality=*) - [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_value) - [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_VALUE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_value. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_value=*) - [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value=*\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_VALUE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --zero_center) - [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ZERO_CENTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --zero_center. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --zero_center=*) - [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center=*\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ZERO_CENTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MODALITY+x} ]; then - VIASH_PAR_MODALITY="rna" -fi -if [ -z ${VIASH_PAR_ZERO_CENTER+x} ]; then - VIASH_PAR_ZERO_CENTER="true" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MAX_VALUE" ]]; then - if ! [[ "$VIASH_PAR_MAX_VALUE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--max_value' has to be a double. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ZERO_CENTER" ]]; then - if ! [[ "$VIASH_PAR_ZERO_CENTER" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--zero_center' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scale-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -from mudata import read_h5mu -import scanpy - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'max_value': $( if [ ! -z ${VIASH_PAR_MAX_VALUE+x} ]; then echo "float(r'${VIASH_PAR_MAX_VALUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'zero_center': $( if [ ! -z ${VIASH_PAR_ZERO_CENTER+x} ]; then echo "r'${VIASH_PAR_ZERO_CENTER//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - logger.info(f'Reading .h5mu file: {par["input"]}') - mudata = read_h5mu(par["input"]) - mod = par["modality"] - data = mudata.mod[mod] - - logger.info("Scaling modality: %s", mod) - scanpy.pp.scale(data, - zero_center=par["zero_center"], - max_value=par["max_value"]) - - logger.info("Writing to %s", par["output"]) - mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) - logger.info("Finished") - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/native/transform/scale/setup_logger.py b/target/native/transform/scale/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/transform/scale/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/velocity/scvelo/.config.vsh.yaml b/target/native/velocity/scvelo/.config.vsh.yaml deleted file mode 100644 index 6675c39e806..00000000000 --- a/target/native/velocity/scvelo/.config.vsh.yaml +++ /dev/null @@ -1,276 +0,0 @@ -functionality: - name: "scvelo" - namespace: "velocity" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Velocyto loom file." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output directory. If it does not exist, will be created." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Filtering and normalization" - description: "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize\ - \ function)" - arguments: - - type: "integer" - name: "--min_counts" - description: "Minimum number of counts required for a gene to pass filtering\ - \ (spliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_counts_u" - description: "Minimum number of counts required for a gene to pass filtering\ - \ (unspliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells" - description: "Minimum number of cells expressed required to pass filtering (spliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells_u" - description: "Minimum number of cells expressed required to pass filtering (unspliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_shared_counts" - description: "Minimum number of counts (both unspliced and spliced) required\ - \ for a gene." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_shared_cells" - description: "Minimum number of cells required to be expressed (both unspliced\ - \ and spliced)." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_top_genes" - description: "Number of genes to keep." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--log_transform" - description: "Do not log transform counts." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Fitting parameters" - description: "Arguments for fitting the data" - arguments: - - type: "integer" - name: "--n_principal_components" - description: "Number of principal components to use for calculating moments." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_neighbors" - description: "Number of neighbors to use. First/second-order moments are computed\ - \ for each\ncell across its nearest neighbors, where the neighbor graph is\ - \ obtained from\neuclidean distances in PCA space.\n" - info: null - default: - - 30 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scvelo~=0.2.5" - - "numpy~=1.23.5" - - "matplotlib<3.8.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/scvelo" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/scvelo/scvelo" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/velocity/scvelo/scvelo 
b/target/native/velocity/scvelo/scvelo deleted file mode 100755 index 43d2bcb2f8b..00000000000 --- a/target/native/velocity/scvelo/scvelo +++ /dev/null @@ -1,801 +0,0 @@ -#!/usr/bin/env bash - -# scvelo 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. -# -# Component authors: -# * Dries Schaumont (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function 
ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. 
-function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="scvelo" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "scvelo 0.12.3" - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " Velocyto loom file." - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " Output directory. If it does not exist, will be created." - echo "" - echo " --output_compression" - echo " type: string" - echo " example: gzip" - echo " choices: [ gzip, lzf ]" - echo " The compression format to be used on the output h5mu object." 
- echo "" - echo "Filtering and normalization:" - echo " Arguments for filtering, normalization an log transform (see" - echo " scvelo.pp.filter_and_normalize function)" - echo "" - echo " --min_counts" - echo " type: integer" - echo " Minimum number of counts required for a gene to pass filtering" - echo " (spliced)." - echo "" - echo " --min_counts_u" - echo " type: integer" - echo " Minimum number of counts required for a gene to pass filtering" - echo " (unspliced)." - echo "" - echo " --min_cells" - echo " type: integer" - echo " Minimum number of cells expressed required to pass filtering (spliced)." - echo "" - echo " --min_cells_u" - echo " type: integer" - echo " Minimum number of cells expressed required to pass filtering" - echo " (unspliced)." - echo "" - echo " --min_shared_counts" - echo " type: integer" - echo " Minimum number of counts (both unspliced and spliced) required for a" - echo " gene." - echo "" - echo " --min_shared_cells" - echo " type: integer" - echo " Minimum number of cells required to be expressed (both unspliced and" - echo " spliced)." - echo "" - echo " --n_top_genes" - echo " type: integer" - echo " Number of genes to keep." - echo "" - echo " --log_transform" - echo " type: boolean" - echo " default: true" - echo " Do not log transform counts." - echo "" - echo "Fitting parameters:" - echo " Arguments for fitting the data" - echo "" - echo " --n_principal_components" - echo " type: integer" - echo " Number of principal components to use for calculating moments." - echo "" - echo " --n_neighbors" - echo " type: integer" - echo " default: 30" - echo " Number of neighbors to use. First/second-order moments are computed for" - echo " each" - echo " cell across its nearest neighbors, where the neighbor graph is obtained" - echo " from" - echo " euclidean distances in PCA space." 
-} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "scvelo 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_compression) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_COMPRESSION="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_compression=*) - [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_counts) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_counts=*) - [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_counts_u) - [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_COUNTS_U="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts_u. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_counts_u=*) - [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u=*\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MIN_COUNTS_U=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells=*) - [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cells_u) - [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_U="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_u. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cells_u=*) - [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u=*\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELLS_U=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_shared_counts) - [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SHARED_COUNTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_counts. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --min_shared_counts=*) - [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts=*\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SHARED_COUNTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_shared_cells) - [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SHARED_CELLS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_cells. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_shared_cells=*) - [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells=*\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_SHARED_CELLS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_top_genes) - [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TOP_GENES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_top_genes. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_top_genes=*) - [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes=*\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_TOP_GENES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --log_transform) - [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LOG_TRANSFORM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --log_transform. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --log_transform=*) - [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform=*\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOG_TRANSFORM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_principal_components) - [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_PRINCIPAL_COMPONENTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_principal_components. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_principal_components=*) - [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components=*\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_PRINCIPAL_COMPONENTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_neighbors) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_neighbors=*) - [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then - VIASH_PAR_LOG_TRANSFORM="true" -fi -if [ -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then - VIASH_PAR_N_NEIGHBORS="30" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_COUNTS_U" ]]; then - if ! [[ "$VIASH_PAR_MIN_COUNTS_U" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_counts_u' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then - if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELLS_U" ]]; then - if ! 
[[ "$VIASH_PAR_MIN_CELLS_U" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cells_u' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ]]; then - if ! [[ "$VIASH_PAR_MIN_SHARED_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_shared_counts' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_SHARED_CELLS" ]]; then - if ! [[ "$VIASH_PAR_MIN_SHARED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_shared_cells' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_TOP_GENES" ]]; then - if ! [[ "$VIASH_PAR_N_TOP_GENES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_top_genes' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LOG_TRANSFORM" ]]; then - if ! [[ "$VIASH_PAR_LOG_TRANSFORM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--log_transform' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ]]; then - if ! [[ "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_principal_components' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then - if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then - VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then - ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scvelo-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -import sys -import scvelo -import mudata -from contextlib import redirect_stdout -from pathlib import Path -import matplotlib as mpl - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_counts_u': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cells_u': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_shared_counts': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_shared_cells': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_top_genes': $( if [ ! 
-z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'log_transform': $( if [ ! -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then echo "r'${VIASH_PAR_LOG_TRANSFORM//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), - 'n_principal_components': $( if [ ! -z ${VIASH_PAR_N_PRINCIPAL_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_N_PRINCIPAL_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -mpl.rcParams['savefig.dpi']=150 - -# Script must be wrapped into a main function because scvelo spawn subprocesses -# and this fails when the functions are not wrapped. 
-def main(): - # Create output directory - output_dir = Path(par['output']) - output_dir.mkdir(parents=True, exist_ok=True) - scvelo.settings.figdir = str(output_dir) - - - # Calculate the sample name - sample_name = par["output"].removesuffix(".loom") - sample_name = Path(sample_name).name - - # Read the input data - adata = scvelo.read(par['input']) - - # Save spliced vs unspliced proportions to file - with (output_dir / "proportions.txt").open('w') as target: - with redirect_stdout(target): - scvelo.utils.show_proportions(adata) - - # Plot piecharts of spliced vs unspliced proportions - scvelo.pl.proportions(adata, save=True, show=False) - - # Perform preprocessing - scvelo.pp.filter_and_normalize(adata, - min_counts=par["min_counts"], - min_counts_u=par["min_counts_u"], - min_cells=par["min_cells"], - min_cells_u=par["min_cells_u"], - min_shared_counts=par["min_shared_counts"], - min_shared_cells=par["min_shared_cells"], - n_top_genes=par["n_top_genes"], - log=par["log_transform"]) - - # Fitting - scvelo.pp.moments(adata, - n_pcs=par["n_principal_components"], - n_neighbors=par["n_neighbors"]) - - - # Second step in velocyto calculations - # Velocity calculation and visualization - # From the scvelo manual: - # The solution to the full dynamical model is obtained by setting mode='dynamical', - # which requires to run scv.tl.recover_dynamics(adata) beforehand - scvelo.tl.recover_dynamics(adata) - scvelo.tl.velocity(adata, mode="dynamical") - scvelo.tl.velocity_graph(adata) - scvelo.pl.velocity_graph(adata, save=str(output_dir / "scvelo_graph.pdf"), show=False) - - # Plotting - # TODO: add more here. - scvelo.pl.velocity_embedding_stream(adata, save=str(output_dir / "scvelo_embedding.pdf"), show=False) - - # Create output - ouput_data = mudata.MuData({'rna_velocity': adata}) - ouput_data.write_h5mu(output_dir / f"{sample_name}.h5mu", compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" 
- -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/native/velocity/scvelo/setup_logger.py b/target/native/velocity/scvelo/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/native/velocity/scvelo/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/native/velocity/velocyto/.config.vsh.yaml b/target/native/velocity/velocyto/.config.vsh.yaml deleted file mode 100644 index c0ad44c1c4e..00000000000 --- a/target/native/velocity/velocyto/.config.vsh.yaml +++ /dev/null @@ -1,225 +0,0 @@ -functionality: - name: "velocyto" - namespace: "velocity" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to BAM file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome" - alternatives: - - "-t" - description: "Path to GTF file" - info: null - must_exist: true - 
create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--barcode" - alternatives: - - "-b" - description: "Valid barcodes file, to filter the bam. If --bcfile is not specified\ - \ all the cell barcodes will be included.\nCell barcodes should be specified\ - \ in the bcfile as the 'CB' tag for each read\n" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--without_umi" - description: "foo" - info: null - direction: "input" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Velocyto loom file" - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--logic" - alternatives: - - "-l" - description: "The logic to use for the filtering." - info: null - default: - - "Default" - required: false - choices: - - "Default" - - "Permissive10X" - - "Intermediate10X" - - "ValidatedIntrons10X" - - "Stricter10X" - - "ObservedSpanning10X" - - "Discordant10X" - - "SmartSeq2" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Runs the velocity analysis on a BAM file, outputting a loom file." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - - type: "file" - path: "resources_test/rna_velocity" - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - - "file" - interactive: false - - type: "python" - user: false - pip: - - "numpy" - - "Cython" - upgrade: true - - type: "python" - user: false - pip: - - "velocyto" - upgrade: true - - type: "apt" - packages: - - "samtools" - interactive: false - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" 
- cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml" - platform: "native" - output: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/velocyto" - executable: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/velocyto/velocyto" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/native/velocity/velocyto/velocyto b/target/native/velocity/velocyto/velocyto deleted file mode 100755 index 252d813ba0f..00000000000 --- a/target/native/velocity/velocyto/velocyto +++ /dev/null @@ -1,605 +0,0 @@ -#!/usr/bin/env bash - -# velocyto 0.12.3 -# -# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
-# -# Component authors: -# * Robrecht Cannoodt (maintainer) - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - SOURCE="$1" - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" - SOURCE="$(readlink "$SOURCE")" - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done - cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 
1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. 
-function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# define meta fields -VIASH_META_FUNCTIONALITY_NAME="velocyto" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "velocyto 0.12.3" - echo "" - echo "Runs the velocity analysis on a BAM file, outputting a loom file." - echo "" - echo "Arguments:" - echo " -i, --input" - echo " type: file, required parameter, file must exist" - echo " Path to BAM file" - echo "" - echo " -t, --transcriptome" - echo " type: file, required parameter, file must exist" - echo " Path to GTF file" - echo "" - echo " -b, --barcode" - echo " type: file, file must exist" - echo " Valid barcodes file, to filter the bam. If --bcfile is not specified all" - echo " the cell barcodes will be included." - echo " Cell barcodes should be specified in the bcfile as the 'CB' tag for each" - echo " read" - echo "" - echo " --without_umi" - echo " type: boolean_true" - echo " foo" - echo "" - echo " -o, --output" - echo " type: file, required parameter, output, file must exist" - echo " Velocyto loom file" - echo "" - echo " -l, --logic" - echo " type: string" - echo " default: Default" - echo " choices: [ Default, Permissive10X, Intermediate10X, ValidatedIntrons10X," - echo "Stricter10X, ObservedSpanning10X, Discordant10X, SmartSeq2 ]" - echo " The logic to use for the filtering." 
-} - -# initialise array -VIASH_POSITIONAL_ARGS='' -VIASH_MODE='run' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "velocyto 0.12.3" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -i) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome) - [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --transcriptome=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome=*\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME=$(ViashRemoveFlags "$1") - shift 1 - ;; - -t) - [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --barcode) - [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BARCODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --barcode. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --barcode=*) - [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode=*\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BARCODE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -b) - [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'-b\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BARCODE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -b. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --without_umi) - [ -n "$VIASH_PAR_WITHOUT_UMI" ] && ViashError Bad arguments for option \'--without_umi\': \'$VIASH_PAR_WITHOUT_UMI\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_WITHOUT_UMI=true - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --logic) - [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGIC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --logic. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --logic=*) - [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic=*\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGIC=$(ViashRemoveFlags "$1") - shift 1 - ;; - -l) - [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'-l\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGIC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -l. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
- shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1024 )) ;; - mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; - gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then - ViashError '--transcriptome' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then - VIASH_PAR_WITHOUT_UMI="false" -fi -if [ -z ${VIASH_PAR_LOGIC+x} ]; then - VIASH_PAR_LOGIC="Default" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BARCODE" ] && [ ! -e "$VIASH_PAR_BARCODE" ]; then - ViashError "Input file '$VIASH_PAR_BARCODE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_WITHOUT_UMI" ]]; then - if ! [[ "$VIASH_PAR_WITHOUT_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--without_umi' has to be a boolean_true. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_LOGIC" ]; then - VIASH_PAR_LOGIC_CHOICES=("Default:Permissive10X:Intermediate10X:ValidatedIntrons10X:Stricter10X:ObservedSpanning10X:Discordant10X:SmartSeq2") - IFS=':' - set -f - if ! [[ ":${VIASH_PAR_LOGIC_CHOICES[*]}:" =~ ":$VIASH_PAR_LOGIC:" ]]; then - ViashError '--logic' specified value of \'$VIASH_PAR_LOGIC\' is not in the list of allowed values. 
Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -ViashDebug "Running command: bash" -cat << VIASHEOF | bash -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-velocyto-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome='&'#" ; else echo "# par_transcriptome="; fi ) -$( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "${VIASH_PAR_BARCODE}" | sed "s#'#'\"'\"'#g;s#.*#par_barcode='&'#" ; else echo "# par_barcode="; fi ) -$( if [ ! -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then echo "${VIASH_PAR_WITHOUT_UMI}" | sed "s#'#'\"'\"'#g;s#.*#par_without_umi='&'#" ; else echo "# par_without_umi="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_LOGIC+x} ]; then echo "${VIASH_PAR_LOGIC}" | sed "s#'#'\"'\"'#g;s#.*#par_logic='&'#" ; else echo "# par_logic="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=( ) - -if [ ! -z "\$par_barcode" ]; then - extra_params+=( "--bcfile=\$par_barcode" ) -fi -if [ "\$par_without_umi" == "true" ]; then - extra_params+=( "--without-umi" ) -fi -if [ ! 
-z "\$meta_cpus" ]; then - extra_params+=( "--samtools-threads" "\$meta_cpus" ) -fi -if [ ! -z "\$meta_memory_mb" ]; then - extra_params+=( "--samtools-memory" "\$meta_memory_mb" ) -fi - -output_dir=\`dirname "\$par_output"\` -sample_id=\`basename "\$par_output" .loom\` - -if (file \`readlink -f "\$par_transcriptome"\` | grep -q compressed ) ; then - # create temporary directory - tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") - function clean_up { - rm -rf "\$tmpdir" - } - trap clean_up EXIT - - zcat "\$par_transcriptome" > "\$tmpdir/genes.gtf" - par_transcriptome="\$tmpdir/genes.gtf" -fi - -velocyto run \\ - "\$par_input" \\ - "\$par_transcriptome" \\ - "\${extra_params[@]}" \\ - --outputfolder "\$output_dir" \\ - --sampleid "\$sample_id" -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/nextflow/annotate/popv/.config.vsh.yaml b/target/nextflow/annotate/popv/.config.vsh.yaml deleted file mode 100644 index 7518edac8f4..00000000000 --- a/target/nextflow/annotate/popv/.config.vsh.yaml +++ /dev/null @@ -1,346 +0,0 @@ -functionality: - name: "popv" - namespace: "annotate" - version: "0.12.3" - authors: - - name: "Matthias Beyens" - roles: - - "author" - info: - role: "Contributor" - links: - github: "MatthiasBeyens" - orcid: "0000-0003-3304-0706" - email: "matthias.beyens@gmail.com" - linkedin: "mbeyens" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - description: "Arguments related to the input (aka query) dataset." - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file." - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to process." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Which layer to use. If no value is provided, the counts are assumed\ - \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obs_batch" - description: "Key in obs field of input adata for batch information. If no value\ - \ is provided, batch label is assumed to be unknown." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_var_subset" - description: "Subset the input object with this column." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obs_label" - description: "Key in obs field of input adata for label information. This is\ - \ only used for training scANVI. Unlabelled cells should be set to `\"unknown_celltype_label\"\ - `." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--unknown_celltype_label" - description: "If `input_obs_label` is specified, cells with this value will\ - \ be treated as unknown and will be predicted by the model." - info: null - default: - - "unknown" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference" - description: "Arguments related to the reference dataset." - arguments: - - type: "file" - name: "--reference" - description: "User-provided reference tissue. The data that will be used as\ - \ reference to call cell types." - info: null - example: - - "TS_Bladder_filtered.h5ad" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_layer" - description: "Which layer to use. If no value is provided, the counts are assumed\ - \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_label" - description: "Key in obs field of reference AnnData with cell-type information." - info: null - default: - - "cell_ontology_class" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_batch" - description: "Key in obs field of input adata for batch information." - info: null - default: - - "donor_assay" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - description: "Output arguments." - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - description: "Other arguments." - arguments: - - type: "string" - name: "--methods" - description: "Methods to call cell types. By default, runs to knn_on_scvi and\ - \ scanvi." - info: null - example: - - "knn_on_scvi" - - "scanvi" - required: true - choices: - - "celltypist" - - "knn_on_bbknn" - - "knn_on_scanorama" - - "knn_on_scvi" - - "onclass" - - "rf" - - "scanvi" - - "svm" - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs popular major vote cell typing on single cell sequence data\ - \ using multiple algorithms. Note that this is a one-shot version of PopV." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - - "build-essential" - - "wget" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "scvi-tools~=1.0.3" - - "popv~=0.3.2" - - "jax==0.4.10" - - "jaxlib==0.4.10" - - "ml-dtypes<0.3.0" - upgrade: true - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "docker" - run: - - "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \\\n\ - \ cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2\n" - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 
16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/annotate/popv/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/annotate/popv" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/annotate/popv/popv" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/annotate/popv/main.nf b/target/nextflow/annotate/popv/main.nf deleted file mode 100644 index f950802b925..00000000000 --- a/target/nextflow/annotate/popv/main.nf +++ /dev/null @@ -1,2958 +0,0 @@ -// popv 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Matthias Beyens (author) -// * Robrecht Cannoodt (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "popv", - "namespace" : "annotate", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Matthias Beyens", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "MatthiasBeyens", - "orcid" : "0000-0003-3304-0706", - "email" : "matthias.beyens@gmail.com", - "linkedin" : "mbeyens" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "description" : "Arguments related to the input (aka query) dataset.", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file.", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "Which modality to process.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : 
"input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_layer", - "description" : "Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_obs_batch", - "description" : "Key in obs field of input adata for batch information. If no value is provided, batch label is assumed to be unknown.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_var_subset", - "description" : "Subset the input object with this column.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_obs_label", - "description" : "Key in obs field of input adata for label information. This is only used for training scANVI. Unlabelled cells should be set to `\\"unknown_celltype_label\\"`.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--unknown_celltype_label", - "description" : "If `input_obs_label` is specified, cells with this value will be treated as unknown and will be predicted by the model.", - "default" : [ - "unknown" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Reference", - "description" : "Arguments related to the reference dataset.", - "arguments" : [ - { - "type" : "file", - "name" : "--reference", - "description" : "User-provided reference tissue. 
The data that will be used as reference to call cell types.", - "example" : [ - "TS_Bladder_filtered.h5ad" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_layer", - "description" : "Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_obs_label", - "description" : "Key in obs field of reference AnnData with cell-type information.", - "default" : [ - "cell_ontology_class" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_obs_batch", - "description" : "Key in obs field of input adata for batch information.", - "default" : [ - "donor_assay" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "description" : "Output arguments.", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "description" : "Other arguments.", - "arguments" : [ - { - "type" : "string", - "name" : "--methods", - 
"description" : "Methods to call cell types. By default, runs to knn_on_scvi and scanvi.", - "example" : [ - "knn_on_scvi", - "scanvi" - ], - "required" : true, - "choices" : [ - "celltypist", - "knn_on_bbknn", - "knn_on_scanorama", - "knn_on_scvi", - "onclass", - "rf", - "scanvi", - "svm" - ], - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/annotate/popv/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Performs popular major vote cell typing on single cell sequence data using multiple algorithms. Note that this is a one-shot version of PopV.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/annotate/popv/" - }, - { - "type" : "file", - "path" : "resources_test/annotation_test_data/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "git", - 
"build-essential", - "wget" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "scanpy~=1.9.5", - "scvi-tools~=1.0.3", - "popv~=0.3.2", - "jax==0.4.10", - "jaxlib==0.4.10", - "ml-dtypes<0.3.0" - ], - "upgrade" : true - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - }, - { - "type" : "docker", - "run" : [ - "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \\\\\n cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2\n" - ] - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : 
"/home/runner/work/openpipeline/openpipeline/src/annotate/popv/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/annotate/popv", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import re -import tempfile -import typing -import numpy as np -import mudata as mu -import anndata as ad -import popv - -# todo: is this still needed? -from torch.cuda import is_available as cuda_is_available -try: - from torch.backends.mps import is_available as mps_is_available -except ModuleNotFoundError: - # Older pytorch versions - # MacOS GPUs - def mps_is_available(): - return False - -# where to find the obo files -cl_obo_folder = "/opt/PopV/ontology/" - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_obs_batch': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_var_subset': $( if [ ! -z ${VIASH_PAR_INPUT_VAR_SUBSET+x} ]; then echo "r'${VIASH_PAR_INPUT_VAR_SUBSET//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_obs_label': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_LABEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'unknown_celltype_label': $( if [ ! 
-z ${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL+x} ]; then echo "r'${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_layer': $( if [ ! -z ${VIASH_PAR_REFERENCE_LAYER+x} ]; then echo "r'${VIASH_PAR_REFERENCE_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_obs_label': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_LABEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_obs_batch': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'methods': $( if [ ! -z ${VIASH_PAR_METHODS+x} ]; then echo "r'${VIASH_PAR_METHODS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -use_gpu = cuda_is_available() or mps_is_available() -logger.info("GPU enabled? %s", use_gpu) - -# Helper functions -def get_X(adata: ad.AnnData, layer: typing.Optional[str], var_index: typing.Optional[str]): - """Fetch the counts data from X or a layer. 
Subset columns by var_index if so desired.""" - if var_index: - adata = adata[:, var_index] - if layer: - return adata.layers[layer] - else: - return adata.X -def get_obs(adata: ad.AnnData, obs_par_names): - """Subset the obs dataframe to just the columns defined by the obs_label and obs_batch.""" - obs_columns = [par[x] for x in obs_par_names if par[x]] - return adata.obs[obs_columns] -def get_var(adata: ad.AnnData, var_index: list[str]): - """Fetch the var dataframe. Subset rows by var_index if so desired.""" - return adata.var.loc[var_index] - -def main(par, meta): - assert len(par["methods"]) >= 1, "Please, specify at least one method for cell typing." - logger.info("Cell typing methods: {}".format(par["methods"])) - - ### PREPROCESSING REFERENCE ### - logger.info("### PREPROCESSING REFERENCE ###") - - # take a look at reference data - logger.info("Reading reference data '%s'", par["reference"]) - reference = ad.read_h5ad(par["reference"]) - - logger.info("Setting reference var index to Ensembl IDs") - reference.var["gene_symbol"] = list(reference.var.index) - reference.var.index = [re.sub("\\\\\\\\.[0-9]+\\$", "", s) for s in reference.var["ensemblid"]] - - logger.info("Detect number of samples per label") - min_celltype_size = np.min(reference.obs.groupby(par["reference_obs_batch"]).size()) - n_samples_per_label = np.max((min_celltype_size, 100)) - - ### PREPROCESSING INPUT ### - logger.info("### PREPROCESSING INPUT ###") - logger.info("Reading '%s'", par["input"]) - input = mu.read_h5mu(par["input"]) - input_modality = input.mod[par["modality"]] - - # subset with var column - if par["input_var_subset"]: - logger.info("Subset input with .var['%s']", par["input_var_subset"]) - assert par["input_var_subset"] in input_modality.var, f"--input_var_subset='{par['input_var_subset']}' needs to be a column in .var" - input_modality = input_modality[:,input_modality.var[par["input_var_subset"]]] - - ### ALIGN REFERENCE AND INPUT ### - logger.info("### ALIGN REFERENCE 
AND INPUT ###") - - logger.info("Detecting common vars based on ensembl ids") - common_ens_ids = list(set(reference.var.index).intersection(set(input_modality.var.index))) - - logger.info(" reference n_vars: %i", reference.n_vars) - logger.info(" input n_vars: %i", input_modality.n_vars) - logger.info(" intersect n_vars: %i", len(common_ens_ids)) - assert len(common_ens_ids) >= 100, "The intersection of genes is too small." - - # subset input objects to make sure popv is using the data we expect - input_modality = ad.AnnData( - X = get_X(input_modality, par["input_layer"], common_ens_ids), - obs = get_obs(input_modality, ["input_obs_label", "input_obs_batch"]), - var = get_var(input_modality, common_ens_ids) - ) - reference = ad.AnnData( - X = get_X(reference, par["reference_layer"], common_ens_ids), - obs = get_obs(reference, ["reference_obs_label", "reference_obs_batch"]), - var = get_var(reference, common_ens_ids) - ) - - # remove layers that - - ### ALIGN REFERENCE AND INPUT ### - logger.info("### ALIGN REFERENCE AND INPUT ###") - - with tempfile.TemporaryDirectory(prefix="popv-", dir=meta["temp_dir"]) as temp_dir: - logger.info("Run PopV processing") - pq = popv.preprocessing.Process_Query( - # input - query_adata=input_modality, - query_labels_key=par["input_obs_label"], - query_batch_key=par["input_obs_batch"], - query_layers_key=None, # this is taken care of by subset - # reference - ref_adata=reference, - ref_labels_key=par["reference_obs_label"], - ref_batch_key=par["reference_obs_batch"], - # options - unknown_celltype_label=par["unknown_celltype_label"], - n_samples_per_label=n_samples_per_label, - # pretrained model - # Might need to be parameterized at some point - prediction_mode="retrain", - pretrained_scvi_path=None, - # outputs - # Might need to be parameterized at some point - save_path_trained_models=temp_dir, - # hardcoded values - cl_obo_folder=cl_obo_folder, - use_gpu=use_gpu - ) - method_kwargs = {} - if 'scanorama' in par['methods']: - 
method_kwargs['scanorama'] = {'approx': False} - logger.info("Annotate data") - popv.annotation.annotate_data( - adata=pq.adata, - methods=par["methods"], - methods_kwargs=method_kwargs - ) - - popv_input = pq.adata[input_modality.obs_names] - - # select columns starting with "popv_" - popv_obs_cols = popv_input.obs.columns[popv_input.obs.columns.str.startswith("popv_")] - - # create new data frame with selected columns - df_popv = popv_input.obs[popv_obs_cols] - - # remove prefix from column names - df_popv.columns = df_popv.columns.str.replace("popv_", "") - - # store output in mudata .obsm - input.mod[par["modality"]].obsm["popv_output"] = df_popv - - # copy important output in mudata .obs - for col in ["popv_prediction"]: - if col in popv_input.obs.columns: - input.mod[par["modality"]].obs[col] = popv_input.obs[col] - - # code to explore how the output differs from the original - # for attr in ["obs", "var", "uns", "obsm", "layers", "obsp"]: - # old_keys = set(getattr(pq_adata_orig, attr).keys()) - # new_keys = set(getattr(pq.adata, attr).keys()) - # diff_keys = list(new_keys.difference(old_keys)) - # diff_keys.sort() - # print(f"{attr}:", flush=True) - # for key in diff_keys: - # print(f" {key}", flush=True) - - # write output - logger.info("Writing %s", par["output"]) - input.write_h5mu(par["output"], compression=par["output_compression"]) - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/annotate_popv", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : 
false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. 
- // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/annotate/popv/nextflow.config b/target/nextflow/annotate/popv/nextflow.config deleted file mode 100644 index a4a942690c4..00000000000 --- 
a/target/nextflow/annotate/popv/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'popv' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs popular major vote cell typing on single cell sequence data using multiple algorithms. Note that this is a one-shot version of PopV.' - author = 'Matthias Beyens, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: 
mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/annotate/popv/nextflow_params.yaml b/target/nextflow/annotate/popv/nextflow_params.yaml deleted file mode 100644 index e58b114fd16..00000000000 --- a/target/nextflow/annotate/popv/nextflow_params.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# Inputs -input: # please fill in - example: "input.h5mu" -modality: "rna" -# input_layer: "foo" -# input_obs_batch: "foo" -# input_var_subset: "foo" -# input_obs_label: "foo" -unknown_celltype_label: "unknown" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Arguments -methods: # please fill in - example: ["knn_on_scvi", "scanvi"] - -# Reference -reference: # please fill in - example: "TS_Bladder_filtered.h5ad" -# reference_layer: "foo" -reference_obs_label: "cell_ontology_class" -reference_obs_batch: "donor_assay" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/annotate/popv/nextflow_schema.json b/target/nextflow/annotate/popv/nextflow_schema.json deleted file mode 100644 index a610d2385f8..00000000000 --- 
a/target/nextflow/annotate/popv/nextflow_schema.json +++ /dev/null @@ -1,171 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "popv", - "description": "Performs popular major vote cell typing on single cell sequence data using multiple algorithms. Note that this is a one-shot version of PopV.", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "Arguments related to the input (aka query) dataset.", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file." - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. Which modality to process", - "help_text": "Type: `string`, default: `rna`. Which modality to process.", - "default": "rna" - }, - - "input_layer": { - "type": "string", - "description": "Type: `string`. Which layer to use", - "help_text": "Type: `string`. Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`." - }, - - "input_obs_batch": { - "type": "string", - "description": "Type: `string`. Key in obs field of input adata for batch information", - "help_text": "Type: `string`. Key in obs field of input adata for batch information. If no value is provided, batch label is assumed to be unknown." - }, - - "input_var_subset": { - "type": "string", - "description": "Type: `string`. Subset the input object with this column", - "help_text": "Type: `string`. Subset the input object with this column." - }, - - "input_obs_label": { - "type": "string", - "description": "Type: `string`. Key in obs field of input adata for label information", - "help_text": "Type: `string`. Key in obs field of input adata for label information. This is only used for training scANVI. 
Unlabelled cells should be set to `\"unknown_celltype_label\"`." - }, - - "unknown_celltype_label": { - "type": "string", - "description": "Type: `string`, default: `unknown`. If `input_obs_label` is specified, cells with this value will be treated as unknown and will be predicted by the model", - "help_text": "Type: `string`, default: `unknown`. If `input_obs_label` is specified, cells with this value will be treated as unknown and will be predicted by the model.", - "default": "unknown" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "Output arguments.", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "Other arguments.", - "properties": { - - "methods": { - "type": "string", - "description": "Type: List of `string`, required, example: `knn_on_scvi:scanvi`, multiple_sep: `\":\"`, choices: ``celltypist`, `knn_on_bbknn`, `knn_on_scanorama`, `knn_on_scvi`, `onclass`, `rf`, `scanvi`, `svm``. Methods to call cell types", - "help_text": "Type: List of `string`, required, example: `knn_on_scvi:scanvi`, multiple_sep: `\":\"`, choices: ``celltypist`, `knn_on_bbknn`, `knn_on_scanorama`, `knn_on_scvi`, `onclass`, `rf`, `scanvi`, `svm``. Methods to call cell types. 
By default, runs to knn_on_scvi and scanvi.", - "enum": ["celltypist", "knn_on_bbknn", "knn_on_scanorama", "knn_on_scvi", "onclass", "rf", "scanvi", "svm"] - - } - - } - }, - "reference" : { - "title": "Reference", - "type": "object", - "description": "Arguments related to the reference dataset.", - "properties": { - - "reference": { - "type": "string", - "description": "Type: `file`, required, example: `TS_Bladder_filtered.h5ad`. User-provided reference tissue", - "help_text": "Type: `file`, required, example: `TS_Bladder_filtered.h5ad`. User-provided reference tissue. The data that will be used as reference to call cell types." - }, - - "reference_layer": { - "type": "string", - "description": "Type: `string`. Which layer to use", - "help_text": "Type: `string`. Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`." - }, - - "reference_obs_label": { - "type": "string", - "description": "Type: `string`, default: `cell_ontology_class`. Key in obs field of reference AnnData with cell-type information", - "help_text": "Type: `string`, default: `cell_ontology_class`. Key in obs field of reference AnnData with cell-type information.", - "default": "cell_ontology_class" - }, - - "reference_obs_batch": { - "type": "string", - "description": "Type: `string`, default: `donor_assay`. Key in obs field of input adata for batch information", - "help_text": "Type: `string`, default: `donor_assay`. Key in obs field of input adata for batch information.", - "default": "donor_assay" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/reference" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/annotate/popv/setup_logger.py b/target/nextflow/annotate/popv/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/annotate/popv/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/cluster/leiden/.config.vsh.yaml b/target/nextflow/cluster/leiden/.config.vsh.yaml deleted file mode 100644 index d25c28179c2..00000000000 --- a/target/nextflow/cluster/leiden/.config.vsh.yaml +++ /dev/null @@ -1,219 +0,0 @@ -functionality: - name: "leiden" - namespace: "cluster" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input file." 
- info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_connectivities" - description: "In which .obsp slot the neighbor connectivities can be found." - info: null - default: - - "connectivities" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_name" - description: "Name of the .obsm key under which to add the cluster labels.\nThe\ - \ name of the columns in the matrix will correspond to the resolutions.\n" - info: null - default: - - "leiden" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--resolution" - description: "A parameter value controlling the coarseness of the clustering.\ - \ Higher values lead to more clusters.\nMultiple values will result in clustering\ - \ being performed multiple times.\n" - info: null - default: - - 1.0 - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Cluster cells using the Leiden algorithm [Traag18] implemented in\ - \ the 
Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain\ - \ algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15].\ - \ \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\ - \nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks,\ - \ J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection\ - \ of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n\ - Traag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected\ - \ communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell\ - \ gene expression data analysis, Genome Biology. \n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.8-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "cmake" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "leidenalg~=0.8.9" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory 
= 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/cluster/leiden" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/cluster/leiden/leiden" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/cluster/leiden/main.nf b/target/nextflow/cluster/leiden/main.nf deleted file mode 100644 index cdc45b85237..00000000000 --- a/target/nextflow/cluster/leiden/main.nf +++ /dev/null @@ -1,2631 +0,0 @@ -// leiden 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "leiden", - "namespace" : "cluster", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input file.", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsp_connectivities", - "description" : "In which .obsp slot the neighbor connectivities can be found.", - "default" : [ - "connectivities" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : 
"--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_name", - "description" : "Name of the .obsm key under which to add the cluster labels.\nThe name of the columns in the matrix will correspond to the resolutions.\n", - "default" : [ - "leiden" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--resolution", - "description" : "A parameter value controlling the coarseness of the clustering. Higher values lead to more clusters.\nMultiple values will result in clustering being performed multiple times.\n", - "default" : [ - 1.0 - ], - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \nTraag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \nWolf18: Wolf et al. 
(2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "run_test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.8-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "cmake", - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "leidenalg~=0.8.9" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highcpu", - "midmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", 
- "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/cluster/leiden", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import mudata as mu -import pandas as pd -import scanpy as sc - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_name': $( if [ ! 
-z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -mdata = mu.read_h5mu(par["input"]) - - -def run_single_resolution(adata, resolution): - adata_out = sc.tl.leiden( - adata, - resolution=resolution, - key_added=str(resolution), - obsp=par['obsp_connectivities'], - copy=True - ) - return adata_out.obs[str(resolution)] - -logger.info("Processing modality '%s'.", par['modality']) -data = mdata.mod[par['modality']] -results = {str(resolution): run_single_resolution(data, resolution) for resolution in par["resolution"]} -data.obsm[par["obsm_name"]] = pd.DataFrame(results) -logger.info("Writing to %s.", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -logger.info("Finished.") -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/cluster_leiden", - "tag" : "0.12.0" - }, - "label" : [ - "highcpu", - "midmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: 
jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. 
- // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/cluster/leiden/nextflow.config b/target/nextflow/cluster/leiden/nextflow.config deleted file mode 100644 index a5b92bcbd4c..00000000000 --- 
a/target/nextflow/cluster/leiden/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'leiden' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \nTraag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n' - author = 'Dries De Maeyer' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } 
- shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/cluster/leiden/nextflow_params.yaml b/target/nextflow/cluster/leiden/nextflow_params.yaml deleted file mode 100644 index b051a6181b3..00000000000 --- a/target/nextflow/cluster/leiden/nextflow_params.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -obsp_connectivities: "connectivities" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -obsm_name: "leiden" -resolution: # please fill in - example: [1] - -# Nextflow input-output 
arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/cluster/leiden/nextflow_schema.json b/target/nextflow/cluster/leiden/nextflow_schema.json deleted file mode 100644 index 4da3785970a..00000000000 --- a/target/nextflow/cluster/leiden/nextflow_schema.json +++ /dev/null @@ -1,94 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "leiden", - "description": "Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \nTraag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input file." - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "obsp_connectivities": { - "type": "string", - "description": "Type: `string`, default: `connectivities`. In which ", - "help_text": "Type: `string`, default: `connectivities`. 
In which .obsp slot the neighbor connectivities can be found.", - "default": "connectivities" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - }, - - "obsm_name": { - "type": "string", - "description": "Type: `string`, default: `leiden`. Name of the ", - "help_text": "Type: `string`, default: `leiden`. Name of the .obsm key under which to add the cluster labels.\nThe name of the columns in the matrix will correspond to the resolutions.\n", - "default": "leiden" - }, - - "resolution": { - "type": "string", - "description": "Type: List of `double`, required, default: `1`, multiple_sep: `\":\"`. A parameter value controlling the coarseness of the clustering", - "help_text": "Type: List of `double`, required, default: `1`, multiple_sep: `\":\"`. A parameter value controlling the coarseness of the clustering. Higher values lead to more clusters.\nMultiple values will result in clustering being performed multiple times.\n", - "default": "1" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
- }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/cluster/leiden/setup_logger.py b/target/nextflow/cluster/leiden/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/cluster/leiden/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/compression/compress_h5mu/.config.vsh.yaml b/target/nextflow/compression/compress_h5mu/.config.vsh.yaml deleted file mode 100644 index 925a68624c3..00000000000 --- a/target/nextflow/compression/compress_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,167 +0,0 @@ -functionality: - name: "compress_h5mu" - namespace: "compression" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "location of output file." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--compression" - description: "Compression type." - info: null - default: - - "gzip" - required: false - choices: - - "lzf" - - "gzip" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../../utils/compress_h5mu.py" - description: "Compress a MuData file. \n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: 
"memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/compression/compress_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/compression/compress_h5mu/compress_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/compression/compress_h5mu/compress_h5mu.py b/target/nextflow/compression/compress_h5mu/compress_h5mu.py deleted file mode 100644 index 9d92395a573..00000000000 --- a/target/nextflow/compression/compress_h5mu/compress_h5mu.py +++ /dev/null @@ -1,49 +0,0 @@ -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], 
- name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) diff --git a/target/nextflow/compression/compress_h5mu/main.nf b/target/nextflow/compression/compress_h5mu/main.nf deleted file mode 100644 index 8254d02b797..00000000000 --- a/target/nextflow/compression/compress_h5mu/main.nf +++ /dev/null @@ -1,2596 +0,0 @@ -// compress_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "compress_h5mu", - "namespace" : "compression", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Path to the input .h5mu.", - "example" : [ - "sample_path" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "location of output file.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--compression", - "description" : "Compression type.", - "default" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "lzf", - "gzip" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - 
"is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/" - }, - { - "type" : "file", - "path" : "../../utils/compress_h5mu.py", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/" - } - ], - "description" : "Compress a MuData file. \n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "run_test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - 
"mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/compression/compress_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function 
- starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -if __name__ == "__main__": - compress_h5mu(par["input"], par["output"], compression=par["compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/compression_compress_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/compression/compress_h5mu/nextflow.config b/target/nextflow/compression/compress_h5mu/nextflow.config deleted file mode 100644 index 
d5ef2529754..00000000000 --- a/target/nextflow/compression/compress_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'compress_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Compress a MuData file. \n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: 
mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/compression/compress_h5mu/nextflow_params.yaml b/target/nextflow/compression/compress_h5mu/nextflow_params.yaml deleted file mode 100644 index 740452fec8b..00000000000 --- a/target/nextflow/compression/compress_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Arguments -input: # please fill in - example: "sample_path" -# output: "$id.$key.output.output" -compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/compression/compress_h5mu/nextflow_schema.json b/target/nextflow/compression/compress_h5mu/nextflow_schema.json deleted file mode 100644 index 6f15cbff98d..00000000000 --- a/target/nextflow/compression/compress_h5mu/nextflow_schema.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "compress_h5mu", - "description": "Compress a MuData file. \n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `sample_path`. 
Path to the input ", - "help_text": "Type: `file`, required, example: `sample_path`. Path to the input .h5mu." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. location of output file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. location of output file.", - "default": "$id.$key.output.output" - }, - - "compression": { - "type": "string", - "description": "Type: `string`, default: `gzip`, choices: ``lzf`, `gzip``. Compression type", - "help_text": "Type: `string`, default: `gzip`, choices: ``lzf`, `gzip``. Compression type.", - "enum": ["lzf", "gzip"] - , - "default": "gzip" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml deleted file mode 100644 index b951eb52bcc..00000000000 --- a/target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,272 +0,0 @@ -functionality: - name: "from_10xh5_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "A 10x h5 file as generated by Cell Ranger." - info: null - example: - - "raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input_metrics_summary" - description: "A metrics summary csv file as generated by Cell Ranger." - info: null - example: - - "metrics_cellranger.h5" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: - slots: - mod: - - name: "rna" - required: true - description: "Gene expression counts." 
- slots: - var: - - name: "gene_symbol" - type: "string" - description: "Identification of the gene." - required: true - - name: "feature_types" - type: "string" - description: "The full name of the modality." - required: true - - name: "genome" - type: "string" - description: "Reference that was used to generate the data." - required: true - - name: "prot" - required: false - description: "Protein abundancy" - slots: - var: - - name: "gene_symbol" - type: "string" - description: "Identification of the gene." - required: true - - name: "feature_types" - type: "string" - description: "The full name of the modality." - required: true - - name: "genome" - type: "string" - description: "Reference that was used to generate the data." - required: true - - name: "vdj" - required: false - description: "VDJ transcript counts" - slots: - var: - - name: "gene_symbol" - type: "string" - description: "Identification of the gene." - required: true - - name: "feature_types" - type: "string" - description: "The full name of the modality." - required: true - - name: "genome" - type: "string" - description: "Reference that was used to generate the data." - required: true - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_metrics" - description: "Name of the .uns slot under which to QC metrics (if any)." - info: null - default: - - "metrics_cellranger" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: "--min_genes" - description: "Minimum number of counts required for a cell to pass filtering." 
- info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_counts" - description: "Minimum number of genes expressed required for a cell to pass\ - \ filtering." - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a 10x h5 into an h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 
8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xh5_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/main.nf b/target/nextflow/convert/from_10xh5_to_h5mu/main.nf deleted file mode 100644 index 7cbe56f7886..00000000000 --- a/target/nextflow/convert/from_10xh5_to_h5mu/main.nf +++ /dev/null @@ -1,2767 +0,0 @@ -// from_10xh5_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "from_10xh5_to_h5mu", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "A 10x h5 file as generated by Cell Ranger.", - "example" : [ - "raw_feature_bc_matrix.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input_metrics_summary", - "description" : "A metrics summary csv file as generated by Cell Ranger.", - "example" : [ - "metrics_cellranger.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "info" : { - "slots" : { - "mod" : [ - { - "name" : "rna", - "required" : true, - 
"description" : "Gene expression counts.", - "slots" : { - "var" : [ - { - "name" : "gene_symbol", - "type" : "string", - "description" : "Identification of the gene.", - "required" : true - }, - { - "name" : "feature_types", - "type" : "string", - "description" : "The full name of the modality.", - "required" : true - }, - { - "name" : "genome", - "type" : "string", - "description" : "Reference that was used to generate the data.", - "required" : true - } - ] - } - }, - { - "name" : "prot", - "required" : false, - "description" : "Protein abundancy", - "slots" : { - "var" : [ - { - "name" : "gene_symbol", - "type" : "string", - "description" : "Identification of the gene.", - "required" : true - }, - { - "name" : "feature_types", - "type" : "string", - "description" : "The full name of the modality.", - "required" : true - }, - { - "name" : "genome", - "type" : "string", - "description" : "Reference that was used to generate the data.", - "required" : true - } - ] - } - }, - { - "name" : "vdj", - "required" : false, - "description" : "VDJ transcript counts", - "slots" : { - "var" : [ - { - "name" : "gene_symbol", - "type" : "string", - "description" : "Identification of the gene.", - "required" : true - }, - { - "name" : "feature_types", - "type" : "string", - "description" : "The full name of the modality.", - "required" : true - }, - { - "name" : "genome", - "type" : "string", - "description" : "Reference that was used to generate the data.", - "required" : true - } - ] - } - } - ] - } - }, - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" 
: "--uns_metrics", - "description" : "Name of the .uns slot under which to QC metrics (if any).", - "default" : [ - "metrics_cellranger" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "integer", - "name" : "--min_genes", - "description" : "Minimum number of counts required for a cell to pass filtering.", - "example" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_counts", - "description" : "Minimum number of genes expressed required for a cell to pass filtering.", - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Converts a 10x h5 into an h5mu file.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - 
"resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xh5_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : 
"827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata -import scanpy as sc -import sys -import pandas as pd - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_metrics_summary': $( if [ ! -z ${VIASH_PAR_INPUT_METRICS_SUMMARY+x} ]; then echo "r'${VIASH_PAR_INPUT_METRICS_SUMMARY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'min_genes': $( if [ ! -z ${VIASH_PAR_MIN_GENES+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -adata = sc.read_10x_h5(par["input"], gex_only=False) - -# set the gene ids as var_names -logger.info("Renaming var columns") -adata.var = adata.var\\\\ - .rename_axis("gene_symbol")\\\\ - .reset_index()\\\\ - .set_index("gene_ids") - -# parse metrics summary file and store in .obsm or .obs -if par["input_metrics_summary"] and par["uns_metrics"]: - logger.info("Reading metrics summary file '%s'", par['input_metrics_summary']) - - def read_percentage(val): - try: - return float(val.strip('%')) / 100 - except AttributeError: - return val - - metrics_summary = pd.read_csv(par["input_metrics_summary"], decimal=".", quotechar='"', thousands=",").applymap(read_percentage) - - logger.info("Storing metrics summary in .uns['%s']", par['uns_metrics']) - adata.uns[par["uns_metrics"]] = metrics_summary -else: - is_none = "input_metrics_summary" if not par["input_metrics_summary"] else "uns_metrics" - logger.info("Not storing metrics summary because par['%s'] is None", is_none) - -# might perform basic filtering to get rid of some data -# applicable when starting from the raw counts -if par["min_genes"]: - logger.info("Filtering with min_genes=%d", par['min_genes']) - sc.pp.filter_cells(adata, 
min_genes=par["min_genes"]) - -if par["min_counts"]: - logger.info("Filtering with min_counts=%d", par['min_counts']) - sc.pp.filter_cells(adata, min_counts=par["min_counts"]) - -# generate output -logger.info("Convert to mudata") -mdata = mudata.MuData(adata) - -# override root .obs -mdata.obs = adata.obs - -# write output -logger.info("Writing %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/convert_from_10xh5_to_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config deleted file mode 100644 index 
8339a73c46f..00000000000 --- a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'from_10xh5_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Converts a 10x h5 into an h5mu file.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 
256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml deleted file mode 100644 index cd471c4544c..00000000000 --- a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Inputs -input: # please fill in - example: "raw_feature_bc_matrix.h5" -# input_metrics_summary: "metrics_cellranger.h5" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -uns_metrics: "metrics_cellranger" - -# Arguments -# min_genes: 100 -# min_counts: 1000 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json deleted file mode 100644 index 394644d23fa..00000000000 --- a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,113 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "from_10xh5_to_h5mu", - "description": "Converts a 10x h5 into an h5mu file.\n", - "type": "object", - "definitions": { - "inputs" 
: { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `raw_feature_bc_matrix.h5`. A 10x h5 file as generated by Cell Ranger", - "help_text": "Type: `file`, required, example: `raw_feature_bc_matrix.h5`. A 10x h5 file as generated by Cell Ranger." - }, - - "input_metrics_summary": { - "type": "string", - "description": "Type: `file`, example: `metrics_cellranger.h5`. A metrics summary csv file as generated by Cell Ranger", - "help_text": "Type: `file`, example: `metrics_cellranger.h5`. A metrics summary csv file as generated by Cell Ranger." - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - }, - - "uns_metrics": { - "type": "string", - "description": "Type: `string`, default: `metrics_cellranger`. Name of the ", - "help_text": "Type: `string`, default: `metrics_cellranger`. Name of the .uns slot under which to QC metrics (if any).", - "default": "metrics_cellranger" - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "min_genes": { - "type": "integer", - "description": "Type: `integer`, example: `100`. Minimum number of counts required for a cell to pass filtering", - "help_text": "Type: `integer`, example: `100`. 
Minimum number of counts required for a cell to pass filtering." - }, - - "min_counts": { - "type": "integer", - "description": "Type: `integer`, example: `1000`. Minimum number of genes expressed required for a cell to pass filtering", - "help_text": "Type: `integer`, example: `1000`. Minimum number of genes expressed required for a cell to pass filtering." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py b/target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 86116d80b03..00000000000 --- a/target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,166 +0,0 @@ -functionality: - name: "from_10xmtx_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - 
roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input mtx folder" - info: null - example: - - "input_dir_containing_gz_files" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a 10x mtx into an h5mu file.\n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.8-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - 
packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xmtx_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/main.nf b/target/nextflow/convert/from_10xmtx_to_h5mu/main.nf deleted file mode 100644 index 523ad8fcdf0..00000000000 --- a/target/nextflow/convert/from_10xmtx_to_h5mu/main.nf +++ 
/dev/null @@ -1,2577 +0,0 @@ -// from_10xmtx_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "from_10xmtx_to_h5mu", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input mtx folder", - "example" : [ - "input_dir_containing_gz_files" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - 
"create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Converts a 10x mtx into an h5mu file.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "run_test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.8-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - 
"label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xmtx_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -import scanpy as sc -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -adata = sc.read_10x_mtx(par["input"], gex_only=False) - -logger.info("Renaming keys.") -adata.var = adata.var\\\\ - .rename_axis("gene_symbol")\\\\ - .reset_index()\\\\ - .set_index("gene_ids") - -# generate output -logger.info("Convert to mudata") -mdata = mu.MuData(adata) - -# override root .obs -mdata.obs = adata.obs - -# write output -logger.info("Writing %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/convert_from_10xmtx_to_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: 
tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
- // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config deleted file mode 100644 index 
0e1499d8295..00000000000 --- a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'from_10xmtx_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Converts a 10x mtx into an h5mu file.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 
256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml deleted file mode 100644 index 8087527e8a2..00000000000 --- a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Arguments -input: # please fill in - example: "input_dir_containing_gz_files" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json deleted file mode 100644 index 374aff5dd93..00000000000 --- a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "from_10xmtx_to_h5mu", - "description": "Converts a 10x mtx into an h5mu file.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": 
"string", - "description": "Type: `file`, required, example: `input_dir_containing_gz_files`. Input mtx folder", - "help_text": "Type: `file`, required, example: `input_dir_containing_gz_files`. Input mtx folder" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py b/target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml deleted file mode 100644 index 05904731e0e..00000000000 --- a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml +++ /dev/null @@ -1,159 +0,0 @@ -functionality: - name: "from_bd_to_10x_molecular_barcode_tags" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input SAM or BAM file." 
- info: null - example: - - "input.bam" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output alignment file." - info: null - example: - - "output.sam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--bam" - description: "Output a BAM file." - info: null - direction: "input" - dest: "par" - - type: "integer" - name: "--threads" - alternatives: - - "-t" - description: "Number of threads" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Convert the molecular barcode sequence SAM tag from BD format (MA)\ - \ to 10X format (UB).\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - - type: "file" - path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:latest" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "samtools" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - 
mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf deleted file mode 100644 index 2b89e2d05fb..00000000000 --- a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf +++ /dev/null @@ -1,2586 +0,0 @@ -// from_bd_to_10x_molecular_barcode_tags 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "from_bd_to_10x_molecular_barcode_tags", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input SAM or BAM file.", - "example" : [ - "input.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output alignment file.", - "example" : [ - "output.sam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--bam", - "description" : "Output a BAM file.", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--threads", - "alternatives" : [ - "-t" - ], - "description" : "Number of threads", - "required" : false, - 
"direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/" - } - ], - "description" : "Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format (UB).\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "run_test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/" - }, - { - "type" : "file", - "path" : "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:latest", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "samtools" - ], - "interactive" : false - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - 
"mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) -$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -# Sam tags added by BD Rhapsody Pipeline -# From: https://www.bd.com/documents/guides/user-guides/GMX_BD-Rhapsody-genomics-informatics_UG_EN.pdf -# -# ========================================================================================= -# | | Definition | -# ========================================================================================= -# | CB | A number between 1 and 96 3 (884,736) representing a unique cell label sequence | -# | | (CB = 0 when no cell label sequence is detected) | -# ----------------------------------------------------------------------------------------- -# | MR | Raw molecular identifier sequence | -# ----------------------------------------------------------------------------------------- -# | MA | RSEC-adjusted molecular identifier sequence. If not a true cell, the raw UMI is | -# | | repeated in this tag. | -# ----------------------------------------------------------------------------------------- -# | PT | T if a poly(T) tail was found in the expected position on R1, or F if poly(T) | -# | | was not found | -# ----------------------------------------------------------------------------------------- -# | CN | Indicates if a sequence is derived from a putative cell, as determined by the | -# | | cell label filtering algorithm (T: putative cell; x: invalid cell label or noise | -# | | cell) Note: You can distinguish between an invalid cell label and a noise cell | -# | | with the CB tag (invalid cell labels are 0). | -# ----------------------------------------------------------------------------------------- -# | ST | The value is 1-12, indicating the Sample Tag of the called putative cell, or M | -# | | for multiplet, or x for undetermined. 
| -# ========================================================================================= - - -# SAM tags added by 10X -# https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/bam -# ========================================================================================= -# | | Definition | -# ========================================================================================= -# | CB | Chromium cellular barcode sequence that is error-corrected and confirmed against | -# | | a list of known-good barcode sequences. For multiplex Fixed RNA Profiling, the | -# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | -# | | sequences. | -# ----------------------------------------------------------------------------------------- -# | CR | Chromium cellular barcode sequence as reported by the sequencer. For multiplex | -# | | Fixed RNA Profiling, the cellular barcode is a combination of the 10x GEM | -# | | Barcode and Probe Barcode sequences. | -# ----------------------------------------------------------------------------------------- -# | CY | Chromium cellular barcode read quality. For multiplex Fixed RNA Profiling, the | -# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | -# | | sequences. Phred scores as reported by sequencer. | -# ----------------------------------------------------------------------------------------- -# | UB | Chromium molecular barcode sequence that is error-corrected among other | -# | | molecular barcodes with the same cellular barcode and gene alignment. | -# ----------------------------------------------------------------------------------------- -# | UR | Chromium molecular barcode sequence as reported by the sequencer. | -# ----------------------------------------------------------------------------------------- -# | UY | Chromium molecular barcode read quality. Phred scores as reported by sequencer. 
| -# ----------------------------------------------------------------------------------------- -# | TR | Trimmed sequence. For the Single Cell 3' v1 chemistry, this is trailing sequence | -# | | following the UMI on Read 2. For the Single Cell 3' v2 chemistry, this is | -# | | trailing sequence following the cell and molecular barcodes on Read 1. | -# ========================================================================================= - -extra_params=() - -if [ "\\$par_bam" == "true" ]; then - extra_params+=("--bam") -fi - -cat \\\\ - <(samtools view -SH "\\$par_input") \\\\ - <(samtools view "\\$par_input" | grep "MA:Z:*" | sed "s/MA:Z:/UB:Z:/" ) | \\\\ -samtools view -Sh "\\${extra_params[@]}" -@"\\$par_threads" - > "\\$par_output" -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config 
b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config deleted file mode 100644 index 0c032b2e72f..00000000000 --- a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'from_bd_to_10x_molecular_barcode_tags' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format (UB).\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - 
withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml deleted file mode 100644 index 547450d3a74..00000000000 --- a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Arguments -input: # please fill in - example: "input.bam" -# output: "$id.$key.output.sam" -bam: false -# threads: 123 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json deleted file mode 100644 index 76c9c72277d..00000000000 --- a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - 
"title": "from_bd_to_10x_molecular_barcode_tags", - "description": "Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format (UB).\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.bam`. Input SAM or BAM file", - "help_text": "Type: `file`, required, example: `input.bam`. Input SAM or BAM file." - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.sam`, example: `output.sam`. Output alignment file", - "help_text": "Type: `file`, default: `$id.$key.output.sam`, example: `output.sam`. Output alignment file.", - "default": "$id.$key.output.sam" - }, - - "bam": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Output a BAM file", - "help_text": "Type: `boolean_true`, default: `false`. Output a BAM file.", - "default": "False" - }, - - "threads": { - "type": "integer", - "description": "Type: `integer`. Number of threads", - "help_text": "Type: `integer`. Number of threads" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 2eb7adf87f1..00000000000 --- a/target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,181 +0,0 @@ -functionality: - name: "from_bdrhap_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "string" - name: "--id" - description: "A sample ID." - info: null - example: - - "my_id" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "The output of a BD Rhapsody workflow." - info: null - example: - - "input_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "r_script" - path: "script.R" - is_executable: true - description: "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/randpy:r4.2_py3.9" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "r" - cran: - - "anndata" - bioc_force_install: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - 
mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bdrhap_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/main.nf b/target/nextflow/convert/from_bdrhap_to_h5mu/main.nf deleted file mode 100644 index 18e88385f54..00000000000 --- a/target/nextflow/convert/from_bdrhap_to_h5mu/main.nf +++ /dev/null @@ -1,2801 +0,0 @@ -// from_bdrhap_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "from_bdrhap_to_h5mu", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "string", - "name" : "--id", - "description" : "A sample ID.", - "example" : [ - "my_id" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "The output of a BD Rhapsody workflow.", - "example" : [ - "input_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : 
"--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "r_script", - "path" : "script.R", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/" - } - ], - "description" : "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "run_test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/randpy:r4.2_py3.9", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "docker", - "run" : [ - "apt update && apt upgrade -y" - ] - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - }, - { - "type" : "r", - "cran" : [ - "anndata" - ], - "bioc_force_install" : false - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - 
"publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bdrhap_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -cat("Loading libraries\\\\n") -options(tidyverse.quiet = TRUE) -library(tidyverse) -requireNamespace("anndata", quietly = TRUE) -requireNamespace("reticulate", quietly = TRUE) -library(assertthat) -mudata <- reticulate::import("mudata") - -## VIASH START -# The following code has been auto-generated by Viash. -# treat warnings as errors -.viash_orig_warn <- options(warn = 2) - -par <- list( - "id" = $( if [ ! 
-z ${VIASH_PAR_ID+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "input" = $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "output_compression" = $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT_COMPRESSION" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ) -) -meta <- list( - "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_kb" = $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) -) - - -# restore original warn setting -options(.viash_orig_warn) -rm(.viash_orig_warn) - -## VIASH END - -read_metrics <- function(file) { - metric_lines <- readr::read_lines(file) - metric_lines_no_header <- metric_lines[!grepl("^##", metric_lines)] - - # parse sub data frames - group_title_regex <- "^#([^#]*)#" - group_title_ix <- grep(group_title_regex, metric_lines_no_header) - group_titles <- gsub(group_title_regex, "\\\\\\\\1", metric_lines_no_header[group_title_ix]) - group_ix_from <- group_title_ix+1 - group_ix_to <- c(group_title_ix[-1]-1, length(metric_lines_no_header)) - metric_dfs <- pmap( - list( - from = group_ix_from, - to = group_ix_to - ), - function(from, to) { - lines <- metric_lines_no_header[from:to] - lines <- lines[lines != ""] - readr::read_csv(paste0(lines, collapse = "\\\\n")) %>% - mutate(run_id = par\\$id) %>% - select(run_id, everything()) - } - ) - names(metric_dfs) <- gsub(" ", "_", tolower(group_titles)) - metric_dfs -} -cat("Reading in metric summaries\\\\n") -metrics_file <- list.files(par\\$input, pattern = 
"_Metrics_Summary.csv\\$", full.names = TRUE) -assert_that( - length(metrics_file) == 1, - msg = paste0("Exactly one *_Metrics_Summary.csv should be found, found ", length(metrics_file), " files instead.") -) -metric_dfs <- read_metrics(metrics_file) - -cat("Reading in count data\\\\n") -counts_file <- list.files(par\\$input, pattern = "_DBEC_MolsPerCell.csv\\$", full.names = TRUE) -if (length(counts_file) == 0) { - cat("Warning: could not find DBEC file, looking for RSEC file instead.\\\\n") - counts_file <- list.files(par\\$input, pattern = "_RSEC_MolsPerCell.csv\\$", full.names = TRUE) -} -assert_that( - length(counts_file) == 1, - msg = paste0("Exactly one *_(RSEC|DBEC)_MolsPerCell.csv should be found, found ", length(counts_file), " files instead.") -) -counts <- - readr::read_csv( - counts_file, - col_types = cols(.default = col_integer()), - comment = "#" - ) %>% - tibble::column_to_rownames("Cell_Index") %>% - as.matrix %>% - Matrix::Matrix(sparse = TRUE) - -# processing VDJ data -vdj_file <- list.files(par\\$input, pattern = "_VDJ_perCell.csv\\$", full.names = TRUE) -vdj_data <- - if (length(vdj_file) == 1) { - cat("Reading in VDJ data\\\\n") - readr::read_csv( - vdj_file, - comment = "#" - ) - } else { - NULL - } - -cat("Reading in VDJ metric summaries\\\\n") -vdj_metrics_file <- list.files(par\\$input, pattern = "_VDJ_metrics.csv\\$", full.names = TRUE) -vdj_metric_dfs <- - if (length(vdj_metrics_file) == 1) { - read_metrics(vdj_metrics_file) - } else { - list() - } - -# processing SMK data -smk_file <- list.files(par\\$input, pattern = "_Sample_Tag_Calls.csv\\$", full.names = TRUE) -smk_calls <- - if (length(smk_file) == 1) { - cat("Processing sample tags\\\\n") - readr::read_csv( - smk_file, - comment = "#" - ) - } else { - NULL - } -smk_metrics_file <- list.files(par\\$input, pattern = "_Sample_Tag_Metrics.csv\\$", full.names = TRUE) -smk_metrics <- - if (length(smk_metrics_file) == 1) { - readr::read_csv( - smk_metrics_file, - comment = "#" - ) - } 
else { - NULL - } - -cat("Constructing obs\\\\n") -library_id <- metric_dfs[["sequencing_quality"]]\\$Library -if (length(library_id) > 1) { - library_id <- paste(library_id[library_id != "Combined_stats"], collapse = " & ") -} - -obs <- tibble( - cell_id = rownames(counts), - run_id = par\\$id, - library_id = library_id -) - -if (!is.null(smk_calls)) { - obs <- left_join( - obs, - smk_calls %>% transmute( - cell_id = as.character(Cell_Index), - sample_tag = Sample_Tag, - sample_id = Sample_Name - ), - by = "cell_id" - ) -} else { - obs <- obs %>% mutate(sample_id = library_id) -} - -obs <- obs %>% - mutate(sample_id = ifelse(!is.na(sample_id), sample_id, run_id)) %>% - as.data.frame() %>% - column_to_rownames("cell_id") - -cat("Constructing var\\\\n") -# determine feature types of genes -var0 <- tryCatch({ - feature_types_file <- list.files(par\\$input, pattern = "feature_types.tsv\\$", full.names = TRUE) - - # abseq fasta reference has trailing info which apparently gets stripped off by the bd rhapsody pipeline - readr::read_tsv(feature_types_file) %>% - mutate( - trimmed_feature_id = gsub(" .*", "", feature_id), - i = match(feature_id, colnames(counts)), - j = match(trimmed_feature_id, colnames(counts)), - ij = ifelse(is.na(i), j, i), - final_feature_id = ifelse(!is.na(i), feature_id, trimmed_feature_id) - ) %>% - filter(!is.na(ij)) %>% - select(feature_id = final_feature_id, feature_type, reference_file) -}, error = function(e) { - cat("Feature matching error: ", e\\$message, "\\\\n", sep = "") - tibble( - feature_id = character() - ) -}) - -# in case the feature types are missing -missing_features <- tibble( - feature_id = setdiff(colnames(counts), var0\\$feature_id), - feature_type = "Gene Expression", - reference_file = NA_character_, - note = "Feature annotation file missing, assuming type is Gene Expression" -) - -var1 <- - if (nrow(missing_features) > 0) { - cat("Feature annotation file missing, assuming type is Gene Expression\\\\n") - bind_rows(var0, 
missing_features) %>% - slice(match(colnames(counts), feature_id)) - # Avoid nullable string columnns https://github.com/scverse/anndata/issues/679 - missing_features %>% mutate(across(reference_file, as.factor)) - } else { - var0 - } - -# create var -var <- var1 %>% - transmute(gene_ids = feature_id, gene_name = feature_id, feature_types = feature_type, reference_file) %>% - as.data.frame() %>% - column_to_rownames("gene_ids") - -cat("Constructing uns\\\\n") -names(metric_dfs) <- paste0("mapping_qc_", names(metric_dfs)) -smk_metric_dfs <- - if (!is.null(smk_metrics)) { - list(mapping_qc_smk_metrics = smk_metrics) - } else { - NULL - } -uns <- c(metric_dfs, smk_metric_dfs) - -cat("Constructing RNA (&ABC?) AnnData") -adata <- anndata::AnnData( - X = counts, - obs = obs, - var = var, - uns = uns -) - -adata_prot <- adata[, adata\\$var\\$feature_types == "Antibody Capture"] -if (ncol(adata_prot) == 0) { - adata_prot <- NULL -} -adata_rna <- adata[, adata\\$var\\$feature_types != "Antibody Capture"] - -adata_vdj <- - if (!is.null(vdj_data)) { - cat("Constructing VDJ AnnData\\\\n") - names(vdj_metric_dfs) <- paste0("mapping_qc_", names(vdj_metric_dfs)) - anndata::AnnData( - obs = vdj_data, - uns = vdj_metric_dfs, - shape = c(nrow(vdj_data), 0L) - ) - } else { - NULL - } - -cat("Constructing MuData object\\\\n") -modalities <- - list( - rna = adata_rna, - prot = adata_prot, - vdj = adata_vdj - ) -mdata <- mudata\\$MuData(modalities[!sapply(modalities, is.null)]) - -cat("Writing to h5mu file\\\\n") -mdata\\$write(par\\$output, compression=par\\$output_compression) -VIASHMAIN -Rscript "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/convert_from_bdrhap_to_h5mu", - "tag" : 
"0.12.0" - }, - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. 
- // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config deleted file mode 100644 index 
17c07a7c158..00000000000 --- a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'from_bdrhap_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - 
withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml deleted file mode 100644 index 1ceae40b890..00000000000 --- a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Inputs -id: # please fill in - example: "my_id" -input: # please fill in - example: "input_dir/" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json deleted file mode 100644 index 54cf703bb73..00000000000 --- a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "from_bdrhap_to_h5mu", - "description": "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": 
"object", - "description": "No description", - "properties": { - - "id": { - "type": "string", - "description": "Type: `string`, required, example: `my_id`. A sample ID", - "help_text": "Type: `string`, required, example: `my_id`. A sample ID." - }, - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input_dir/`. The output of a BD Rhapsody workflow", - "help_text": "Type: `file`, required, example: `input_dir/`. The output of a BD Rhapsody workflow." - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 3637e858568..00000000000 --- a/target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,190 +0,0 @@ -functionality: - name: "from_cellranger_multi_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input folder. Must contain the output from a cellranger multi run." - info: null - example: - - "input_dir_containing_modalities" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_metrics" - description: "Name of the .uns slot under which to QC metrics (if any)." - info: null - default: - - "metrics_cellranger" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts the output from cellranger multi to a single .h5mu file.\n\ - By default, will map the following library type names to modality names:\n -\ - \ Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ:\ - \ vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n\ - \ - Multiplexing Capture: hashing\n\nOther library types have their whitepace\ - \ removed and dashes replaced by\nunderscores to generate the modality name.\n\ - \nCurrently does not allow parsing the output from cell barcode demultiplexing.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/10x_5k_anticmv" - - type: "file" - path: "resources_test/10x_5k_lung_crispr" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scirpy~=0.11.1" - - "pandas~=2.0.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_cellranger_multi_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: 
"0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf b/target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf deleted file mode 100644 index 51e2c6b7e1c..00000000000 --- a/target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf +++ /dev/null @@ -1,2744 +0,0 @@ -// from_cellranger_multi_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "from_cellranger_multi_to_h5mu", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input folder. 
Must contain the output from a cellranger multi run.", - "example" : [ - "input_dir_containing_modalities" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--uns_metrics", - "description" : "Name of the .uns slot under which to QC metrics (if any).", - "default" : [ - "metrics_cellranger" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Converts the output from cellranger multi to a single .h5mu file.\nBy default, will map the following library type names to modality names:\n - Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ: vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n - Multiplexing Capture: hashing\n\nOther library types have their whitepace removed and dashes replaced by\nunderscores to generate the modality name.\n\nCurrently does not allow parsing the output from 
cell barcode demultiplexing.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/10x_5k_anticmv", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/10x_5k_lung_crispr", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "scirpy~=0.11.1", - "pandas~=2.0.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : 
"memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_cellranger_multi_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from pathlib import Path -import sys -import scanpy -import pandas as pd -import mudata -from scirpy.io import read_10x_vdj -from collections import defaultdict -from functools import partial - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -POSSIBLE_LIBRARY_TYPES = ('vdj_t', 'vdj_b', 'vdj_t_gd', 'count') - -FEATURE_TYPES_NAMES = { - "Gene Expression": "rna", - "Peaks": "atac", - "Antibody Capture": "prot", - "VDJ": "vdj", - "VDJ-T": "vdj_t", - "VDJ-B": "vdj_b", - "CRISPR Guide Capture": "gdo", - "Multiplexing Capture": "hto" - } - -def gather_input_data(dir: Path): - if not dir.is_dir(): - raise ValueError("Specified input is not a directory.") - folder_contents = list(dir.iterdir()) - config = dir / 'config.csv' - if config not in folder_contents: - logger.warning('Config.csv not found in input directory, this folder might not be a valid cellranger multi output.') - - required_subfolders = [dir / subfolder_name for subfolder_name in ('multi', 'per_sample_outs')] - found_input = {key_: None for key_ in POSSIBLE_LIBRARY_TYPES + ('metrics_summary',)} - for required_subfolder in required_subfolders: - if not required_subfolder in folder_contents: - raise ValueError(f"Input folder must contain the subfolder {required_subfolder} please make " - "sure that the specified input folder is a valid cellranger multi output.") - - multi_dir = dir / 'multi' - for library_type in multi_dir.iterdir(): - if not library_type.is_dir(): - logger.warning("%s is not a directory. 
Contents of the multi folder " - "must be directories to be recognized as valid input data", - library_type) - continue - if library_type.name not in POSSIBLE_LIBRARY_TYPES: - raise ValueError(f"Contents of the 'multi' folder must be found one of the following: {','.join(POSSIBLE_LIBRARY_TYPES)}.") - - found_input[library_type.name] = library_type - - per_sample_outs_dir = dir / 'per_sample_outs' - for file_glob in ('*/metrics_summary.csv', '*/count/feature_reference.csv', - '*/count/crispr_analysis/perturbation_efficiencies_by_feature.csv', - '*/count/crispr_analysis/perturbation_efficiencies_by_target.csv'): - found_files = list(per_sample_outs_dir.glob(file_glob)) - if len(found_files) > 1: - raise ValueError(f"Found more than one file for glob '{file_glob}' file. " - "This component currently only supports parsing cellranger multi output for one sample.") - file_name = Path(file_glob).name.removesuffix('.csv') - found_input[file_name] = found_files[0] if found_files else None - - return found_input - - -def proces_perturbation(key_name: str, mudata: mudata.MuData, efficiency_file: Path): - assert 'gdo' in mudata.mod - eff_df = pd.read_csv(efficiency_file, index_col="Perturbation", sep=",", decimal=".", quotechar='"') - mudata.mod['gdo'].uns[key_name] = eff_df - return mudata - -def process_feature_reference(mudata: mudata.MuData, efficiency_file: Path): - df = pd.read_csv(efficiency_file, index_col="id", sep=",", decimal=".", quotechar='"') - assert 'feature_type' in df.columns, "Columns 'feature_type' should be present in features_reference file." 
- feature_types = df['feature_type'] - if set(feature_types) - set(FEATURE_TYPES_NAMES): - raise ValueError("Not all feature types present in the features_reference file are supported by this component.") - for feature_type in feature_types: - modality = FEATURE_TYPES_NAMES[feature_type] - subset_df = df.loc[df['feature_type'] == feature_type] - mudata.mod[modality].uns['feature_reference'] = subset_df - return mudata - -def process_counts(counts_folder: Path): - counts_matrix_file = counts_folder / "raw_feature_bc_matrix.h5" - logger.info("Reading %s.", counts_matrix_file) - adata = scanpy.read_10x_h5(counts_matrix_file, gex_only=False) - - # set the gene ids as var_names - logger.info("Renaming var columns") - adata.var = adata.var\\\\ - .rename_axis("gene_symbol")\\\\ - .reset_index()\\\\ - .set_index("gene_ids") - - # generate output - logger.info("Convert to mudata") - - def modality_name_factory(library_type): - return ("".join(library_type.replace("-", "_").split())).lower() - - feature_types = defaultdict(modality_name_factory, FEATURE_TYPES_NAMES) - return mudata.MuData(adata, feature_types_names=feature_types) - -def process_metrics_summary(mudata: mudata.MuData, metrics_file: Path): - def read_percentage(val): - try: - return float(val.strip('%')) / 100 - except (AttributeError, ValueError): - return val - - metrics_summary = pd.read_csv(metrics_file, - decimal=".", - quotechar='"', - thousands=",").applymap(read_percentage) - - mudata.uns[par["uns_metrics"]] = metrics_summary - for colname, coldata in metrics_summary.items(): - try: - new_column = coldata.astype(str, copy=True).astype({colname: "category"}) - metrics_summary[colname] = new_column - except (ValueError, TypeError): - logger.warning(f"Could not store column {colname} from metrics.") - pass - return mudata - -def process_vdj(mudata: mudata.MuData, vdj_folder_path: Path): - # https://scverse.org/scirpy/latest/generated/scirpy.io.read_10x_vdj.html#scirpy-io-read-10x-vdj - # According to docs, 
using the json is preferred as this file includes intron info. - all_config_json_file = vdj_folder_path / "all_contig_annotations.json" - vdj_anndata = read_10x_vdj(all_config_json_file) - vdj_type = vdj_folder_path.name - mudata.mod[vdj_type] = vdj_anndata - return mudata - -def get_modalities(input_data): - dispatcher = { - 'vdj_t': process_vdj, - 'vdj_b': process_vdj, - 'vdj_t_gd': process_vdj, - 'metrics_summary': process_metrics_summary, - 'feature_reference': process_feature_reference, - 'perturbation_efficiencies_by_feature': partial(proces_perturbation, 'perturbation_efficiencies_by_feature'), - 'perturbation_efficiencies_by_target': partial(proces_perturbation, 'perturbation_efficiencies_by_target'), - } - mudata_file = process_counts(input_data['count']) - for modality_name, modality_data_path in input_data.items(): - if modality_name == "count" or not modality_data_path: - continue - try: - parser_function = dispatcher[modality_name] - except KeyError as e: - raise ValueError("This component does not support the " - f"parsing of the '{modality_name}' yet.") from e - mudata_file = parser_function(mudata_file, modality_data_path) - return mudata_file - -def main(): - cellranger_multi_dir = Path(par["input"]) - input_data = gather_input_data(cellranger_multi_dir) - result = get_modalities(input_data) - logger.info("Writing %s", par["output"]) - result.write_h5mu(par["output"], compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/convert_from_cellranger_multi_to_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // 
auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
- // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config deleted file 
mode 100644 index 3a4fb9b8e62..00000000000 --- a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'from_cellranger_multi_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Converts the output from cellranger multi to a single .h5mu file.\nBy default, will map the following library type names to modality names:\n - Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ: vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n - Multiplexing Capture: hashing\n\nOther library types have their whitepace removed and dashes replaced by\nunderscores to generate the modality name.\n\nCurrently does not allow parsing the output from cell barcode demultiplexing.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } 
- charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml deleted file mode 100644 index c336d480ec8..00000000000 --- a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Arguments -input: # please fill in - example: "input_dir_containing_modalities" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -uns_metrics: "metrics_cellranger" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git 
a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json deleted file mode 100644 index aea87733525..00000000000 --- a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "from_cellranger_multi_to_h5mu", - "description": "Converts the output from cellranger multi to a single .h5mu file.\nBy default, will map the following library type names to modality names:\n - Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ: vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n - Multiplexing Capture: hashing\n\nOther library types have their whitepace removed and dashes replaced by\nunderscores to generate the modality name.\n\nCurrently does not allow parsing the output from cell barcode demultiplexing.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input_dir_containing_modalities`. Input folder", - "help_text": "Type: `file`, required, example: `input_dir_containing_modalities`. Input folder. Must contain the output from a cellranger multi run." - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
", - "enum": ["gzip", "lzf"] - - }, - - "uns_metrics": { - "type": "string", - "description": "Type: `string`, default: `metrics_cellranger`. Name of the ", - "help_text": "Type: `string`, default: `metrics_cellranger`. Name of the .uns slot under which to QC metrics (if any).", - "default": "metrics_cellranger" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py b/target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml deleted file mode 100644 index debbdefa32d..00000000000 --- a/target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,177 +0,0 @@ -functionality: - name: "from_h5ad_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - 
links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5ad files" - info: null - default: - - "input.h5ad" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output MuData file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a single layer h5ad file into a single MuData object\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: 
- - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5ad_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/main.nf b/target/nextflow/convert/from_h5ad_to_h5mu/main.nf deleted file mode 100644 index db66231f3de..00000000000 --- 
a/target/nextflow/convert/from_h5ad_to_h5mu/main.nf +++ /dev/null @@ -1,2596 +0,0 @@ -// from_h5ad_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries De Maeyer (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "from_h5ad_to_h5mu", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5ad files", - "default" : [ - "input.h5ad" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : 
"Output MuData file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Converts a single layer h5ad file into a single MuData object\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true 
- } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5ad_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -import anndata -import sys - -## VIASH START -# The 
following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -assert len(par["input"]) == len(par["modality"]), "Number of input files should be the same length as the number of modalities" - -logger.info("Reading input files") -data = { key: anndata.read_h5ad(path) for key, path in zip(par["modality"], par["input"]) } - -try: - data.var_names_make_unique() -except: - pass - -logger.info("Converting to mudata") -mudata = mu.MuData(data) - -try: - mudata.var_names_make_unique() -except: - pass - -logger.info("Writing to %s.", par['output']) -mudata.write_h5mu(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : 
"openpipelines-bio/convert_from_h5ad_to_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. 
- // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config deleted file mode 100644 index 
80292de0c4a..00000000000 --- a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'from_h5ad_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Converts a single layer h5ad file into a single MuData object\n' - author = 'Dries De Maeyer' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: 
mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml deleted file mode 100644 index 56e4002b284..00000000000 --- a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Arguments -input: # please fill in - example: ["input.h5ad"] -modality: ["rna"] -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json deleted file mode 100644 index 685e9bedaf3..00000000000 --- a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "from_h5ad_to_h5mu", - "description": "Converts a single layer h5ad file into a single MuData object\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { 
- - "input": { - "type": "string", - "description": "Type: List of `file`, required, default: `input.h5ad`, multiple_sep: `\":\"`. Input h5ad files", - "help_text": "Type: List of `file`, required, default: `input.h5ad`, multiple_sep: `\":\"`. Input h5ad files", - "default": "input.h5ad" - }, - - "modality": { - "type": "string", - "description": "Type: List of `string`, default: `rna`, multiple_sep: `\":\"`. ", - "help_text": "Type: List of `string`, default: `rna`, multiple_sep: `\":\"`. ", - "default": "rna" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`. Output MuData file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`. Output MuData file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py b/target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml b/target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml deleted file mode 100644 index 4625071bb70..00000000000 --- a/target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml +++ /dev/null @@ -1,182 +0,0 @@ -functionality: - name: "from_h5mu_to_h5ad" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input MuData file" - info: null - default: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - 
dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output AnnData file." - info: null - default: - - "output.h5ad" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the final h5ad object." - info: null - default: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Converts a h5mu file into a h5ad file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - 
transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5mu_to_h5ad" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/main.nf b/target/nextflow/convert/from_h5mu_to_h5ad/main.nf deleted file mode 100644 index f96ba6d0319..00000000000 --- a/target/nextflow/convert/from_h5mu_to_h5ad/main.nf +++ /dev/null @@ -1,2592 +0,0 @@ -// from_h5mu_to_h5ad 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "from_h5mu_to_h5ad", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input MuData file", - "default" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output AnnData file.", - "default" : [ - "output.h5ad" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - 
"type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the final h5ad object.", - "default" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Converts a h5mu file into a h5ad file.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - 
"id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5mu_to_h5ad", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -# TODO: Merge modalities into one layer - -logger.info("Reading input h5mu file") -dat = mu.read_h5mu(par["input"]) - -logger.info("Converting to h5ad") -adat = dat.mod[par["modality"]] - -logger.info("Writing to %s.", par['output']) -adat.write_h5ad(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/convert_from_h5mu_to_h5ad", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : 
false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
- // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config deleted file mode 100644 index 
94c50f449cd..00000000000 --- a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'from_h5mu_to_h5ad' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Converts a h5mu file into a h5ad file.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 
256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml deleted file mode 100644 index 5a88b692a83..00000000000 --- a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# output: "$id.$key.output.h5ad" -output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json deleted file mode 100644 index 06fe54649d8..00000000000 --- a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "from_h5mu_to_h5ad", - "description": "Converts a h5mu file into a h5ad file.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - 
"description": "Type: `file`, required, default: `input.h5mu`. Input MuData file", - "help_text": "Type: `file`, required, default: `input.h5mu`. Input MuData file", - "default": "input.h5mu" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5ad`. Output AnnData file", - "help_text": "Type: `file`, default: `$id.$key.output.h5ad`. Output AnnData file.", - "default": "$id.$key.output.h5ad" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the final h5ad object", - "help_text": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the final h5ad object.", - "enum": ["gzip", "lzf"] - , - "default": "gzip" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py b/target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml deleted file mode 100644 index a86e9fae886..00000000000 --- a/target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,255 +0,0 @@ -functionality: - name: "velocyto_to_h5mu" - namespace: "convert" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Robrecht Cannoodt" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open 
Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "contributor" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input_loom" - description: "Path to the input loom file." - info: null - example: - - "input.loom" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input_h5mu" - description: "If a MuData file is provided," - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "The name of the modality to operate on." - info: null - default: - - "rna_velocity" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Path to the output MuData file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_spliced" - description: "Output layer for the spliced reads." 
- info: null - default: - - "velo_spliced" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_unspliced" - description: "Output layer for the unspliced reads." - info: null - default: - - "velo_unspliced" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_ambiguous" - description: "Output layer for the ambiguous reads." - info: null - default: - - "velo_ambiguous" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file\ - \ is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "loompy" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 
4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/velocyto_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/velocyto_to_h5mu/velocyto_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/convert/velocyto_to_h5mu/main.nf b/target/nextflow/convert/velocyto_to_h5mu/main.nf deleted file mode 100644 index bf693750051..00000000000 --- a/target/nextflow/convert/velocyto_to_h5mu/main.nf +++ /dev/null @@ -1,2693 +0,0 @@ -// velocyto_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (maintainer, author) -// * Robrecht Cannoodt (author) -// * Angela Oliveira Pisco (contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "velocyto_to_h5mu", - "namespace" : "convert", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer", - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - 
{ - "type" : "file", - "name" : "--input_loom", - "description" : "Path to the input loom file.", - "example" : [ - "input.loom" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input_h5mu", - "description" : "If a MuData file is provided,", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "The name of the modality to operate on.", - "default" : [ - "rna_velocity" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Path to the output MuData file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer_spliced", - "description" : "Output layer for the spliced reads.", - "default" : [ - "velo_spliced" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer_unspliced", - "description" : "Output layer for the unspliced reads.", - "default" : [ - "velo_unspliced" - ], - "required" : false, - "direction" : 
"input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer_ambiguous", - "description" : "Output layer for the ambiguous reads.", - "default" : [ - "velo_ambiguous" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/" - } - ], - "description" : "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "loompy" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "lowcpu" - ], - 
"tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/velocyto_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import anndata as ad -import mudata as mu - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_loom': $( if [ ! -z ${VIASH_PAR_INPUT_LOOM+x} ]; then echo "r'${VIASH_PAR_INPUT_LOOM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_h5mu': $( if [ ! 
-z ${VIASH_PAR_INPUT_H5MU+x} ]; then echo "r'${VIASH_PAR_INPUT_H5MU//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer_spliced': $( if [ ! -z ${VIASH_PAR_LAYER_SPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_SPLICED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer_unspliced': $( if [ ! -z ${VIASH_PAR_LAYER_UNSPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_UNSPLICED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer_ambiguous': $( if [ ! -z ${VIASH_PAR_LAYER_AMBIGUOUS+x} ]; then echo "r'${VIASH_PAR_LAYER_AMBIGUOUS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -print("Parameters:", par, flush=True) - -print("Reading AnnData from loom", flush=True) -adata_in = ad.read_loom(par["input_loom"]) -adata_in.var_names = adata_in.var["Accession"] - -print("Creating clean AnnData", flush=True) -adata = ad.AnnData( - obs=adata_in.obs[[]], - var=adata_in.var[[]], - layers={ - par["layer_spliced"]: adata_in.layers["spliced"], - par["layer_unspliced"]: adata_in.layers["unspliced"], - par["layer_ambiguous"]: adata_in.layers["ambiguous"] - } -) - -if par["input_h5mu"]: - print("Received input h5mu to read", flush=True) - mdata = mu.read_h5mu(par["input_h5mu"]) - - print(f"Storing AnnData in modality {par['modality']}", flush=True) - mdata.mod[par["modality"]] = adata -else: - print("Creating h5mu from scratch", flush=True) - mdata = mu.MuData({par["modality"]: adata}) - -print("Resulting mudata:", mdata, flush=True) - -print("Writing h5mu to file", flush=True) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: 
thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/convert_velocyto_to_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "lowcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. 
//   - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data.
  //
  // Example: `{ id, state -> [input: state.fastq_file] }`
  // Default: `null`
  fromState: null,

  // Determine how the state should be updated after the module has been run.
  //
  // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function.
  //
  //   - If it is `null`, the state will be replaced with the output of the module.
  //   - If it is a `List[String]`, the state will be updated with the values of the data at the given keys.
  //   - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map.
  //   - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state.
  //
  // Example: `{ id, output, state -> state + [counts: state.output] }`
  // Default: `{ id, output, state -> output }`
  toState: null,

  // Whether or not to print debug messages
  // Default: `false`
  debug: false
]

// END CUSTOM CODE

/////////////////////////////////////
// Viash Workflow helper functions //
/////////////////////////////////////

import java.util.regex.Pattern
import java.io.BufferedReader
import java.io.FileReader
import java.nio.file.Paths
import java.nio.file.Files
import groovy.json.JsonSlurper
import groovy.text.SimpleTemplateEngine
import org.yaml.snakeyaml.Yaml

// param helpers //

// True when the Nextflow parameter `name` is present in `params` and is not the empty string.
def paramExists(name) {
  return params.containsKey(name) && params[name] != ""
}

// Abort the run with exit code 1 when the parameter `name` is missing;
// `description` is appended to the error message.
def assertParamExists(name, description) {
  if (!paramExists(name)) {
    exit 1, "ERROR: Please provide a --${name} parameter ${description}"
  }
}

// helper functions for reading params from file //

// Resolve `child` against the directory that contains `parent`.
// A child that contains "://" or is an absolute path is returned unchanged.
def getChild(parent, child) {
  if (child.contains("://") || Paths.get(child).isAbsolute()) {
    child
  } else {
    def parentAbsolute = Paths.get(parent).toAbsolutePath().toString()
    // drop the last path segment of the parent, keeping the trailing slash
    parentAbsolute.replaceAll('/[^/]*$', "/") + child
  }
}

// Read a csv file into a list of maps (one map per data row, keyed by the header fields).
//
// Lines starting with '#' are skipped, the first remaining line is the header,
// empty fields are dropped from the row map, and one pair of surrounding double
// quotes is removed from each field.
def readCsv(file_path) {
  def output = []
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = Pattern.compile('''"(.*)"''')

  // the matcher is a local here; previously it was assigned without `def`
  // and therefore ended up in the script binding
  def unquote = { field ->
    def m = removeQuote.matcher(field)
    m.find() ? m.replaceFirst('$1') : field
  }

  def br = Files.newBufferedReader(inputFile)
  try {
    def row = -1
    def header = null
    def line = null

    // the header is the first line that is not a comment
    while (header == null && (line = br.readLine()) != null) {
      row++
      if (!line.startsWith("#")) {
        header = splitRegex.split(line, -1).collect{ field -> unquote(field) }
      }
    }
    assert header != null: "CSV file should contain a header"

    while ((line = br.readLine()) != null) {
      row++
      if (!line.startsWith("#")) {
        def data = splitRegex.split(line, -1).collect{ field ->
          field == "" ? null : unquote(field)
        }
        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
        output.add(dataMap)
      }
    }
  } finally {
    // always release the file handle, also when an assertion fails
    br.close()
  }

  output
}

// Parse a json string.
def readJsonBlob(str) {
  def jsonSlurper = new JsonSlurper()
  jsonSlurper.parseText(str)
}

// Parse a json file; `file_path` may be a Path or anything accepted by `file()`.
def readJson(file_path) {
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
  def jsonSlurper = new JsonSlurper()
  jsonSlurper.parse(inputFile)
}

// Parse a yaml string.
def readYamlBlob(str) {
  def yamlSlurper = new Yaml()
  yamlSlurper.load(str)
}

// Parse a yaml file; `file_path` may be a Path or anything accepted by `file()`.
def readYaml(file_path) {
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
  def yamlSlurper = new Yaml()
  yamlSlurper.load(inputFile)
}

// helper functions for reading a viash config in groovy //

// based on how Functionality.scala is implemented
//
// Fill in the defaults of a single argument map (in place) and return it.
def processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":"
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  // output files get a templated default name; the extension is taken from
  // the configured default or, failing that, the example
  if (arg.type == "file" && arg.direction == "output") {
    def mult = arg.multiple ? "_*" : ""
    def extSearch = ""
    if (arg.default != null) {
      extSearch = arg.default
    } else if (arg.example != null) {
      extSearch = arg.example
    }
    if (extSearch instanceof List) {
      extSearch = extSearch[0]
    }
    // an empty default/example list leaves extSearch null, hence the safe call
    def extSearchResult = extSearch?.find("\\.[^\\.]+\$")
    def ext = extSearchResult != null ? extSearchResult : ""
    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
  }

  // single-valued arguments store a scalar default/example instead of a list
  if (!arg.multiple) {
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}

// based on how Functionality.scala is implemented
def processArgumentGroup(argumentGroups, name, arguments) {
  def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet()

  // Check if 'arguments' is in 'argumentGroups'.
  def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))}

  // Check whether an argument group of 'name' exists.
def existing = argumentGroups.find{gr -> name == gr.name}

  // if there are no arguments missing from the argument group, just return the existing group (if any)
  if (argumentsNotInGroup.isEmpty()) {
    return existing == null ? [] : [existing]

  // if there are missing arguments and there is an existing group, add the missing arguments to it
  } else if (existing != null) {
    // Build a new group map with a new arguments list. A shallow clone would
    // share the list with the configured group, so appending to it would also
    // change the config and duplicate the entries on a later pass.
    def missing = argumentsNotInGroup.findAll{it !instanceof String}
    def newEx = existing + [arguments: existing.arguments + missing]
    return [newEx]

  // else create a new group
  } else {
    def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}]
    return [newEx]
  }
}

// based on how Functionality.scala is implemented
//
// Normalise a parsed viash config in place: apply argument defaults to the
// (deprecated) inputs/outputs, the arguments and the argument groups.
def processConfig(config) {
  // TODO: assert .functionality etc.
  if (config.functionality.inputs) {
    System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.")
  }
  if (config.functionality.outputs) {
    System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.")
  }

  // set defaults for inputs
  config.functionality.inputs =
    (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg ->
      arg.type = arg.type != null ? arg.type : "file"
      arg.direction = "input"
      processArgument(arg)
    }
  // set defaults for outputs
  config.functionality.outputs =
    (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg ->
      arg.type = arg.type != null ? arg.type : "file"
      arg.direction = "output"
      processArgument(arg)
    }
  // set defaults for arguments
  config.functionality.arguments =
    (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg ->
      processArgument(arg)
    }
  // set defaults for argument_group arguments
  config.functionality.argument_groups =
    (config.functionality.argument_groups != null ?
config.functionality.argument_groups : []).collect{grp ->
      grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg ->
        // string entries are references by name; strip their leading dashes
        arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg)
      }
      grp
    }

  // create combined arguments list
  config.functionality.allArguments =
    config.functionality.inputs +
    config.functionality.outputs +
    config.functionality.arguments +
    config.functionality.argument_groups.collectMany{ group ->
      group.arguments.findAll{ it !instanceof String }
    }

  // add missing argument groups (based on Functionality::allArgumentGroups())
  def argGroups = config.functionality.argument_groups
  def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs)
  def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs)
  def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments)
  def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name)))
  config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered

  config
}

// Read a viash config from yaml (default: "$projectDir/config.vsh.yaml") and normalise it.
def readConfig(file) {
  def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml")
  processConfig(config)
}

// recursively merge two maps
//
// Nested maps are merged, collections are concatenated, and any other value
// in `rhs` overrides the one in `lhs`. The top-level `lhs` map is cloned first.
def mergeMap(Map lhs, Map rhs) {
  return rhs.inject(lhs.clone()) { map, entry ->
    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
      map[entry.key] = mergeMap(map[entry.key], entry.value)
    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
      map[entry.key] += entry.value
    } else {
      map[entry.key] = entry.value
    }
    return map
  }
}

// Return a processed copy of `config` extended with the Nextflow-level
// arguments (`--publish_dir`, `--param_list`).
def addGlobalParams(config) {
  def localConfig = [
    "functionality" : [
      "argument_groups": [
        [
          "name": "Nextflow input-output arguments",
          "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
          "arguments" : [
            [
              'name': '--publish_dir',
              'required': true,
              'type': 'string',
              'description': 'Path to an output directory.',
              'example': 'output/',
              'multiple': false
            ],
            [
              'name': '--param_list',
              'required': false,
              'type': 'string',
              'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
              |
              |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
              |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
              |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
              |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
              |
              |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
              'example': 'my_params.yaml',
              'multiple': false,
              'hidden': true
            ],
          ]
        ]
      ]
    ]
  ]

  return processConfig(mergeMap(config, localConfig))
}

// helper functions for generating help //

// based on io.viash.helpers.Format.wordWrap
//
// Greedily pack the whitespace-separated words of `str` into lines of at most
// `maxLength` characters and return the list of lines. The first word seeds
// the first line, so no line starts with a padding space and an over-long
// first word does not produce an empty leading line. A string that splits
// into no words yields an empty list.
def formatWordWrap(str, maxLength) {
  def lines = []
  def line = null
  // iterate front to back instead of using List.pop(), whose end of the list
  // differs between Groovy versions
  str.split("\\s").each{ word ->
    if (line == null) {
      line = word
    } else if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  if (line != null) {
    lines.add(line)
  }
  return lines
}

// based on Format.paragraphWrap
//
// Word-wrap every newline-separated paragraph of `str` separately and return
// all resulting lines. A paragraph without any words (empty or whitespace
// only) is kept as one empty line.
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def wrapped = formatWordWrap(par, maxLength)
    outLines.addAll(wrapped.isEmpty() ? [""] : wrapped)
  }
  return outLines
}

def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ?
param.multiple_sep : ", ")
    } else {
      example = param.example.toString()
    }
  }
  def min = param.min?.toString()
  def max = param.max?.toString()

  // Render one choice for the help text: newlines become the two characters
  // backslash + n, double quotes are backslash-escaped, and the value is
  // wrapped in double quotes when it contains a comma or needed escaping.
  // String.replace is used because it treats both arguments literally; in a
  // replaceAll replacement string a backslash only escapes the next character.
  def escapeChoice = { choice ->
    def s1 = choice.replace("\n", "\\n")
    def s2 = s1.replace("\"", "\\\"")
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  // NOTE(review): runs of spaces inside the string literals below may have been
  // collapsed in this copy of the file; the wrap widths `80 - 8` and `80-4`
  // suggest 8- and 4-space indents — confirm against the upstream Viash helper.
  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    // keep each property on one line: show embedded newlines as a literal \n
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

// Based on Helper.generateHelp() in Helper.scala
//
// Build the help text of a processed config: name and version, description,
// usage, and one section per argument group.
def generateHelp(config) {
  def fun = config.functionality

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ?
fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

// Print the help text and exit with code 0 when a `help` parameter was passed.
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

// Classify `params.param_list` as "none", "asis" (not a String), "csv",
// "json", "yaml" (by file extension) or "yaml_blob" (any other String).
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// script-level flag so that the deprecation warning is only printed once
viashChannelDeprecationWarningPrinted = false

// Deprecated. Combine the config defaults, the command-line params and each
// entry of `--param_list` into a list of parameter maps (one per entry),
// split multi-value arguments, resolve input files and cast types.
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // each reader returns [ file the entries were read from (or null), list of entries ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally; an undeclared assignment would end up in the script binding
        def parData
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              // entries read from a file are resolved relative to that file
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // (the closure receives map entries, so the value has to be tested)
      .findAll{ entry -> entry.value != null }

    inputs
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) leaves ppIds intact, so the message still lists the duplicated ids
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

// Deprecated. Emit the result of paramsToList as a channel of parameter maps.
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

// Deprecated. Like paramsToChannel, but emits `[id, parameter map]` tuples.
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not described in the config are returned unchanged.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
      "Error: argument ${parName} has too many values.\n" +
      " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/convert/velocyto_to_h5mu/nextflow.config b/target/nextflow/convert/velocyto_to_h5mu/nextflow.config deleted file mode 100644 index 
9dfcb671ec2..00000000000 --- a/target/nextflow/convert/velocyto_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'velocyto_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n' - author = 'Dries Schaumont, Robrecht Cannoodt, Angela Oliveira Pisco' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - 
withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml deleted file mode 100644 index a3f43db0942..00000000000 --- a/target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# Inputs -input_loom: # please fill in - example: "input.loom" -# input_h5mu: "input.h5mu" -modality: "rna_velocity" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -layer_spliced: "velo_spliced" -layer_unspliced: "velo_unspliced" -layer_ambiguous: "velo_ambiguous" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json b/target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json deleted file mode 100644 index ea1fdf4fffb..00000000000 --- a/target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,111 +0,0 @@ -{ - "$schema": 
"http://json-schema.org/draft-07/schema", - "title": "velocyto_to_h5mu", - "description": "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input_loom": { - "type": "string", - "description": "Type: `file`, required, example: `input.loom`. Path to the input loom file", - "help_text": "Type: `file`, required, example: `input.loom`. Path to the input loom file." - }, - - "input_h5mu": { - "type": "string", - "description": "Type: `file`, example: `input.h5mu`. If a MuData file is provided,", - "help_text": "Type: `file`, example: `input.h5mu`. If a MuData file is provided," - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna_velocity`. The name of the modality to operate on", - "help_text": "Type: `string`, default: `rna_velocity`. The name of the modality to operate on.", - "default": "rna_velocity" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Path to the output MuData file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Path to the output MuData file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "layer_spliced": { - "type": "string", - "description": "Type: `string`, default: `velo_spliced`. Output layer for the spliced reads", - "help_text": "Type: `string`, default: `velo_spliced`. Output layer for the spliced reads.", - "default": "velo_spliced" - }, - - "layer_unspliced": { - "type": "string", - "description": "Type: `string`, default: `velo_unspliced`. Output layer for the unspliced reads", - "help_text": "Type: `string`, default: `velo_unspliced`. Output layer for the unspliced reads.", - "default": "velo_unspliced" - }, - - "layer_ambiguous": { - "type": "string", - "description": "Type: `string`, default: `velo_ambiguous`. Output layer for the ambiguous reads", - "help_text": "Type: `string`, default: `velo_ambiguous`. Output layer for the ambiguous reads.", - "default": "velo_ambiguous" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml b/target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml deleted file mode 100644 index e07121119b5..00000000000 --- a/target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml +++ /dev/null @@ -1,637 +0,0 @@ -functionality: - name: "cellbender_remove_background" - namespace: "correction" - version: "0.12.3" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file. Data file on which to run tool. 
Data must be\ - \ un-filtered: it should include empty droplets." - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "List of modalities to process." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Full count matrix as an h5mu file, with background RNA removed.\ - \ This file contains all the original droplet barcodes." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_output" - description: "Output layer" - info: null - default: - - "cellbender_corrected" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_background_fraction" - info: null - default: - - "cellbender_background_fraction" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_cell_probability" - info: null - default: - - "cellbender_cell_probability" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_cell_size" - info: null - default: - - "cellbender_cell_size" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_droplet_efficiency" - info: null - default: - - "cellbender_droplet_efficiency" - 
required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_latent_scale" - info: null - default: - - "cellbender_latent_scale" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_ambient_expression" - info: null - default: - - "cellbender_ambient_expression" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_gene_expression_encoding" - info: null - default: - - "cellbender_gene_expression_encoding" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "boolean" - name: "--expected_cells_from_qc" - description: "Will use the Cell Ranger QC to determine the estimated number\ - \ of cells" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--expected_cells" - description: "Number of cells expected in the dataset (a rough estimate within\ - \ a factor of 2 is sufficient)." - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--total_droplets_included" - description: "The number of droplets from the rank-ordered UMI plot\nthat will\ - \ have their cell probabilities inferred as an\noutput. Include the droplets\ - \ which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should\ - \ be\n'surely empty' droplets.\n" - info: null - example: - - 25000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--force_cell_umi_prior" - description: "Ignore CellBender's heuristic prior estimation, and use this prior\ - \ for UMI counts in cells." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--force_empty_umi_prior" - description: "Ignore CellBender's heuristic prior estimation, and use this prior\ - \ for UMI counts in empty droplets." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--model" - description: "Which model is being used for count data.\n\n* 'naive' subtracts\ - \ the estimated ambient profile.\n* 'simple' does not model either ambient\ - \ RNA or random barcode swapping (for debugging purposes -- not recommended).\n\ - * 'ambient' assumes background RNA is incorporated into droplets.\n* 'swapping'\ - \ assumes background RNA comes from random barcode swapping (via PCR chimeras).\n\ - * 'full' uses a combined ambient and swapping model.\n" - info: null - default: - - "full" - required: false - choices: - - "naive" - - "simple" - - "ambient" - - "swapping" - - "full" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--epochs" - description: "Number of epochs to train." - info: null - default: - - 150 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--low_count_threshold" - description: "Droplets with UMI counts below this number are completely \nexcluded\ - \ from the analysis. 
This can help identify the correct \nprior for empty\ - \ droplet counts in the rare case where empty \ncounts are extremely high\ - \ (over 200).\n" - info: null - default: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_dim" - description: "Dimension of latent variable z.\n" - info: null - default: - - 64 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_layers" - description: "Dimension of hidden layers in the encoder for z.\n" - info: null - default: - - 512 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--training_fraction" - description: "Training detail: the fraction of the data used for training.\n\ - The rest is never seen by the inference algorithm. Speeds up learning.\n" - info: null - default: - - 0.9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--empty_drop_training_fraction" - description: "Training detail: the fraction of the training data each epoch\ - \ that \nis drawn (randomly sampled) from surely empty droplets.\n" - info: null - default: - - 0.2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--ignore_features" - description: "Integer indices of features to ignore entirely. In the output\n\ - count matrix, the counts for these features will be unchanged.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--fpr" - description: "Target 'delta' false positive rate in [0, 1). 
Use 0 for a cohort\n\ - of samples which will be jointly analyzed for differential expression.\nA\ - \ false positive is a true signal count that is erroneously removed.\nMore\ - \ background removal is accompanied by more signal removal at\nhigh values\ - \ of FPR. You can specify multiple values, which will\ncreate multiple output\ - \ files.\n" - info: null - default: - - 0.01 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--exclude_feature_types" - description: "Feature types to ignore during the analysis. These features will\n\ - be left unchanged in the output file.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--projected_ambient_count_threshold" - description: "Controls how many features are included in the analysis, which\n\ - can lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD\ - \ counts total in all cells\n(summed), then that gene is excluded, and it\ - \ will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD\ - \ = 0 will include all features\nwhich have even a single count in any empty\ - \ droplet.\n" - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--learning_rate" - description: "Training detail: lower learning rate for inference.\nA OneCycle\ - \ learning rate schedule is used, where the\nupper learning rate is ten times\ - \ this value. 
(For this\nvalue, probably do not exceed 1e-3).\n" - info: null - default: - - 1.0E-4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--final_elbo_fail_fraction" - description: "Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO\ - \ - initial_test_ELBO) > FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically\ - \ re-run if --num-training-tries > 1.\nBy default, will not fail training\ - \ based on final_training_ELBO.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--epoch_elbo_fail_fraction" - description: "Training is considered to have failed if \n(previous_epoch_test_ELBO\ - \ - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO)\ - \ > EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries\ - \ > 1.\nBy default, will not fail training based on epoch_training_ELBO.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_training_tries" - description: "Number of times to attempt to train the model. At each subsequent\ - \ attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--learning_rate_retry_mult" - description: "Learning rate is multiplied by this amount each time a new training\n\ - attempt is made. (This parameter is only used if training fails based\non\ - \ EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES\ - \ is > 1.) 
\n" - info: null - default: - - 0.2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--posterior_batch_size" - description: "Training detail: size of batches when creating the posterior.\n\ - Reduce this to avoid running out of GPU memory creating the posterior\n(will\ - \ be slower).\n" - info: null - default: - - 128 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--posterior_regulation" - description: "Posterior regularization method. (For experts: not required for\ - \ normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n\ - * PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n\ - * PRmu_gene is approximate mean-targeting per gene.\n" - info: null - required: false - choices: - - "PRq" - - "PRmu" - - "PRmu_gene" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alpha" - description: "Tunable parameter alpha for the PRq posterior regularization method\n\ - (not normally used: see documentation).\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--q" - description: "Tunable parameter q for the CDF threshold estimation method (not\n\ - normally used: see documentation).\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--estimator" - description: "Output denoised count estimation method. 
(For experts: not required\n\ - for normal usage, see documentation).\n" - info: null - default: - - "mckp" - required: false - choices: - - "map" - - "mean" - - "cdf" - - "sample" - - "mckp" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--estimator_multiple_cpu" - description: "Including the flag --estimator-multiple-cpu will use more than\ - \ one\nCPU to compute the MCKP output count estimator in parallel (does nothing\n\ - for other estimators).\n" - info: null - direction: "input" - dest: "par" - - type: "boolean" - name: "--constant_learning_rate" - description: "Including the flag --constant-learning-rate will use the ClippedAdam\n\ - optimizer instead of the OneCycleLR learning rate schedule, which is\nthe\ - \ default. Learning is faster with the OneCycleLR schedule.\nHowever, training\ - \ can easily be continued from a checkpoint for more\nepochs than the initial\ - \ command specified when using ClippedAdam. On\nthe other hand, if using the\ - \ OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick\ - \ up from that final checkpoint\nand continue training until 250 epochs.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--debug" - description: "Including the flag --debug will log extra messages useful for\ - \ debugging.\n" - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--cuda" - description: "Including the flag --cuda will run the inference on a\nGPU.\n" - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Eliminating technical artifacts from high-throughput single-cell RNA\ - \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ - \ and random barcode swapping from (raw) 
UMI-based scRNA-seq count matrices. \n\ - At the moment, only the count matrices produced by the CellRanger count pipeline\ - \ is supported. Support for additional tools and protocols \nwill be added in\ - \ the future. A quick start tutorial can be found here.\n\nFleming et al. 2022,\ - \ bioRxiv.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential\ - \ libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates\ - \ curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev\ - \ liblzma-dev mecab-ipadic-utf8 git \\\n&& curl https://pyenv.run | bash \\\n\ - && pyenv update \\\n&& pyenv install $PYTHON_VERSION \\\n&& pyenv global $PYTHON_VERSION\ - \ \\\n&& apt-get clean\n" - env: - - "PYENV_ROOT=\"/root/.pyenv\"" - - "PATH=\"$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH\"" - - "PYTHON_VERSION=3.7.16" - - type: "python" - user: false - packages: - - "mudata~=0.2.1" - - "cellbender~=0.3.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "muon" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - 
transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background/cellbender_remove_background" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/correction/cellbender_remove_background/main.nf b/target/nextflow/correction/cellbender_remove_background/main.nf deleted file mode 100644 index 7fea2cc19c3..00000000000 --- a/target/nextflow/correction/cellbender_remove_background/main.nf +++ /dev/null @@ -1,3212 +0,0 @@ -// cellbender_remove_background 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. 
-// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "cellbender_remove_background", - "namespace" : "correction", - "version" : "0.12.3", - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file. Data file on which to run tool. Data must be un-filtered: it should include empty droplets.", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "List of modalities to process.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Full count matrix as an h5mu file, with background RNA removed. 
This file contains all the original droplet barcodes.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer_output", - "description" : "Output layer", - "default" : [ - "cellbender_corrected" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_background_fraction", - "default" : [ - "cellbender_background_fraction" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_cell_probability", - "default" : [ - "cellbender_cell_probability" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_cell_size", - "default" : [ - "cellbender_cell_size" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_droplet_efficiency", - "default" : [ - "cellbender_droplet_efficiency" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_latent_scale", - "default" : [ - "cellbender_latent_scale" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_ambient_expression", - "default" : [ - 
"cellbender_ambient_expression" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_gene_expression_encoding", - "default" : [ - "cellbender_gene_expression_encoding" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "boolean", - "name" : "--expected_cells_from_qc", - "description" : "Will use the Cell Ranger QC to determine the estimated number of cells", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--expected_cells", - "description" : "Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient).", - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--total_droplets_included", - "description" : "The number of droplets from the rank-ordered UMI plot\nthat will have their cell probabilities inferred as an\noutput. 
Include the droplets which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should be\n'surely empty' droplets.\n", - "example" : [ - 25000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--force_cell_umi_prior", - "description" : "Ignore CellBender's heuristic prior estimation, and use this prior for UMI counts in cells.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--force_empty_umi_prior", - "description" : "Ignore CellBender's heuristic prior estimation, and use this prior for UMI counts in empty droplets.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--model", - "description" : "Which model is being used for count data.\n\n* 'naive' subtracts the estimated ambient profile.\n* 'simple' does not model either ambient RNA or random barcode swapping (for debugging purposes -- not recommended).\n* 'ambient' assumes background RNA is incorporated into droplets.\n* 'swapping' assumes background RNA comes from random barcode swapping (via PCR chimeras).\n* 'full' uses a combined ambient and swapping model.\n", - "default" : [ - "full" - ], - "required" : false, - "choices" : [ - "naive", - "simple", - "ambient", - "swapping", - "full" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--epochs", - "description" : "Number of epochs to train.", - "default" : [ - 150 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--low_count_threshold", - "description" : "Droplets with UMI counts below this number are completely \nexcluded from the analysis. 
This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n", - "default" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--z_dim", - "description" : "Dimension of latent variable z.\n", - "default" : [ - 64 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--z_layers", - "description" : "Dimension of hidden layers in the encoder for z.\n", - "default" : [ - 512 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--training_fraction", - "description" : "Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n", - "default" : [ - 0.9 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--empty_drop_training_fraction", - "description" : "Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n", - "default" : [ - 0.2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--ignore_features", - "description" : "Integer indices of features to ignore entirely. In the output\ncount matrix, the counts for these features will be unchanged.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--fpr", - "description" : "Target 'delta' false positive rate in [0, 1). 
Use 0 for a cohort\nof samples which will be jointly analyzed for differential expression.\nA false positive is a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal removal at\nhigh values of FPR. You can specify multiple values, which will\ncreate multiple output files.\n", - "default" : [ - 0.01 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--exclude_feature_types", - "description" : "Feature types to ignore during the analysis. These features will\nbe left unchanged in the output file.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--projected_ambient_count_threshold", - "description" : "Controls how many features are included in the analysis, which\ncan lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD counts total in all cells\n(summed), then that gene is excluded, and it will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD = 0 will include all features\nwhich have even a single count in any empty droplet.\n", - "default" : [ - 0.1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--learning_rate", - "description" : "Training detail: lower learning rate for inference.\nA OneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. 
(For this\nvalue, probably do not exceed 1e-3).\n", - "default" : [ - 1.0E-4 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--final_elbo_fail_fraction", - "description" : "Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO) > FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries > 1.\nBy default, will not fail training based on final_training_ELBO.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--epoch_elbo_fail_fraction", - "description" : "Training is considered to have failed if \n(previous_epoch_test_ELBO - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO) > EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries > 1.\nBy default, will not fail training based on epoch_training_ELBO.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--num_training_tries", - "description" : "Number of times to attempt to train the model. At each subsequent attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n", - "default" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--learning_rate_retry_mult", - "description" : "Learning rate is multiplied by this amount each time a new training\nattempt is made. (This parameter is only used if training fails based\non EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES is > 1.) 
\n", - "default" : [ - 0.2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--posterior_batch_size", - "description" : "Training detail: size of batches when creating the posterior.\nReduce this to avoid running out of GPU memory creating the posterior\n(will be slower).\n", - "default" : [ - 128 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--posterior_regulation", - "description" : "Posterior regularization method. (For experts: not required for normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n* PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n* PRmu_gene is approximate mean-targeting per gene.\n", - "required" : false, - "choices" : [ - "PRq", - "PRmu", - "PRmu_gene" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--alpha", - "description" : "Tunable parameter alpha for the PRq posterior regularization method\n(not normally used: see documentation).\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--q", - "description" : "Tunable parameter q for the CDF threshold estimation method (not\nnormally used: see documentation).\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--estimator", - "description" : "Output denoised count estimation method. 
(For experts: not required\nfor normal usage, see documentation).\n", - "default" : [ - "mckp" - ], - "required" : false, - "choices" : [ - "map", - "mean", - "cdf", - "sample", - "mckp" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--estimator_multiple_cpu", - "description" : "Including the flag --estimator-multiple-cpu will use more than one\nCPU to compute the MCKP output count estimator in parallel (does nothing\nfor other estimators).\n", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--constant_learning_rate", - "description" : "Including the flag --constant-learning-rate will use the ClippedAdam\noptimizer instead of the OneCycleLR learning rate schedule, which is\nthe default. Learning is faster with the OneCycleLR schedule.\nHowever, training can easily be continued from a checkpoint for more\nepochs than the initial command specified when using ClippedAdam. 
On\nthe other hand, if using the OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick up from that final checkpoint\nand continue training until 250 epochs.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--debug", - "description" : "Including the flag --debug will log extra messages useful for debugging.\n", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--cuda", - "description" : "Including the flag --cuda will run the inference on a\nGPU.\n", - "direction" : "input", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022, bioRxiv.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "docker", - "run" : [ - "apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev mecab-ipadic-utf8 git \\\\\n&& curl https://pyenv.run | bash \\\\\n&& pyenv update \\\\\n&& pyenv install $PYTHON_VERSION \\\\\n&& pyenv global $PYTHON_VERSION \\\\\n&& apt-get clean\n" - ], - "env" : [ - "PYENV_ROOT=\\"/root/.pyenv\\"", - "PATH=\\"$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH\\"", - "PYTHON_VERSION=3.7.16" - ] - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.1", - "cellbender~=0.3.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "muon" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem", - 
"gpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -import tempfile -import subprocess -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix -from cellbender.remove_background.downstream import anndata_from_h5 -## VIASH START -# The following code has been auto-generated by Viash. 
-par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_background_fraction': $( if [ ! -z ${VIASH_PAR_OBS_BACKGROUND_FRACTION+x} ]; then echo "r'${VIASH_PAR_OBS_BACKGROUND_FRACTION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_cell_probability': $( if [ ! -z ${VIASH_PAR_OBS_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_PROBABILITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_cell_size': $( if [ ! -z ${VIASH_PAR_OBS_CELL_SIZE+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_SIZE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_droplet_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_DROPLET_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_DROPLET_EFFICIENCY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_gene_expression_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'expected_cells_from_qc': $( if [ ! 
-z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'force_cell_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_CELL_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_CELL_UMI_PRIOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'force_empty_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'epochs': $( if [ ! -z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), - 'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'ignore_features': $( if [ ! 
-z ${VIASH_PAR_IGNORE_FEATURES+x} ]; then echo "list(map(int, r'${VIASH_PAR_IGNORE_FEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), - 'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), - 'exclude_feature_types': $( if [ ! -z ${VIASH_PAR_EXCLUDE_FEATURE_TYPES+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_FEATURE_TYPES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'projected_ambient_count_threshold': $( if [ ! -z ${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD+x} ]; then echo "float(r'${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'final_elbo_fail_fraction': $( if [ ! -z ${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'epoch_elbo_fail_fraction': $( if [ ! -z ${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'num_training_tries': $( if [ ! -z ${VIASH_PAR_NUM_TRAINING_TRIES+x} ]; then echo "int(r'${VIASH_PAR_NUM_TRAINING_TRIES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'learning_rate_retry_mult': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE_RETRY_MULT+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE_RETRY_MULT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'posterior_batch_size': $( if [ ! -z ${VIASH_PAR_POSTERIOR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_POSTERIOR_BATCH_SIZE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'posterior_regulation': $( if [ ! -z ${VIASH_PAR_POSTERIOR_REGULATION+x} ]; then echo "r'${VIASH_PAR_POSTERIOR_REGULATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alpha': $( if [ ! 
-z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'q': $( if [ ! -z ${VIASH_PAR_Q+x} ]; then echo "float(r'${VIASH_PAR_Q//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'estimator': $( if [ ! -z ${VIASH_PAR_ESTIMATOR+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'estimator_multiple_cpu': $( if [ ! -z ${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'constant_learning_rate': $( if [ ! -z ${VIASH_PAR_CONSTANT_LEARNING_RATE+x} ]; then echo "r'${VIASH_PAR_CONSTANT_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'debug': $( if [ ! -z ${VIASH_PAR_DEBUG+x} ]; then echo "r'${VIASH_PAR_DEBUG//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'cuda': $( if [ ! -z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Performing log transformation on modality %s", mod) -data = mdata.mod[mod] - -# import pathlib -# with pathlib.Path(os.path.dirname(par["output"])) / "cellbender" as temp_dir: -# os.mkdir(temp_dir) -with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir: - # construct paths within tempdir - input_file = os.path.join(temp_dir, "input.h5ad") - output_file = 
os.path.join(temp_dir, "output.h5") - - logger.info("Creating AnnData input file for CellBender: '%s'", input_file) - data.write_h5ad(input_file) - - logger.info("Constructing CellBender command") - cmd_pars = [ - "cellbender", "remove-background", - "--input", input_file, - "--output", output_file, - # don't create checkpoints because they're not used / returned anyways - "--checkpoint-mins", "99999999" - ] - - if meta.get("cpus") is not None: - cmd_pars += ["--cpu-threads", str(meta["cpus"])] - - extra_args = [ - ("--expected-cells", "expected_cells", True), - ("--total-droplets-included", "total_droplets_included", True), - ("--force-cell-umi-prior", "force_cell_umi_prior", True), - ("--force-empty-umi-prior", "force_empty_umi_prior", True), - ("--model", "model", True), - ("--epochs", "epochs", True), - ("--low-count-threshold", "low_count_threshold", True), - ("--z-dim", "z_dim", True), - ("--z-layers", "z_layers", True), - ("--training-fraction", "training_fraction", True), - ("--empty-drop-training-fraction", "empty_drop_training_fraction", True), - ("--ignore-features", "ignore_features", True), - ("--fpr", "fpr", True), - ("--exclude-feature-types", "exclude_feature_types", True), - ("--projected-ambient-count-threshold", "projected_ambient_count_threshold", True), - ("--learning-rate", "learning_rate", True), - ("--final-elbo-fail-fraction", "final_elbo_fail_fraction", True), - ("--epoch-elbo-fail-fraction", "epoch_elbo_fail_fraction", True), - ("--num-training-tries", "num_training_tries", True), - ("--learning-rate-retry-mult", "learning_rate_retry_mult", True), - ("--posterior-batch-size", "posterior_batch_size", True), - ("--posterior-regulation", "posterior_regulation", True), - ("--alpha", "alpha", True), - ("--q", "q", True), - ("--estimator", "estimator", True), - ("--estimator-multiple-cpu", "estimator_multiple_cpu", False), - ("--constant-learning-rate", "constant_learning_rate", False), - ("--debug", "debug", False), - ("--cuda", "cuda", 
False), - ] - for (flag, name, is_kwarg) in extra_args: - if par[name]: - values = par[name] if isinstance(par[name], list) else [par[name]] - cmd_pars += [flag] + [str(val) for val in values] if is_kwarg else [flag] - - if par["expected_cells_from_qc"] and "metrics_cellranger" in data.uns: - assert par["expected_cells"] is None, "If min_counts is defined, expected_cells should be undefined" - assert par["total_droplets_included"] is None, "If min_counts is defined, expected_cells should be undefined" - met = data.uns["metrics_cellranger"] - col_name = "Estimated Number of Cells" - assert col_name in met.columns, "%s should be a column in .obs[metrics_cellranger]" - est_cells = met[col_name].values[0] - logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5) - cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5*est_cells)] - - logger.info("Running CellBender: '%s'", ' '.join(cmd_pars)) - out = subprocess.check_output(cmd_pars).decode("utf-8") - - logger.info("Reading CellBender 10xh5 output file: '%s'", output_file) - adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False) - - logger.info("CellBender output format:", adata_out) - - # AnnData object with n_obs x n_vars = 6794880 x 33538 - # obs: 'cellbender_analyzed' - # var: 'ambient_expression', 'feature_type', 'genome', 'gene_id', 'cellbender_analyzed' - # uns: 'background_fraction', 'barcode_indices_for_latents', 'cell_probability', 'cell_size', 'droplet_efficiency', 'gene_expression_encoding', - # 'cell_size_lognormal_std', 'empty_droplet_size_lognormal_loc', 'empty_droplet_size_lognormal_scale', 'swapping_fraction_dist_params', - # 'barcodes_analyzed', 'barcodes_analyzed_inds', 'estimator', 'features_analyzed_inds', 'fraction_data_used_for_testing', 'learning_curve_learning_rate_epoch', - # 'learning_curve_learning_rate_value', 'learning_curve_test_elbo', 'learning_curve_test_epoch', 'learning_curve_train_elbo', 
'learning_curve_train_epoch', - # 'target_false_positive_rate' - - logger.info("Copying X output to MuData") - data.layers[par["layer_output"]] = adata_out.X - - logger.info("Copying .obs output to MuData") - obs_store = { - "obs_background_fraction": "background_fraction", - "obs_cell_probability": "cell_probability", - "obs_cell_size": "cell_size", - "obs_droplet_efficiency": "droplet_efficiency", - "obs_latent_scale": "latent_scale" - } - for to_name, from_name in obs_store.items(): - if par[to_name]: - if from_name in adata_out.obs: - data.obs[par[to_name]] = adata_out.obs[from_name] - # when using unfiltered data, the values will be in uns instead of obs - elif from_name in adata_out.uns and "barcode_indices_for_latents" in adata_out.uns: - vec = np.zeros(data.n_obs) - vec[adata_out.uns["barcode_indices_for_latents"]] = adata_out.uns[from_name] - data.obs[par[to_name]] = vec - - logger.info("Copying .var output to MuData") - var_store = { "var_ambient_expression": "ambient_expression" } - for to_name, from_name in var_store.items(): - if par[to_name]: - data.var[par[to_name]] = adata_out.var[from_name] - - logger.info("Copying obsm_gene_expression_encoding output to MuData") - obsm_store = { "obsm_gene_expression_encoding": "gene_expression_encoding" } - for to_name, from_name in obsm_store.items(): - if par[to_name]: - if from_name in adata_out.obsm: - data.obsm[par[to_name]] = adata_out.obsm[from_name] - elif from_name in adata_out.uns and "barcode_indices_for_latents" in adata_out.uns: - matrix_to_store = adata_out.uns[from_name] - number_of_obs = data.X.shape[0] - latent_space_sparse = csr_matrix((number_of_obs, par["z_dim"]), - dtype=adata_out.uns[from_name].dtype) - obs_rows_in_space_representation = adata_out.uns["barcode_indices_for_latents"] - latent_space_sparse[obs_rows_in_space_representation] = adata_out.uns[from_name] - data.obsm[par[to_name]] = latent_space_sparse - else: - raise RuntimeError("Requested to save latent gene encoding, but the data 
is either missing " - "from cellbender output or in an incorrect format.") - - -logger.info("Writing to file %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/correction_cellbender_remove_background", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem", - "gpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
- // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background/nextflow.config b/target/nextflow/correction/cellbender_remove_background/nextflow.config deleted 
file mode 100644 index 5576a3d41f7..00000000000 --- a/target/nextflow/correction/cellbender_remove_background/nextflow.config +++ /dev/null @@ -1,107 +0,0 @@ -manifest { - name = 'cellbender_remove_background' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - 
singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml b/target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml deleted file mode 100644 index cc465d7b29b..00000000000 --- a/target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# Inputs -input: # please fill in - example: "input.h5mu" -modality: "rna" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -layer_output: "cellbender_corrected" -obs_background_fraction: "cellbender_background_fraction" -obs_cell_probability: "cellbender_cell_probability" -obs_cell_size: "cellbender_cell_size" -obs_droplet_efficiency: "cellbender_droplet_efficiency" -obs_latent_scale: 
"cellbender_latent_scale" -var_ambient_expression: "cellbender_ambient_expression" -obsm_gene_expression_encoding: "cellbender_gene_expression_encoding" - -# Arguments -expected_cells_from_qc: false -# expected_cells: 1000 -# total_droplets_included: 25000 -# force_cell_umi_prior: 123 -# force_empty_umi_prior: 123 -model: "full" -epochs: 150 -low_count_threshold: 5 -z_dim: 64 -z_layers: [512] -training_fraction: 0.9 -empty_drop_training_fraction: 0.2 -# ignore_features: [123] -fpr: [0.01] -# exclude_feature_types: ["foo"] -projected_ambient_count_threshold: 0.1 -learning_rate: 1.0E-4 -# final_elbo_fail_fraction: 123.0 -# epoch_elbo_fail_fraction: 123.0 -num_training_tries: 1 -learning_rate_retry_mult: 0.2 -posterior_batch_size: 128 -# posterior_regulation: "foo" -# alpha: 123.0 -# q: 123.0 -estimator: "mckp" -estimator_multiple_cpu: false -# constant_learning_rate: true -debug: false -cuda: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/correction/cellbender_remove_background/nextflow_schema.json b/target/nextflow/correction/cellbender_remove_background/nextflow_schema.json deleted file mode 100644 index 2d91eda68a5..00000000000 --- a/target/nextflow/correction/cellbender_remove_background/nextflow_schema.json +++ /dev/null @@ -1,355 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "cellbender_remove_background", - "description": "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022, bioRxiv.\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file. Data file on which to run tool. Data must be un-filtered: it should include empty droplets." - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. List of modalities to process", - "help_text": "Type: `string`, default: `rna`. List of modalities to process.", - "default": "rna" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed. This file contains all the original droplet barcodes.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - }, - - "layer_output": { - "type": "string", - "description": "Type: `string`, default: `cellbender_corrected`. Output layer", - "help_text": "Type: `string`, default: `cellbender_corrected`. Output layer", - "default": "cellbender_corrected" - }, - - "obs_background_fraction": { - "type": "string", - "description": "Type: `string`, default: `cellbender_background_fraction`. ", - "help_text": "Type: `string`, default: `cellbender_background_fraction`. 
", - "default": "cellbender_background_fraction" - }, - - "obs_cell_probability": { - "type": "string", - "description": "Type: `string`, default: `cellbender_cell_probability`. ", - "help_text": "Type: `string`, default: `cellbender_cell_probability`. ", - "default": "cellbender_cell_probability" - }, - - "obs_cell_size": { - "type": "string", - "description": "Type: `string`, default: `cellbender_cell_size`. ", - "help_text": "Type: `string`, default: `cellbender_cell_size`. ", - "default": "cellbender_cell_size" - }, - - "obs_droplet_efficiency": { - "type": "string", - "description": "Type: `string`, default: `cellbender_droplet_efficiency`. ", - "help_text": "Type: `string`, default: `cellbender_droplet_efficiency`. ", - "default": "cellbender_droplet_efficiency" - }, - - "obs_latent_scale": { - "type": "string", - "description": "Type: `string`, default: `cellbender_latent_scale`. ", - "help_text": "Type: `string`, default: `cellbender_latent_scale`. ", - "default": "cellbender_latent_scale" - }, - - "var_ambient_expression": { - "type": "string", - "description": "Type: `string`, default: `cellbender_ambient_expression`. ", - "help_text": "Type: `string`, default: `cellbender_ambient_expression`. ", - "default": "cellbender_ambient_expression" - }, - - "obsm_gene_expression_encoding": { - "type": "string", - "description": "Type: `string`, default: `cellbender_gene_expression_encoding`. ", - "help_text": "Type: `string`, default: `cellbender_gene_expression_encoding`. ", - "default": "cellbender_gene_expression_encoding" - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "expected_cells_from_qc": { - "type": "boolean", - "description": "Type: `boolean`, default: `false`. Will use the Cell Ranger QC to determine the estimated number of cells", - "help_text": "Type: `boolean`, default: `false`. 
Will use the Cell Ranger QC to determine the estimated number of cells", - "default": "False" - }, - - "expected_cells": { - "type": "integer", - "description": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)", - "help_text": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)." - }, - - "total_droplets_included": { - "type": "integer", - "description": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will have their cell probabilities inferred as an\noutput", - "help_text": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will have their cell probabilities inferred as an\noutput. Include the droplets which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should be\n\u0027surely empty\u0027 droplets.\n" - }, - - "force_cell_umi_prior": { - "type": "integer", - "description": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in cells", - "help_text": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in cells." - }, - - "force_empty_umi_prior": { - "type": "integer", - "description": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in empty droplets", - "help_text": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in empty droplets." - }, - - "model": { - "type": "string", - "description": "Type: `string`, default: `full`, choices: ``naive`, `simple`, `ambient`, `swapping`, `full``. Which model is being used for count data", - "help_text": "Type: `string`, default: `full`, choices: ``naive`, `simple`, `ambient`, `swapping`, `full``. 
Which model is being used for count data.\n\n* \u0027naive\u0027 subtracts the estimated ambient profile.\n* \u0027simple\u0027 does not model either ambient RNA or random barcode swapping (for debugging purposes -- not recommended).\n* \u0027ambient\u0027 assumes background RNA is incorporated into droplets.\n* \u0027swapping\u0027 assumes background RNA comes from random barcode swapping (via PCR chimeras).\n* \u0027full\u0027 uses a combined ambient and swapping model.\n", - "enum": ["naive", "simple", "ambient", "swapping", "full"] - , - "default": "full" - }, - - "epochs": { - "type": "integer", - "description": "Type: `integer`, default: `150`. Number of epochs to train", - "help_text": "Type: `integer`, default: `150`. Number of epochs to train.", - "default": "150" - }, - - "low_count_threshold": { - "type": "integer", - "description": "Type: `integer`, default: `5`. Droplets with UMI counts below this number are completely \nexcluded from the analysis", - "help_text": "Type: `integer`, default: `5`. Droplets with UMI counts below this number are completely \nexcluded from the analysis. This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n", - "default": "5" - }, - - "z_dim": { - "type": "integer", - "description": "Type: `integer`, default: `64`. Dimension of latent variable z", - "help_text": "Type: `integer`, default: `64`. Dimension of latent variable z.\n", - "default": "64" - }, - - "z_layers": { - "type": "string", - "description": "Type: List of `integer`, default: `512`, multiple_sep: `\":\"`. Dimension of hidden layers in the encoder for z", - "help_text": "Type: List of `integer`, default: `512`, multiple_sep: `\":\"`. Dimension of hidden layers in the encoder for z.\n", - "default": "512" - }, - - "training_fraction": { - "type": "number", - "description": "Type: `double`, default: `0.9`. 
Training detail: the fraction of the data used for training", - "help_text": "Type: `double`, default: `0.9`. Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n", - "default": "0.9" - }, - - "empty_drop_training_fraction": { - "type": "number", - "description": "Type: `double`, default: `0.2`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets", - "help_text": "Type: `double`, default: `0.2`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n", - "default": "0.2" - }, - - "ignore_features": { - "type": "string", - "description": "Type: List of `integer`, multiple_sep: `\":\"`. Integer indices of features to ignore entirely", - "help_text": "Type: List of `integer`, multiple_sep: `\":\"`. Integer indices of features to ignore entirely. In the output\ncount matrix, the counts for these features will be unchanged.\n" - }, - - "fpr": { - "type": "string", - "description": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target \u0027delta\u0027 false positive rate in [0, 1)", - "help_text": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target \u0027delta\u0027 false positive rate in [0, 1). Use 0 for a cohort\nof samples which will be jointly analyzed for differential expression.\nA false positive is a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal removal at\nhigh values of FPR. You can specify multiple values, which will\ncreate multiple output files.\n", - "default": "0.01" - }, - - "exclude_feature_types": { - "type": "string", - "description": "Type: List of `string`, multiple_sep: `\":\"`. Feature types to ignore during the analysis", - "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Feature types to ignore during the analysis. 
These features will\nbe left unchanged in the output file.\n" - }, - - "projected_ambient_count_threshold": { - "type": "number", - "description": "Type: `double`, default: `0.1`. Controls how many features are included in the analysis, which\ncan lead to a large speedup", - "help_text": "Type: `double`, default: `0.1`. Controls how many features are included in the analysis, which\ncan lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD counts total in all cells\n(summed), then that gene is excluded, and it will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD = 0 will include all features\nwhich have even a single count in any empty droplet.\n", - "default": "0.1" - }, - - "learning_rate": { - "type": "number", - "description": "Type: `double`, default: `1.0E-4`. Training detail: lower learning rate for inference", - "help_text": "Type: `double`, default: `1.0E-4`. Training detail: lower learning rate for inference.\nA OneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. (For this\nvalue, probably do not exceed 1e-3).\n", - "default": "0.0001" - }, - - "final_elbo_fail_fraction": { - "type": "number", - "description": "Type: `double`. Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO) \u003e FINAL_ELBO_FAIL_FRACTION", - "help_text": "Type: `double`. Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO) \u003e FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries \u003e 1.\nBy default, will not fail training based on final_training_ELBO.\n" - }, - - "epoch_elbo_fail_fraction": { - "type": "number", - "description": "Type: `double`. 
Training is considered to have failed if \n(previous_epoch_test_ELBO - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO) \u003e EPOCH_ELBO_FAIL_FRACTION", - "help_text": "Type: `double`. Training is considered to have failed if \n(previous_epoch_test_ELBO - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO) \u003e EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries \u003e 1.\nBy default, will not fail training based on epoch_training_ELBO.\n" - }, - - "num_training_tries": { - "type": "integer", - "description": "Type: `integer`, default: `1`. Number of times to attempt to train the model", - "help_text": "Type: `integer`, default: `1`. Number of times to attempt to train the model. At each subsequent attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n", - "default": "1" - }, - - "learning_rate_retry_mult": { - "type": "number", - "description": "Type: `double`, default: `0.2`. Learning rate is multiplied by this amount each time a new training\nattempt is made", - "help_text": "Type: `double`, default: `0.2`. Learning rate is multiplied by this amount each time a new training\nattempt is made. (This parameter is only used if training fails based\non EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES is \u003e 1.) \n", - "default": "0.2" - }, - - "posterior_batch_size": { - "type": "integer", - "description": "Type: `integer`, default: `128`. Training detail: size of batches when creating the posterior", - "help_text": "Type: `integer`, default: `128`. Training detail: size of batches when creating the posterior.\nReduce this to avoid running out of GPU memory creating the posterior\n(will be slower).\n", - "default": "128" - }, - - "posterior_regulation": { - "type": "string", - "description": "Type: `string`, choices: ``PRq`, `PRmu`, `PRmu_gene``. 
Posterior regularization method", - "help_text": "Type: `string`, choices: ``PRq`, `PRmu`, `PRmu_gene``. Posterior regularization method. (For experts: not required for normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n* PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n* PRmu_gene is approximate mean-targeting per gene.\n", - "enum": ["PRq", "PRmu", "PRmu_gene"] - - }, - - "alpha": { - "type": "number", - "description": "Type: `double`. Tunable parameter alpha for the PRq posterior regularization method\n(not normally used: see documentation)", - "help_text": "Type: `double`. Tunable parameter alpha for the PRq posterior regularization method\n(not normally used: see documentation).\n" - }, - - "q": { - "type": "number", - "description": "Type: `double`. Tunable parameter q for the CDF threshold estimation method (not\nnormally used: see documentation)", - "help_text": "Type: `double`. Tunable parameter q for the CDF threshold estimation method (not\nnormally used: see documentation).\n" - }, - - "estimator": { - "type": "string", - "description": "Type: `string`, default: `mckp`, choices: ``map`, `mean`, `cdf`, `sample`, `mckp``. Output denoised count estimation method", - "help_text": "Type: `string`, default: `mckp`, choices: ``map`, `mean`, `cdf`, `sample`, `mckp``. Output denoised count estimation method. (For experts: not required\nfor normal usage, see documentation).\n", - "enum": ["map", "mean", "cdf", "sample", "mckp"] - , - "default": "mckp" - }, - - "estimator_multiple_cpu": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Including the flag --estimator-multiple-cpu will use more than one\nCPU to compute the MCKP output count estimator in parallel (does nothing\nfor other estimators)", - "help_text": "Type: `boolean_true`, default: `false`. 
Including the flag --estimator-multiple-cpu will use more than one\nCPU to compute the MCKP output count estimator in parallel (does nothing\nfor other estimators).\n", - "default": "False" - }, - - "constant_learning_rate": { - "type": "boolean", - "description": "Type: `boolean`. Including the flag --constant-learning-rate will use the ClippedAdam\noptimizer instead of the OneCycleLR learning rate schedule, which is\nthe default", - "help_text": "Type: `boolean`. Including the flag --constant-learning-rate will use the ClippedAdam\noptimizer instead of the OneCycleLR learning rate schedule, which is\nthe default. Learning is faster with the OneCycleLR schedule.\nHowever, training can easily be continued from a checkpoint for more\nepochs than the initial command specified when using ClippedAdam. On\nthe other hand, if using the OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick up from that final checkpoint\nand continue training until 250 epochs.\n" - }, - - "debug": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Including the flag --debug will log extra messages useful for debugging", - "help_text": "Type: `boolean_true`, default: `false`. Including the flag --debug will log extra messages useful for debugging.\n", - "default": "False" - }, - - "cuda": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Including the flag --cuda will run the inference on a\nGPU", - "help_text": "Type: `boolean_true`, default: `false`. Including the flag --cuda will run the inference on a\nGPU.\n", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/correction/cellbender_remove_background/setup_logger.py b/target/nextflow/correction/cellbender_remove_background/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/correction/cellbender_remove_background/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml b/target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml deleted file mode 100644 index 798f58bacca..00000000000 --- a/target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml +++ /dev/null @@ -1,406 +0,0 @@ -functionality: - name: "cellbender_remove_background_v0_2" - namespace: "correction" - version: "0.12.3" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file." - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "List of modalities to process." 
- info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Full count matrix as an h5mu file, with background RNA removed.\ - \ This file contains all the original droplet barcodes." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer_output" - description: "Output layer" - info: null - default: - - "corrected" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_latent_rt_efficiency" - info: null - default: - - "latent_rt_efficiency" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_latent_cell_probability" - info: null - default: - - "latent_cell_probability" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_latent_scale" - info: null - default: - - "latent_scale" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_ambient_expression" - info: null - default: - - "ambient_expression" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_latent_gene_encoding" - info: null - default: - - "cellbender_latent_gene_encoding" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: 
"--expected_cells" - description: "Number of cells expected in the dataset (a rough estimate within\ - \ a factor of 2 is sufficient)." - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--total_droplets_included" - description: "The number of droplets from the rank-ordered UMI plot\nthat will\ - \ be analyzed. The largest 'total_droplets'\ndroplets will have their cell\ - \ probabilities inferred\nas an output.\n" - info: null - example: - - 25000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--expected_cells_from_qc" - description: "Will use the Cell Ranger QC to determine the estimated number\ - \ of cells" - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--model" - description: "Which model is being used for count data. 'simple'\ndoes not model\ - \ either ambient RNA or random barcode\nswapping (for debugging purposes --\ - \ not recommended).\n'ambient' assumes background RNA is incorporated into\n\ - droplets. 'swapping' assumes background RNA comes from\nrandom barcode swapping.\ - \ 'full' uses a combined\nambient and swapping model.\n" - info: null - default: - - "full" - required: false - choices: - - "simple" - - "ambient" - - "swapping" - - "full" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--epochs" - description: "Number of epochs to train." - info: null - default: - - 150 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--low_count_threshold" - description: "Droplets with UMI counts below this number are completely \nexcluded\ - \ from the analysis. 
This can help identify the correct \nprior for empty\ - \ droplet counts in the rare case where empty \ncounts are extremely high\ - \ (over 200).\n" - info: null - default: - - 15 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_dim" - description: "Dimension of latent variable z.\n" - info: null - default: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--z_layers" - description: "Dimension of hidden layers in the encoder for z.\n" - info: null - default: - - 500 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--training_fraction" - description: "Training detail: the fraction of the data used for training.\n\ - The rest is never seen by the inference algorithm. Speeds up learning.\n" - info: null - default: - - 0.9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--empty_drop_training_fraction" - description: "Training detail: the fraction of the training data each epoch\ - \ that \nis drawn (randomly sampled) from surely empty droplets.\n" - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--fpr" - description: "Target false positive rate in (0, 1). A false positive\nis a true\ - \ signal count that is erroneously removed.\nMore background removal is accompanied\ - \ by more signal\nremoval at high values of FPR. 
You can specify\nmultiple\ - \ values, which will create multiple output\nfiles.\n" - info: null - default: - - 0.01 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--exclude_antibody_capture" - description: "Including the flag --exclude-antibody-capture will\ncause remove-background\ - \ to operate on gene counts\nonly, ignoring other features.\n" - info: null - direction: "input" - dest: "par" - - type: "double" - name: "--learning_rate" - description: "Training detail: lower learning rate for inference. A\nOneCycle\ - \ learning rate schedule is used, where the\nupper learning rate is ten times\ - \ this value. (For this\nvalue, probably do not exceed 1e-3).\n" - info: null - example: - - 1.0E-4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--cuda" - description: "Including the flag --cuda will run the inference on a\nGPU.\n" - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "helper.py" - - type: "file" - path: "src/utils/setup_logger.py" - description: "Eliminating technical artifacts from high-throughput single-cell RNA\ - \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ - \ and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \n\ - At the moment, only the count matrices produced by the CellRanger count pipeline\ - \ is supported. Support for additional tools and protocols \nwill be added in\ - \ the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022,\ - \ bioRxiv.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/pytorch:22.12-py3" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "muon==0.1.5" - - "tables==3.8.0" - - "cellbender==0.2.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "muon~=0.1.4" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - 
config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background_v0_2" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/helper.py b/target/nextflow/correction/cellbender_remove_background_v0_2/helper.py deleted file mode 100644 index 479dd56f596..00000000000 --- a/target/nextflow/correction/cellbender_remove_background_v0_2/helper.py +++ /dev/null @@ -1,143 +0,0 @@ -# This file is copied from https://github.com/broadinstitute/CellBender/issues/128#issuecomment-1175336065 -# to solve an issue with scanpy not being able to read in the 10x h5 files produced by cellbender. -# -# Note: If something doesn't work in this helper function, it may be interesting to -# take a look at the comments by Dries: https://github.com/openpipelines-bio/openpipeline/pull/115 -# I'm not going to apply them for now -- if it ain't broke, don't fix it. -import tables -import numpy as np -import scipy.sparse as sp -import anndata -from typing import Dict - - -def anndata_from_h5(file: str, - analyzed_barcodes_only: bool = True) -> 'anndata.AnnData': - """Load an output h5 file into an AnnData object for downstream work. - - Args: - file: The h5 file - analyzed_barcodes_only: False to load all barcodes, so that the size of - the AnnData object will match the size of the input raw count matrix. - True to load a limited set of barcodes: only those analyzed by the - algorithm. 
This allows relevant latent variables to be loaded - properly into adata.obs and adata.obsm, rather than adata.uns. - - Returns: - adata: The anndata object, populated with inferred latent variables - and metadata. - - """ - - d = dict_from_h5(file) - X = sp.csc_matrix((d.pop('data'), d.pop('indices'), d.pop('indptr')), - shape=d.pop('shape')).transpose().tocsr() - - # check and see if we have barcode index annotations, and if the file is filtered - barcode_key = [k for k in d.keys() if (('barcode' in k) and ('ind' in k))] - if len(barcode_key) > 0: - max_barcode_ind = d[barcode_key[0]].max() - filtered_file = (max_barcode_ind >= X.shape[0]) - else: - filtered_file = True - - if analyzed_barcodes_only: - if filtered_file: - # filtered file being read, so we don't need to subset - print('Assuming we are loading a "filtered" file that contains only cells.') - pass - elif 'barcode_indices_for_latents' in d.keys(): - X = X[d['barcode_indices_for_latents'], :] - d['barcodes'] = d['barcodes'][d['barcode_indices_for_latents']] - elif 'barcodes_analyzed_inds' in d.keys(): - X = X[d['barcodes_analyzed_inds'], :] - d['barcodes'] = d['barcodes'][d['barcodes_analyzed_inds']] - else: - print('Warning: analyzed_barcodes_only=True, but the key ' - '"barcodes_analyzed_inds" or "barcode_indices_for_latents" ' - 'is missing from the h5 file. ' - 'Will output all barcodes, and proceed as if ' - 'analyzed_barcodes_only=False') - - # Construct the anndata object. - adata = anndata.AnnData(X=X, - obs={'barcode': d.pop('barcodes').astype(str)}, - var={'gene_name': (d.pop('gene_names') if 'gene_names' in d.keys() - else d.pop('name')).astype(str)}, - dtype=X.dtype) - adata.obs.set_index('barcode', inplace=True) - adata.var.set_index('gene_name', inplace=True) - - # For CellRanger v2 legacy format, "gene_ids" was called "genes"... 
rename this - if 'genes' in d.keys(): - d['id'] = d.pop('genes') - - # For purely aesthetic purposes, rename "id" to "gene_id" - if 'id' in d.keys(): - d['gene_id'] = d.pop('id') - - # If genomes are empty, try to guess them based on gene_id - if 'genome' in d.keys(): - if np.array([s.decode() == '' for s in d['genome']]).all(): - if '_' in d['gene_id'][0].decode(): - print('Genome field blank, so attempting to guess genomes based on gene_id prefixes') - d['genome'] = np.array([s.decode().split('_')[0] for s in d['gene_id']], dtype=str) - - # Add other information to the anndata object in the appropriate slot. - _fill_adata_slots_automatically(adata, d) - - # Add a special additional field to .var if it exists. - if 'features_analyzed_inds' in adata.uns.keys(): - adata.var['cellbender_analyzed'] = [True if (i in adata.uns['features_analyzed_inds']) - else False for i in range(adata.shape[1])] - - if analyzed_barcodes_only: - for col in adata.obs.columns[adata.obs.columns.str.startswith('barcodes_analyzed') - | adata.obs.columns.str.startswith('barcode_indices')]: - try: - del adata.obs[col] - except Exception: - pass - else: - # Add a special additional field to .obs if all barcodes are included. 
- if 'barcodes_analyzed_inds' in adata.uns.keys(): - adata.obs['cellbender_analyzed'] = [True if (i in adata.uns['barcodes_analyzed_inds']) - else False for i in range(adata.shape[0])] - - return adata - - -def dict_from_h5(file: str) -> Dict[str, np.ndarray]: - """Read in everything from an h5 file and put into a dictionary.""" - d = {} - with tables.open_file(file) as f: - # read in everything - for array in f.walk_nodes("/", "Array"): - d[array.name] = array.read() - return d - - -def _fill_adata_slots_automatically(adata, d): - """Add other information to the adata object in the appropriate slot.""" - - for key, value in d.items(): - try: - if value is None: - continue - value = np.asarray(value) - if len(value.shape) == 0: - adata.uns[key] = value - elif value.shape[0] == adata.shape[0]: - if (len(value.shape) < 2) or (value.shape[1] < 2): - adata.obs[key] = value - else: - adata.obsm[key] = value - elif value.shape[0] == adata.shape[1]: - if value.dtype.name.startswith('bytes'): - adata.var[key] = value.astype(str) - else: - adata.var[key] = value - else: - adata.uns[key] = value - except Exception: - print('Unable to load data into AnnData: ', key, value, type(value)) \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/main.nf b/target/nextflow/correction/cellbender_remove_background_v0_2/main.nf deleted file mode 100644 index 7ed3e19518b..00000000000 --- a/target/nextflow/correction/cellbender_remove_background_v0_2/main.nf +++ /dev/null @@ -1,2946 +0,0 @@ -// cellbender_remove_background_v0_2 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "cellbender_remove_background_v0_2", - "namespace" : "correction", - "version" : "0.12.3", - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file.", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "List of modalities to process.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Full count matrix as an h5mu file, with background RNA removed. 
This file contains all the original droplet barcodes.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer_output", - "description" : "Output layer", - "default" : [ - "corrected" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_latent_rt_efficiency", - "default" : [ - "latent_rt_efficiency" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_latent_cell_probability", - "default" : [ - "latent_cell_probability" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_latent_scale", - "default" : [ - "latent_scale" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_ambient_expression", - "default" : [ - "ambient_expression" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_latent_gene_encoding", - "default" : [ - "cellbender_latent_gene_encoding" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "integer", - "name" : "--expected_cells", - "description" 
: "Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient).", - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--total_droplets_included", - "description" : "The number of droplets from the rank-ordered UMI plot\nthat will be analyzed. The largest 'total_droplets'\ndroplets will have their cell probabilities inferred\nas an output.\n", - "example" : [ - 25000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--expected_cells_from_qc", - "description" : "Will use the Cell Ranger QC to determine the estimated number of cells", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--model", - "description" : "Which model is being used for count data. 'simple'\ndoes not model either ambient RNA or random barcode\nswapping (for debugging purposes -- not recommended).\n'ambient' assumes background RNA is incorporated into\ndroplets. 'swapping' assumes background RNA comes from\nrandom barcode swapping. 'full' uses a combined\nambient and swapping model.\n", - "default" : [ - "full" - ], - "required" : false, - "choices" : [ - "simple", - "ambient", - "swapping", - "full" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--epochs", - "description" : "Number of epochs to train.", - "default" : [ - 150 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--low_count_threshold", - "description" : "Droplets with UMI counts below this number are completely \nexcluded from the analysis. 
This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n", - "default" : [ - 15 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--z_dim", - "description" : "Dimension of latent variable z.\n", - "default" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--z_layers", - "description" : "Dimension of hidden layers in the encoder for z.\n", - "default" : [ - 500 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--training_fraction", - "description" : "Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n", - "default" : [ - 0.9 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--empty_drop_training_fraction", - "description" : "Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n", - "default" : [ - 0.5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--fpr", - "description" : "Target false positive rate in (0, 1). A false positive\nis a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal\nremoval at high values of FPR. 
You can specify\nmultiple values, which will create multiple output\nfiles.\n", - "default" : [ - 0.01 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--exclude_antibody_capture", - "description" : "Including the flag --exclude-antibody-capture will\ncause remove-background to operate on gene counts\nonly, ignoring other features.\n", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--learning_rate", - "description" : "Training detail: lower learning rate for inference. A\nOneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. (For this\nvalue, probably do not exceed 1e-3).\n", - "example" : [ - 1.0E-4 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--cuda", - "description" : "Including the flag --cuda will run the inference on a\nGPU.\n", - "direction" : "input", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/" - }, - { - "type" : "file", - "path" : "helper.py", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. 
Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "nvcr.io/nvidia/pytorch:22.12-py3", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "muon==0.1.5", - "tables==3.8.0", - "cellbender==0.2.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "muon~=0.1.4" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "gpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - 
"mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background_v0_2", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -import tempfile -import subprocess -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_latent_rt_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_latent_cell_probability': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_latent_gene_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_LATENT_GENE_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_LATENT_GENE_ENCODING//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'expected_cells_from_qc': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'epochs': $( if [ ! 
-z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), - 'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), - 'exclude_antibody_capture': $( if [ ! -z ${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'cuda': $( if [ ! -z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -from helper import anndata_from_h5 - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Performing log transformation on modality %s", mod) -data = mdata.mod[mod] - -# with pathlib.Path(meta["temp_dir"]) / "cellbender" as temp_dir: -# os.mkdir(temp_dir) -with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir: - # construct paths within tempdir - input_file = os.path.join(temp_dir, "input.h5ad") - output_file = os.path.join(temp_dir, "output.h5") - - logger.info("Creating AnnData input file for CellBender: '%s'", input_file) - data.write_h5ad(input_file) - - logger.info("Constructing CellBender command") - cmd_pars = [ - "cellbender", "remove-background", - "--input", input_file, - "--output", output_file - ] - - extra_args = [ - ("--expected-cells", "expected_cells", True), - ("--total-droplets-included", "total_droplets_included", True), - ("--model", "model", True), - ("--epochs", "epochs", True), - ("--cuda", "cuda", False), - ("--low-count-threshold", "low_count_threshold", True), - ("--z-dim", "z_dim", True), - ("--z-layers", "z_layers", True), - ("--training-fraction", "training_fraction", True), - 
("--exclude-antibody-capture", "exclude_antibody_capture", False), - ("--learning-rate", "learning_rate", True), - ("--empty-drop-training-fraction", "empty_drop_training_fraction", True), - ] - for (flag, name, is_kwarg) in extra_args: - if par[name]: - values = par[name] if isinstance(par[name], list) else [par[name]] - cmd_pars += [flag] + [str(val) for val in values] if is_kwarg else [flag] - - if par["expected_cells_from_qc"] and "metrics_cellranger" in data.uns: - assert par["expected_cells"] is None, "If min_counts is defined, expected_cells should be undefined" - assert par["total_droplets_included"] is None, "If min_counts is defined, expected_cells should be undefined" - met = data.uns["metrics_cellranger"] - col_name = "Estimated Number of Cells" - assert col_name in met.columns, "%s should be a column in .obs[metrics_cellranger]" - est_cells = met[col_name].values[0] - logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5) - cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5*est_cells)] - - logger.info("Running CellBender: '%s'", ' '.join(cmd_pars)) - out = subprocess.check_output(cmd_pars).decode("utf-8") - - logger.info("Reading CellBender 10xh5 output file: '%s'", output_file) - # have to use custom read_10x_h5 function for now - # will be fixed when https://github.com/scverse/scanpy/pull/2344 is merged - # adata_out = sc.read_10x_h5(output_file, gex_only=False) - adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False) - - logger.info("Copying X output to MuData") - data.layers[par["layer_output"]] = adata_out.X - - logger.info("Copying .obs output to MuData") - obs_store = { - "obs_latent_rt_efficiency": "latent_RT_efficiency", - "obs_latent_cell_probability": "latent_cell_probability", - "obs_latent_scale": "latent_scale" - } - for to_name, from_name in obs_store.items(): - if par[to_name]: - if from_name in adata_out.obs: - data.obs[par[to_name]] = 
adata_out.obs[from_name] - # when using unfiltered data, the values will be in uns instead of obs - elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: - vec = np.zeros(data.n_obs) - vec[adata_out.uns['barcode_indices_for_latents']] = adata_out.uns[from_name] - data.obs[par[to_name]] = vec - - logger.info("Copying .var output to MuData") - var_store = { "var_ambient_expression": "ambient_expression" } - for to_name, from_name in var_store.items(): - if par[to_name]: - data.var[par[to_name]] = adata_out.var[from_name] - - logger.info("Copying obsm_latent_gene_encoding output to MuData") - obsm_store = { "obsm_latent_gene_encoding": "latent_gene_encoding" } - for to_name, from_name in obsm_store.items(): - if par[to_name]: - if from_name in adata_out.obsm: - data.obsm[par[to_name]] = adata_out.obsm[from_name] - elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: - matrix_to_store = adata_out.uns[from_name] - number_of_obs = data.X.shape[0] - latent_space_sparse = csr_matrix((number_of_obs, par['z_dim']), - dtype=adata_out.uns[from_name].dtype) - obs_rows_in_space_representation = adata_out.uns['barcode_indices_for_latents'] - latent_space_sparse[obs_rows_in_space_representation] = adata_out.uns[from_name] - data.obsm[par[to_name]] = latent_space_sparse - else: - raise RuntimeError("Requested to save latent gene encoding, but the data is either missing " - "from cellbender output or in an incorrect format.") - - -logger.info("Writing to file %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : 
"openpipelines-bio/correction_cellbender_remove_background_v0_2", - "tag" : "0.12.0" - }, - "label" : [ - "gpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. 
- // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config 
deleted file mode 100644 index 259304a26d9..00000000000 --- a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config +++ /dev/null @@ -1,107 +0,0 @@ -manifest { - name = 'cellbender_remove_background_v0_2' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - 
docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml deleted file mode 100644 index aebea733c6c..00000000000 --- a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Inputs -input: # please fill in - example: "input.h5mu" -modality: "rna" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -layer_output: "corrected" -obs_latent_rt_efficiency: "latent_rt_efficiency" -obs_latent_cell_probability: "latent_cell_probability" -obs_latent_scale: "latent_scale" -var_ambient_expression: "ambient_expression" 
-obsm_latent_gene_encoding: "cellbender_latent_gene_encoding" - -# Arguments -# expected_cells: 1000 -# total_droplets_included: 25000 -expected_cells_from_qc: true -model: "full" -epochs: 150 -low_count_threshold: 15 -z_dim: 100 -z_layers: [500] -training_fraction: 0.9 -empty_drop_training_fraction: 0.5 -fpr: [0.01] -exclude_antibody_capture: false -# learning_rate: 1.0E-4 -cuda: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json deleted file mode 100644 index 443371d8e06..00000000000 --- a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json +++ /dev/null @@ -1,234 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "cellbender_remove_background_v0_2", - "description": "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file." - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. List of modalities to process", - "help_text": "Type: `string`, default: `rna`. 
List of modalities to process.", - "default": "rna" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed. This file contains all the original droplet barcodes.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - }, - - "layer_output": { - "type": "string", - "description": "Type: `string`, default: `corrected`. Output layer", - "help_text": "Type: `string`, default: `corrected`. Output layer", - "default": "corrected" - }, - - "obs_latent_rt_efficiency": { - "type": "string", - "description": "Type: `string`, default: `latent_rt_efficiency`. ", - "help_text": "Type: `string`, default: `latent_rt_efficiency`. ", - "default": "latent_rt_efficiency" - }, - - "obs_latent_cell_probability": { - "type": "string", - "description": "Type: `string`, default: `latent_cell_probability`. ", - "help_text": "Type: `string`, default: `latent_cell_probability`. ", - "default": "latent_cell_probability" - }, - - "obs_latent_scale": { - "type": "string", - "description": "Type: `string`, default: `latent_scale`. ", - "help_text": "Type: `string`, default: `latent_scale`. ", - "default": "latent_scale" - }, - - "var_ambient_expression": { - "type": "string", - "description": "Type: `string`, default: `ambient_expression`. ", - "help_text": "Type: `string`, default: `ambient_expression`. 
", - "default": "ambient_expression" - }, - - "obsm_latent_gene_encoding": { - "type": "string", - "description": "Type: `string`, default: `cellbender_latent_gene_encoding`. ", - "help_text": "Type: `string`, default: `cellbender_latent_gene_encoding`. ", - "default": "cellbender_latent_gene_encoding" - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "expected_cells": { - "type": "integer", - "description": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)", - "help_text": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)." - }, - - "total_droplets_included": { - "type": "integer", - "description": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will be analyzed", - "help_text": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will be analyzed. The largest \u0027total_droplets\u0027\ndroplets will have their cell probabilities inferred\nas an output.\n" - }, - - "expected_cells_from_qc": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Will use the Cell Ranger QC to determine the estimated number of cells", - "help_text": "Type: `boolean`, default: `true`. Will use the Cell Ranger QC to determine the estimated number of cells", - "default": "True" - }, - - "model": { - "type": "string", - "description": "Type: `string`, default: `full`, choices: ``simple`, `ambient`, `swapping`, `full``. Which model is being used for count data", - "help_text": "Type: `string`, default: `full`, choices: ``simple`, `ambient`, `swapping`, `full``. Which model is being used for count data. 
\u0027simple\u0027\ndoes not model either ambient RNA or random barcode\nswapping (for debugging purposes -- not recommended).\n\u0027ambient\u0027 assumes background RNA is incorporated into\ndroplets. \u0027swapping\u0027 assumes background RNA comes from\nrandom barcode swapping. \u0027full\u0027 uses a combined\nambient and swapping model.\n", - "enum": ["simple", "ambient", "swapping", "full"] - , - "default": "full" - }, - - "epochs": { - "type": "integer", - "description": "Type: `integer`, default: `150`. Number of epochs to train", - "help_text": "Type: `integer`, default: `150`. Number of epochs to train.", - "default": "150" - }, - - "low_count_threshold": { - "type": "integer", - "description": "Type: `integer`, default: `15`. Droplets with UMI counts below this number are completely \nexcluded from the analysis", - "help_text": "Type: `integer`, default: `15`. Droplets with UMI counts below this number are completely \nexcluded from the analysis. This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n", - "default": "15" - }, - - "z_dim": { - "type": "integer", - "description": "Type: `integer`, default: `100`. Dimension of latent variable z", - "help_text": "Type: `integer`, default: `100`. Dimension of latent variable z.\n", - "default": "100" - }, - - "z_layers": { - "type": "string", - "description": "Type: List of `integer`, default: `500`, multiple_sep: `\":\"`. Dimension of hidden layers in the encoder for z", - "help_text": "Type: List of `integer`, default: `500`, multiple_sep: `\":\"`. Dimension of hidden layers in the encoder for z.\n", - "default": "500" - }, - - "training_fraction": { - "type": "number", - "description": "Type: `double`, default: `0.9`. Training detail: the fraction of the data used for training", - "help_text": "Type: `double`, default: `0.9`. 
Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n", - "default": "0.9" - }, - - "empty_drop_training_fraction": { - "type": "number", - "description": "Type: `double`, default: `0.5`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets", - "help_text": "Type: `double`, default: `0.5`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n", - "default": "0.5" - }, - - "fpr": { - "type": "string", - "description": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target false positive rate in (0, 1)", - "help_text": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target false positive rate in (0, 1). A false positive\nis a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal\nremoval at high values of FPR. You can specify\nmultiple values, which will create multiple output\nfiles.\n", - "default": "0.01" - }, - - "exclude_antibody_capture": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Including the flag --exclude-antibody-capture will\ncause remove-background to operate on gene counts\nonly, ignoring other features", - "help_text": "Type: `boolean_true`, default: `false`. Including the flag --exclude-antibody-capture will\ncause remove-background to operate on gene counts\nonly, ignoring other features.\n", - "default": "False" - }, - - "learning_rate": { - "type": "number", - "description": "Type: `double`, example: `1.0E-4`. Training detail: lower learning rate for inference", - "help_text": "Type: `double`, example: `1.0E-4`. Training detail: lower learning rate for inference. A\nOneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. 
(For this\nvalue, probably do not exceed 1e-3).\n" - }, - - "cuda": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Including the flag --cuda will run the inference on a\nGPU", - "help_text": "Type: `boolean_true`, default: `false`. Including the flag --cuda will run the inference on a\nGPU.\n", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py b/target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/dataflow/concat/.config.vsh.yaml b/target/nextflow/dataflow/concat/.config.vsh.yaml deleted file mode 100644 index 3a98bf31ed3..00000000000 --- a/target/nextflow/dataflow/concat/.config.vsh.yaml +++ /dev/null @@ -1,222 +0,0 @@ -functionality: - name: "concat" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - 
roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Paths to the different samples to be concatenated." - info: null - example: - - "sample_paths" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "string" - name: "--input_id" - description: "Names of the different samples that have to be concatenated. Must\ - \ be specified when using '--mode move'.\nIn this case, the ids will be used\ - \ for the columns names of the dataframes registring the conflicts.\nIf specified,\ - \ must be of same length as `--input`.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_sample_name" - description: "Name of the .obs key under which to add the sample names." 
- info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--other_axis_mode" - description: "How to handle the merging of other axis (var, obs, ...).\n\n - None:\ - \ keep no data\n - same: only keep elements of the matrices which are the same\ - \ in each of the samples\n - unique: only keep elements for which there is only\ - \ 1 possible value (1 value that can occur in multiple samples)\n - first: keep\ - \ the annotation from the first sample\n - only: keep elements that show up\ - \ in only one of the objects (1 unique element in only 1 sample)\n - move: identical\ - \ to 'same', but moving the conflicting values to .varm or .obsm\n" - info: null - default: - - "move" - required: false - choices: - - "same" - - "unique" - - "first" - - "only" - - "concat" - - "move" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Concatenates several uni-modal samples in .h5mu files into a single\ - \ file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - - type: "file" - path: "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - 
setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "pandas~=2.1.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - - "muon" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/concat" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/concat/concat" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/dataflow/concat/main.nf b/target/nextflow/dataflow/concat/main.nf deleted file mode 100644 index 
dfb027d5c2a..00000000000 --- a/target/nextflow/dataflow/concat/main.nf +++ /dev/null @@ -1,2911 +0,0 @@ -// concat 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "concat", - "namespace" : "dataflow", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Paths to the different samples to be concatenated.", - "example" : [ - "sample_paths" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ",", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_id", - "description" : "Names of the different samples that have to be concatenated. 
Must be specified when using '--mode move'.\nIn this case, the ids will be used for the columns names of the dataframes registring the conflicts.\nIf specified, must be of same length as `--input`.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ",", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_sample_name", - "description" : "Name of the .obs key under which to add the sample names.", - "default" : [ - "sample_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--other_axis_mode", - "description" : "How to handle the merging of other axis (var, obs, ...).\n\n - None: keep no data\n - same: only keep elements of the matrices which are the same in each of the samples\n - unique: only keep elements for which there is only 1 possible value (1 value that can occur in multiple samples)\n - first: keep the annotation from the first sample\n - only: keep elements that show up in only one of the objects (1 unique element in only 1 sample)\n - move: identical to 'same', but moving the conflicting values to .varm or .obsm\n", - "default" : [ - "move" - ], - "required" : false, - "choices" : [ - "same", - "unique", - "first", - "only", - "concat", - "move" - ], - "direction" : "input", - "multiple" : false, - 
"multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Concatenates several uni-modal samples in .h5mu files into a single file.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/" - }, - { - "type" : "file", - "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "pandas~=2.1.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0", - "muon" - ], - "upgrade" 
: true - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midcpu", - "highmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/concat", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from __future__ import annotations -import sys -import anndata -import mudata as mu -import pandas as pd -import numpy as np -from collections.abc import Iterable -from 
multiprocessing import Pool -from pathlib import Path -from h5py import File as H5File -from typing import Literal -import shutil - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_sample_name': $( if [ ! -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then echo "r'${VIASH_PAR_OBS_SAMPLE_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'other_axis_mode': $( if [ ! -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then echo "r'${VIASH_PAR_OTHER_AXIS_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) - -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion - -# from compress_h5mu import compress_h5mu -from h5py import Group, Dataset -from typing import Union -from functools import partial - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape 
defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -# START TEMPORARY WORKAROUND setup_logger -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def indexes_unique(indices: Iterable[pd.Index]) -> bool: - combined_indices = indices[0].append(indices[1:]) - return combined_indices.is_unique - -def check_observations_unique(samples: Iterable[anndata.AnnData]) -> None: - observation_ids = [sample.obs.index for sample in samples] - if not 
indexes_unique(observation_ids): - raise ValueError("Observations are not unique across samples.") - - -def nunique(row): - unique = pd.unique(row) - unique_without_na = pd.core.dtypes.missing.remove_na_arraylike(unique) - return len(unique_without_na) > 1 - -def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) -> bool: - """ - Check if any row contains duplicate values, that are not NA. - """ - numpy_array = frame.to_numpy() - with Pool(n_processes) as pool: - is_duplicated = pool.map(nunique, iter(numpy_array)) - return any(is_duplicated) - -def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \\\\ - -> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]: - """ - Merge matrices by combining columns that have the same name. - Columns that contain conflicting values (e.i. the columns have different values), - are not merged, but instead moved to a new dataframe. - """ - column_names = set(column_name for var in matrices.values() for column_name in var) - logger.debug('Trying to concatenate columns: %s.', ",".join(column_names)) - if not column_names: - return {}, pd.DataFrame(index=align_to) - conflicts, concatenated_matrix = \\\\ - split_conflicts_and_concatenated_columns(n_processes, - matrices, - column_names, - align_to) - concatenated_matrix = cast_to_writeable_dtype(concatenated_matrix) - conflicts = {conflict_name: cast_to_writeable_dtype(conflict_df) - for conflict_name, conflict_df in conflicts.items()} - return conflicts, concatenated_matrix - -def get_first_non_na_value_vector(df): - numpy_arr = df.to_numpy() - n_rows, n_cols = numpy_arr.shape - col_index = pd.isna(numpy_arr).argmin(axis=1) - flat_index = n_cols * np.arange(n_rows) + col_index - return pd.Series(numpy_arr.ravel()[flat_index], index=df.index, name=df.columns[0]) - -def split_conflicts_and_concatenated_columns(n_processes: int, - matrices: dict[str, pd.DataFrame], - 
column_names: Iterable[str], - align_to: pd.Index | None = None) -> \\\\ - tuple[dict[str, pd.DataFrame], pd.DataFrame]: - """ - Retrieve columns with the same name from a list of dataframes which are - identical across all the frames (ignoring NA values). - Columns which are not the same are regarded as 'conflicts', - which are stored in seperate dataframes, one per columns - with the same name that store conflicting values. - """ - conflicts = {} - concatenated_matrix = [] - for column_name in column_names: - columns = {input_id: var[column_name] - for input_id, var in matrices.items() - if column_name in var} - assert columns, "Some columns should have been found." - concatenated_columns = pd.concat(columns.values(), axis=1, - join="outer", sort=False) - if any_row_contains_duplicate_values(n_processes, concatenated_columns): - concatenated_columns.columns = columns.keys() # Use the sample id as column name - if align_to is not None: - concatenated_columns = concatenated_columns.reindex(align_to, copy=False) - conflicts[f'conflict_{column_name}'] = concatenated_columns - else: - unique_values = get_first_non_na_value_vector(concatenated_columns) - concatenated_matrix.append(unique_values) - if not concatenated_matrix: - return conflicts, pd.DataFrame(index=align_to) - concatenated_matrix = pd.concat(concatenated_matrix, join="outer", - axis=1, sort=False) - if align_to is not None: - concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False) - return conflicts, concatenated_matrix - -def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame: - """ - Cast the dataframe to dtypes that can be written by mudata. - """ - # dtype inferral workfs better with np.nan - result = result.replace({pd.NA: np.nan}) - - # MuData supports nullable booleans and ints - # ie. 
\\`IntegerArray\\` and \\`BooleanArray\\` - result = result.convert_dtypes(infer_objects=True, - convert_integer=True, - convert_string=False, - convert_boolean=True, - convert_floating=False) - - # Convert leftover 'object' columns to string - # However, na values are supported, so convert all values except NA's to string - object_cols = result.select_dtypes(include='object').columns.values - for obj_col in object_cols: - result[obj_col] = result[obj_col].where(result[obj_col].isna(), result[obj_col].astype(str)).astype('category') - return result - -def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnData], output: anndata.AnnData) \\\\ - -> anndata.AnnData: - """ - Merge .var and .obs matrices of the anndata objects. Columns are merged - when the values (excl NA) are the same in each of the matrices. - Conflicting columns are moved to a separate dataframe (one dataframe for each column, - containing all the corresponding column from each sample). - """ - matrices_to_parse = ("var", "obs") - for matrix_name in matrices_to_parse: - matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()} - output_index = getattr(output, matrix_name).index - align_to = output_index if matrix_name == "var" else None - conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to) - if concatenated_matrix.empty: - concatenated_matrix.index = output_index - # Write the conflicts to the output - for conflict_name, conflict_data in conflicts.items(): - getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data - - # Set other annotation matrices in the output - setattr(output, matrix_name, concatenated_matrix) - - return output - - -def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str | Path], - other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData: - - concat_modes = { - "move": None, - } - other_axis_mode_to_apply = concat_modes.get(other_axis_mode, 
other_axis_mode) - - mod_data = {} - for input_id, input_file in zip(input_ids, input_files): - try: - mod_data[input_id] = mu.read_h5ad(input_file, mod=mod) - except KeyError as e: # Modality does not exist for this sample, skip it - if f"Unable to open object '{mod}' doesn't exist" not in str(e): - raise e - pass - check_observations_unique(mod_data.values()) - - concatenated_data = anndata.concat(mod_data.values(), join='outer', merge=other_axis_mode_to_apply) - - if other_axis_mode == "move": - concatenated_data = split_conflicts_modalities(n_processes, mod_data, concatenated_data) - - return concatenated_data - -def concatenate_modalities(n_processes: int, modalities: list[str], input_files: Path | str, - other_axis_mode: str, output_file: Path | str, - compression: Literal['gzip'] | Literal['lzf'], - input_ids: tuple[str] | None = None) -> None: - """ - Join the modalities together into a single multimodal sample. - """ - logger.info('Concatenating samples.') - output_file, input_files = Path(output_file), [Path(input_file) for input_file in input_files] - output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") - output_file_uncompressed.touch() - # Create empty mudata file - mdata = mu.MuData({modality: anndata.AnnData() for modality in modalities}) - mdata.write(output_file_uncompressed, compression=compression) - - for mod_name in modalities: - new_mod = concatenate_modality(n_processes, mod_name, - input_files, other_axis_mode, - input_ids) - logger.info("Writing out modality '%s' to '%s' with compression '%s'.", - mod_name, output_file_uncompressed, compression) - mu.write_h5ad(output_file_uncompressed, data=new_mod, mod=mod_name) - - if compression: - compress_h5mu(output_file_uncompressed, output_file, compression=compression) - output_file_uncompressed.unlink() - else: - shutil.move(output_file_uncompressed, output_file) - - logger.info("Concatenation successful.") - -def main() -> None: - # Get a list of all possible 
modalities - mods = set() - for path in par["input"]: - try: - with H5File(path, 'r') as f_root: - mods = mods | set(f_root["mod"].keys()) - except OSError: - raise OSError(f"Failed to load {path}. Is it a valid h5 file?") - - input_ids = None - if par["input_id"]: - input_ids: tuple[str] = tuple(i.strip() for i in par["input_id"]) - if len(input_ids) != len(par["input"]): - raise ValueError("The number of sample names must match the number of sample files.") - - if len(set(input_ids)) != len(input_ids): - raise ValueError("The sample names should be unique.") - - logger.info("\\\\nConcatenating data from paths:\\\\n\\\\t%s", - "\\\\n\\\\t".join(par["input"])) - - if par["other_axis_mode"] == "move" and not input_ids: - raise ValueError("--mode 'move' requires --input_ids.") - - n_processes = meta["cpus"] if meta["cpus"] else 1 - concatenate_modalities(n_processes, - list(mods), - par["input"], - par["other_axis_mode"], - par["output"], - par["output_compression"], - input_ids=input_ids) - - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/dataflow_concat", - "tag" : "0.12.0" - }, - "label" : [ - "midcpu", - "highmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. 
the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/dataflow/concat/nextflow.config b/target/nextflow/dataflow/concat/nextflow.config deleted file mode 100644 index 70ee3a6e3de..00000000000 
--- a/target/nextflow/dataflow/concat/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'concat' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Concatenates several uni-modal samples in .h5mu files into a single file.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - 
withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/dataflow/concat/nextflow_params.yaml b/target/nextflow/dataflow/concat/nextflow_params.yaml deleted file mode 100644 index f59626839bb..00000000000 --- a/target/nextflow/dataflow/concat/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: ["sample_paths"] -# input_id: ["foo"] -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -obs_sample_name: "sample_id" -other_axis_mode: "move" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/dataflow/concat/nextflow_schema.json b/target/nextflow/dataflow/concat/nextflow_schema.json deleted file mode 100644 index f3e68228ccd..00000000000 --- a/target/nextflow/dataflow/concat/nextflow_schema.json +++ /dev/null @@ -1,88 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "concat", - "description": "Concatenates several uni-modal samples in .h5mu files into a single file.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": 
"string", - "description": "Type: List of `file`, required, example: `sample_paths`, multiple_sep: `\",\"`. Paths to the different samples to be concatenated", - "help_text": "Type: List of `file`, required, example: `sample_paths`, multiple_sep: `\",\"`. Paths to the different samples to be concatenated." - }, - - "input_id": { - "type": "string", - "description": "Type: List of `string`, multiple_sep: `\",\"`. Names of the different samples that have to be concatenated", - "help_text": "Type: List of `string`, multiple_sep: `\",\"`. Names of the different samples that have to be concatenated. Must be specified when using \u0027--mode move\u0027.\nIn this case, the ids will be used for the columns names of the dataframes registring the conflicts.\nIf specified, must be of same length as `--input`.\n" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. ", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. ", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "obs_sample_name": { - "type": "string", - "description": "Type: `string`, default: `sample_id`. Name of the ", - "help_text": "Type: `string`, default: `sample_id`. Name of the .obs key under which to add the sample names.", - "default": "sample_id" - }, - - "other_axis_mode": { - "type": "string", - "description": "Type: `string`, default: `move`, choices: ``same`, `unique`, `first`, `only`, `concat`, `move``. 
How to handle the merging of other axis (var, obs, ", - "help_text": "Type: `string`, default: `move`, choices: ``same`, `unique`, `first`, `only`, `concat`, `move``. How to handle the merging of other axis (var, obs, ...).\n\n - None: keep no data\n - same: only keep elements of the matrices which are the same in each of the samples\n - unique: only keep elements for which there is only 1 possible value (1 value that can occur in multiple samples)\n - first: keep the annotation from the first sample\n - only: keep elements that show up in only one of the objects (1 unique element in only 1 sample)\n - move: identical to \u0027same\u0027, but moving the conflicting values to .varm or .obsm\n", - "enum": ["same", "unique", "first", "only", "concat", "move"] - , - "default": "move" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/dataflow/concat/setup_logger.py b/target/nextflow/dataflow/concat/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/dataflow/concat/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/dataflow/merge/.config.vsh.yaml b/target/nextflow/dataflow/merge/.config.vsh.yaml deleted file mode 100644 index 71715a30356..00000000000 --- a/target/nextflow/dataflow/merge/.config.vsh.yaml +++ /dev/null @@ -1,175 +0,0 @@ -functionality: - name: "merge" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Paths to the single-modality .h5mu files that need to be combined" - info: null - default: - - "sample_paths" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Path to the output file." 
- info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Combine one or more single-modality .h5mu files together into one\ - \ .h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu" - - type: "file" - path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "pandas~=2.0.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false 
- transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/merge" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/merge/merge" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/dataflow/merge/main.nf b/target/nextflow/dataflow/merge/main.nf deleted file mode 100644 index 50b5dc02cef..00000000000 --- a/target/nextflow/dataflow/merge/main.nf +++ /dev/null @@ -1,2614 +0,0 @@ -// merge 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "merge", - "namespace" : "dataflow", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Paths to the single-modality .h5mu files that need to be combined", - "default" : [ - "sample_paths" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ",", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Path to the output file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - 
"multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Combine one or more single-modality .h5mu files together into one .h5mu file.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" - }, - { - "type" : "file", - "path" : "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" - }, - { - "type" : "file", - "path" : "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "pandas~=2.0.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ 
- "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "highmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/merge", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from __future__ import annotations -import sys -import mudata as md -import pandas as pd -import numpy as np - - -### VIASH START -# The 
following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - logger.info('Reading input files %s', ",".join(par["input"])) - input_samples = [md.read_h5mu(path) for path in par["input"]] - - logger.info('Merging into single object.') - sample_modalities = {} - for input_sample in input_samples: - for mod_name, mod_data in input_sample.mod.items(): - if mod_name in sample_modalities: - raise ValueError(f"Modality '{mod_name}' was found in more than 1 sample.") - sample_modalities[mod_name] = mod_data - - merged = md.MuData(sample_modalities) - merged.update() - for df_attr in ("var", "obs"): - df = getattr(merged, df_attr) - df = df.replace({pd.NA: np.nan}, inplace=False) - - # MuData supports nullable booleans and ints - # ie. 
\\`IntegerArray\\` and \\`BooleanArray\\` - df = df.convert_dtypes(infer_objects=True, - convert_integer=True, - convert_string=False, - convert_boolean=True, - convert_floating=False) - - # Convert leftover 'object' columns to string - object_cols = df.select_dtypes(include='object').columns.values - for obj_col in object_cols: - df[obj_col].astype(str).astype('category') - setattr(merged, df_attr, df) - - merged.write_h5mu(par["output"], compression=par["output_compression"]) - logger.info('Finished') - - -if __name__ == '__main__': - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/dataflow_merge", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "highmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/dataflow/merge/nextflow.config b/target/nextflow/dataflow/merge/nextflow.config deleted file mode 100644 index 3394916e04a..00000000000 --- 
a/target/nextflow/dataflow/merge/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'merge' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Combine one or more single-modality .h5mu files together into one .h5mu file.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - 
withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/dataflow/merge/nextflow_params.yaml b/target/nextflow/dataflow/merge/nextflow_params.yaml deleted file mode 100644 index 90f26b252dd..00000000000 --- a/target/nextflow/dataflow/merge/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Arguments -input: # please fill in - example: ["sample_paths"] -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/dataflow/merge/nextflow_schema.json b/target/nextflow/dataflow/merge/nextflow_schema.json deleted file mode 100644 index 32c4ba7f789..00000000000 --- a/target/nextflow/dataflow/merge/nextflow_schema.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "merge", - "description": "Combine one or more single-modality .h5mu files together into one .h5mu file.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: List of `file`, required, default: `sample_paths`, 
multiple_sep: `\",\"`. Paths to the single-modality ", - "help_text": "Type: List of `file`, required, default: `sample_paths`, multiple_sep: `\",\"`. Paths to the single-modality .h5mu files that need to be combined", - "default": "sample_paths" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`. Path to the output file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`. Path to the output file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/dataflow/merge/setup_logger.py b/target/nextflow/dataflow/merge/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/dataflow/merge/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/dataflow/split_modalities/.config.vsh.yaml b/target/nextflow/dataflow/split_modalities/.config.vsh.yaml deleted file mode 100644 index 36fd7afc059..00000000000 --- a/target/nextflow/dataflow/split_modalities/.config.vsh.yaml +++ /dev/null @@ -1,214 +0,0 @@ -functionality: - name: "split_modalities" - namespace: "dataflow" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: 
"https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to a single .h5mu file." - info: null - default: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory containing multiple h5mu files." - info: null - example: - - "/path/to/output" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_types" - description: "A csv containing the base filename and modality type per output\ - \ file." - info: null - example: - - "types.csv" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--compression" - description: "The compression format to be used on the final h5mu object." - info: null - default: - - "gzip" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Split the modalities from a single .h5mu multimodal sample into seperate\ - \ .h5mu files. 
\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - 
container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/split_modalities" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/split_modalities/split_modalities" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/dataflow/split_modalities/main.nf b/target/nextflow/dataflow/split_modalities/main.nf deleted file mode 100644 index d64dd97a720..00000000000 --- a/target/nextflow/dataflow/split_modalities/main.nf +++ /dev/null @@ -1,2655 +0,0 @@ -// split_modalities 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (maintainer) -// * Robrecht Cannoodt (contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "split_modalities", - "namespace" : "dataflow", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Path to a single .h5mu file.", - "default" : [ - "sample_path" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output directory containing multiple h5mu files.", - "example" : [ - "/path/to/output" - ], - "must_exist" : true, - "create_parent" : true, 
- "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output_types", - "description" : "A csv containing the base filename and modality type per output file.", - "example" : [ - "types.csv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--compression", - "description" : "The compression format to be used on the final h5mu object.", - "default" : [ - "gzip" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. 
\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory 
= 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/split_modalities", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from __future__ import annotations -import sys -import mudata as md -from sys import stdout -from pathlib import Path -import pandas as pd - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_types': $( if [ ! -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_TYPES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main() -> None: - output_dir = Path(par["output"]) - if not output_dir.is_dir(): - output_dir.mkdir(parents=True) - - logger.info('Reading input file %s', par['input']) - sample = md.read_h5mu(par["input"].strip()) - input_file = Path(par["input"]) - - logger.info('Creating output types csv') - - names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" - for mod_name in sample.mod.keys() } - df = pd.DataFrame({"name": list(names.keys()), "filename": list(names.values())}) - df.to_csv(par["output_types"], index=False) - - logger.info('Splitting up modalities %s', ", ".join(sample.mod.keys())) - for mod_name, mod in sample.mod.items(): - new_sample = md.MuData({mod_name: mod}) - logger.info('Writing to %s', names[mod_name]) - new_sample.write_h5mu(output_dir / names[mod_name], compression=par["output_compression"]) - - logger.info("Finished") - - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - 
"image" : "openpipelines-bio/dataflow_split_modalities", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. 
- // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/dataflow/split_modalities/nextflow.config b/target/nextflow/dataflow/split_modalities/nextflow.config deleted file mode 100644 index 
d52bf76959e..00000000000 --- a/target/nextflow/dataflow/split_modalities/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'split_modalities' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. \n' - author = 'Dries Schaumont, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: 
mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/dataflow/split_modalities/nextflow_params.yaml b/target/nextflow/dataflow/split_modalities/nextflow_params.yaml deleted file mode 100644 index 8da3e33dc1c..00000000000 --- a/target/nextflow/dataflow/split_modalities/nextflow_params.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Arguments -input: # please fill in - example: "sample_path" -# output: "$id.$key.output.output" -# output_compression: "gzip" -# output_types: "$id.$key.output_types.csv" -compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/dataflow/split_modalities/nextflow_schema.json b/target/nextflow/dataflow/split_modalities/nextflow_schema.json deleted file mode 100644 index c172bd89e8c..00000000000 --- a/target/nextflow/dataflow/split_modalities/nextflow_schema.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "split_modalities", - "description": "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. 
\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, default: `sample_path`. Path to a single ", - "help_text": "Type: `file`, required, default: `sample_path`. Path to a single .h5mu file.", - "default": "sample_path" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. Output directory containing multiple h5mu files", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. Output directory containing multiple h5mu files.", - "default": "$id.$key.output.output" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "output_types": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_types.csv`, example: `types.csv`. A csv containing the base filename and modality type per output file", - "help_text": "Type: `file`, required, default: `$id.$key.output_types.csv`, example: `types.csv`. A csv containing the base filename and modality type per output file.", - "default": "$id.$key.output_types.csv" - }, - - "compression": { - "type": "string", - "description": "Type: `string`, default: `gzip`. The compression format to be used on the final h5mu object", - "help_text": "Type: `string`, default: `gzip`. 
The compression format to be used on the final h5mu object.", - "default": "gzip" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/dataflow/split_modalities/setup_logger.py b/target/nextflow/dataflow/split_modalities/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/dataflow/split_modalities/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/demux/bcl2fastq/.config.vsh.yaml b/target/nextflow/demux/bcl2fastq/.config.vsh.yaml deleted file mode 100644 index 4b57441e50f..00000000000 --- a/target/nextflow/demux/bcl2fastq/.config.vsh.yaml +++ /dev/null @@ -1,169 +0,0 @@ -functionality: - name: "bcl2fastq" - namespace: "demux" - version: "0.12.3" - authors: - - name: "Toni Verbeiren" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - github: "tverbeiren" - linkedin: "verbeiren" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist and CEO" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - - "--runfolder_dir" - description: "Input run directory" - info: null - example: - 
- "bcl_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--sample_sheet" - alternatives: - - "-s" - description: "Pointer to the sample sheet" - info: null - example: - - "SampleSheet.csv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory containig fastq files" - info: null - example: - - "fastq_dir" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reports" - description: "Reports directory" - info: null - example: - - "reports_dir" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--ignore_missing" - info: null - direction: "input" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Convert bcl files to fastq files using bcl2fastq.\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_bcl/bcl" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/bcl2fastq:2.20" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "midcpu" - tag: "$id" - auto: - 
simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl2fastq" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl2fastq/bcl2fastq" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/demux/bcl2fastq/main.nf b/target/nextflow/demux/bcl2fastq/main.nf deleted file mode 100644 index 99d76399634..00000000000 --- a/target/nextflow/demux/bcl2fastq/main.nf +++ /dev/null @@ -1,2548 +0,0 @@ -// bcl2fastq 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Toni Verbeiren (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "bcl2fastq", - "namespace" : "demux", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Toni Verbeiren", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "github" : "tverbeiren", - "linkedin" : "verbeiren" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist and CEO" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i", - "--runfolder_dir" - ], - "description" : "Input run directory", - "example" : [ - "bcl_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--sample_sheet", - "alternatives" : [ - "-s" - ], - "description" : "Pointer to the sample sheet", - "example" : [ - "SampleSheet.csv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output directory containig fastq files", - "example" : [ - "fastq_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - 
"type" : "file", - "name" : "--reports", - "description" : "Reports directory", - "example" : [ - "reports_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--ignore_missing", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/" - } - ], - "description" : "Convert bcl files to fastq files using bcl2fastq.\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_bcl/bcl", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/bcl2fastq:2.20", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midmem", - "midcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - 
"mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl2fastq", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! 
-z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) -$( if [ ! -z ${VIASH_PAR_IGNORE_MISSING+x} ]; then echo "${VIASH_PAR_IGNORE_MISSING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_missing='&'#" ; else echo "# par_ignore_missing="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -exo pipefail - -extra_params=() - -# Handle reports stored separate -if [ ! -z "\\$par_reports" ]; then - extra_params+=("--reports-dir" "\\$par_reports") -fi - -# Handle the boolean flag -if [ "\\$par_ignore_missing" == "true" ]; then - extra_params+=("--ignore-missing-control" "--ignore-missing-bcl" "--ignore-missing-filter") -fi - -# Run the actual command -bcl2fastq \\\\ - --runfolder-dir "\\$par_input" \\\\ - --sample-sheet "\\$par_sample_sheet" \\\\ - --output-dir "\\$par_output" \\\\ - "\\${extra_params[@]}" -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/demux_bcl2fastq", - "tag" : "0.12.0" - }, - "label" : [ - "midmem", - "midcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. 
the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/demux/bcl2fastq/nextflow.config b/target/nextflow/demux/bcl2fastq/nextflow.config deleted file mode 100644 index 8cff2fe7b22..00000000000 
--- a/target/nextflow/demux/bcl2fastq/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'bcl2fastq' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Convert bcl files to fastq files using bcl2fastq.\n' - author = 'Toni Verbeiren' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory 
= 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/demux/bcl2fastq/nextflow_params.yaml b/target/nextflow/demux/bcl2fastq/nextflow_params.yaml deleted file mode 100644 index 64952782d47..00000000000 --- a/target/nextflow/demux/bcl2fastq/nextflow_params.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Arguments -input: # please fill in - example: "bcl_dir" -sample_sheet: # please fill in - example: "SampleSheet.csv" -# output: "$id.$key.output.output" -# reports: "$id.$key.reports.reports" -ignore_missing: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/demux/bcl2fastq/nextflow_schema.json b/target/nextflow/demux/bcl2fastq/nextflow_schema.json deleted file mode 100644 index f784feb634b..00000000000 --- a/target/nextflow/demux/bcl2fastq/nextflow_schema.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "bcl2fastq", - "description": "Convert bcl files to fastq files using bcl2fastq.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, 
required, example: `bcl_dir`. Input run directory", - "help_text": "Type: `file`, required, example: `bcl_dir`. Input run directory" - }, - - "sample_sheet": { - "type": "string", - "description": "Type: `file`, required, example: `SampleSheet.csv`. Pointer to the sample sheet", - "help_text": "Type: `file`, required, example: `SampleSheet.csv`. Pointer to the sample sheet" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containig fastq files", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containig fastq files", - "default": "$id.$key.output.output" - }, - - "reports": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", - "help_text": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", - "default": "$id.$key.reports.reports" - }, - - "ignore_missing": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. ", - "help_text": "Type: `boolean_true`, default: `false`. ", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/demux/bcl_convert/.config.vsh.yaml b/target/nextflow/demux/bcl_convert/.config.vsh.yaml deleted file mode 100644 index 976471a7e63..00000000000 --- a/target/nextflow/demux/bcl_convert/.config.vsh.yaml +++ /dev/null @@ -1,189 +0,0 @@ -functionality: - name: "bcl_convert" - namespace: "demux" - version: "0.12.3" - authors: - - name: "Toni Verbeiren" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - github: "tverbeiren" - linkedin: "verbeiren" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist and CEO" - - name: "Marijke Van Moerbeke" - roles: - - "author" - info: - role: "Contributor" - links: - github: "mvanmoerbeke" - orcid: "0000-0002-3097-5621" - linkedin: "marijke-van-moerbeke-84303a34" - organizations: - - name: "OpenAnalytics" - href: "https://www.openanalytics.eu" - role: "Statistical Consultant" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input run directory" - info: null - example: - - "bcl_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--sample_sheet" - alternatives: - - "-s" - description: "Pointer to the sample sheet" - info: null - example: - - "bcl_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory containig fastq files" - info: null - example: - - "fastq_dir" - must_exist: true - create_parent: true - required: true - direction: "output" - 
multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reports" - description: "Reports directory" - info: null - example: - - "reports_dir" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--test_mode" - description: "Should bcl-convert be run in test mode (using --first-tile-only)?" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Convert bcl files to fastq files using bcl-convert.\nInformation about\ - \ upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\n\ - and https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_bcl/bcl2" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/bclconvert:3.10" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: 
"memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl_convert" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl_convert/bcl_convert" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/demux/bcl_convert/main.nf b/target/nextflow/demux/bcl_convert/main.nf deleted file mode 100644 index 423ef5522a9..00000000000 --- a/target/nextflow/demux/bcl_convert/main.nf +++ /dev/null @@ -1,2574 +0,0 @@ -// bcl_convert 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Toni Verbeiren (author, maintainer) -// * Marijke Van Moerbeke (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "bcl_convert", - "namespace" : "demux", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Toni Verbeiren", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "github" : "tverbeiren", - "linkedin" : "verbeiren" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist and CEO" - } - ] - } - }, - { - "name" : "Marijke Van Moerbeke", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "mvanmoerbeke", - "orcid" : "0000-0002-3097-5621", - "linkedin" : "marijke-van-moerbeke-84303a34" - }, - "organizations" : [ - { - "name" : "OpenAnalytics", - "href" : "https://www.openanalytics.eu", - "role" : "Statistical Consultant" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input run directory", - "example" : [ - "bcl_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--sample_sheet", - "alternatives" : [ - "-s" - ], - "description" : "Pointer to the sample sheet", - "example" : [ - "bcl_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output directory containig 
fastq files", - "example" : [ - "fastq_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reports", - "description" : "Reports directory", - "example" : [ - "reports_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--test_mode", - "description" : "Should bcl-convert be run in test mode (using --first-tile-only)?", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/" - } - ], - "description" : "Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\nand https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_bcl/bcl2", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/bclconvert:3.10", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - 
"namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midmem", - "midcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl_convert", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo 
pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) -$( if [ ! -z ${VIASH_PAR_TEST_MODE+x} ]; then echo "${VIASH_PAR_TEST_MODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_test_mode='&'#" ; else echo "# par_test_mode="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -[ -d "\\$par_output" ] || mkdir -p "\\$par_output" - -bcl-convert \\\\ - --force \\\\ - --bcl-input-directory "\\$par_input" \\\\ - --output-directory "\\$par_output" \\\\ - --sample-sheet "\\$par_sample_sheet" \\\\ - --first-tile-only \\$par_test_mode - -if [ ! 
-z "\\$par_reports" ]; then - echo "Moving reports to its own location" - mv "\\$par_output"/Reports "\\$par_reports" -else - echo "Leaving reports alone" -fi -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/demux_bcl_convert", - "tag" : "0.12.0" - }, - "label" : [ - "midmem", - "midcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. 
- // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/demux/bcl_convert/nextflow.config b/target/nextflow/demux/bcl_convert/nextflow.config deleted file mode 100644 index 
86e51326225..00000000000 --- a/target/nextflow/demux/bcl_convert/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'bcl_convert' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\nand https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n' - author = 'Toni Verbeiren, Marijke Van Moerbeke' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: 
mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/demux/bcl_convert/nextflow_params.yaml b/target/nextflow/demux/bcl_convert/nextflow_params.yaml deleted file mode 100644 index a6a214dc347..00000000000 --- a/target/nextflow/demux/bcl_convert/nextflow_params.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Arguments -input: # please fill in - example: "bcl_dir" -sample_sheet: # please fill in - example: "bcl_dir" -# output: "$id.$key.output.output" -# reports: "$id.$key.reports.reports" -test_mode: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/demux/bcl_convert/nextflow_schema.json b/target/nextflow/demux/bcl_convert/nextflow_schema.json deleted file mode 100644 index f6306799e44..00000000000 --- a/target/nextflow/demux/bcl_convert/nextflow_schema.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": 
"bcl_convert", - "description": "Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\nand https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `bcl_dir`. Input run directory", - "help_text": "Type: `file`, required, example: `bcl_dir`. Input run directory" - }, - - "sample_sheet": { - "type": "string", - "description": "Type: `file`, required, example: `bcl_dir`. Pointer to the sample sheet", - "help_text": "Type: `file`, required, example: `bcl_dir`. Pointer to the sample sheet" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containig fastq files", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containig fastq files", - "default": "$id.$key.output.output" - }, - - "reports": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", - "help_text": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", - "default": "$id.$key.reports.reports" - }, - - "test_mode": { - "type": "boolean", - "description": "Type: `boolean`, default: `false`. Should bcl-convert be run in test mode (using --first-tile-only)?", - "help_text": "Type: `boolean`, default: `false`. 
Should bcl-convert be run in test mode (using --first-tile-only)?", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml b/target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml deleted file mode 100644 index fb11e2898b1..00000000000 --- a/target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml +++ /dev/null @@ -1,207 +0,0 @@ -functionality: - name: "cellranger_mkfastq" - namespace: "demux" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Samuel D'Souza" - roles: - - "author" - info: - role: "Contributor" - links: - github: "srdsam" - linkedin: "samuel-d-souza-887023150/" - organizations: - - name: "Chan Zuckerberg Biohub" - href: "https://www.czbiohub.org" - role: "Data Engineer" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - 
arguments: - - type: "file" - name: "--input" - description: "Path to the (untarred) BCL files. Expects 'RunParameters.xml' at\ - \ './'." - info: null - example: - - "/path/to/bcl" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--sample_sheet" - description: "The path to the sample sheet." - info: null - example: - - "SampleSheet.csv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "The folder to store the demux results" - info: null - example: - - "/path/to/output" - default: - - "fastqs" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reports" - description: "Reports directory" - info: null - example: - - "reports_dir" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Demultiplex raw sequencing data" - usage: "cellranger_mkfastq \\\n --input /path/to/bcl \\\n --sample_sheet SampleSheet.csv\ - \ \\\n --output /path/to/output\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_bcl" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/cellranger:6.1" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - 
target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt-get update && apt-get upgrade -y" - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/cellranger_mkfastq" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/cellranger_mkfastq/cellranger_mkfastq" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/demux/cellranger_mkfastq/main.nf b/target/nextflow/demux/cellranger_mkfastq/main.nf deleted file mode 100644 index e1de17d2434..00000000000 --- 
a/target/nextflow/demux/cellranger_mkfastq/main.nf +++ /dev/null @@ -1,2646 +0,0 @@ -// cellranger_mkfastq 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Samuel D'Souza (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "cellranger_mkfastq", - "namespace" : "demux", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Samuel D'Souza", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "srdsam", - "linkedin" : "samuel-d-souza-887023150/" - }, - "organizations" : [ - { - "name" : "Chan Zuckerberg Biohub", - "href" : "https://www.czbiohub.org", - "role" : "Data Engineer" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" 
: "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Path to the (untarred) BCL files. Expects 'RunParameters.xml' at './'.", - "example" : [ - "/path/to/bcl" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--sample_sheet", - "description" : "The path to the sample sheet.", - "example" : [ - "SampleSheet.csv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "The folder to store the demux results", - "example" : [ - "/path/to/output" - ], - "default" : [ - "fastqs" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reports", - "description" : "Reports directory", - "example" : [ - "reports_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Demultiplex raw sequencing data", - "usage" : "cellranger_mkfastq \\\\\n --input /path/to/bcl \\\\\n --sample_sheet SampleSheet.csv \\\\\n --output /path/to/output\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_bcl", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/cellranger:6.1", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "docker", - "run" : [ - "apt-get update && apt-get upgrade -y" - ] - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - 
"mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/cellranger_mkfastq", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) -$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT - -# if par_input not is a folder, untar first -if [ ! -d "\\$par_input" ]; then - echo "Assuming input is a tar.gz, untarring" - input_dir="\\$tmpdir/bcl" - mkdir -p "\\$input_dir" - tar -xzf "\\$par_input" -C "\\$input_dir" --strip-components=1 -else - input_dir="\\$par_input" -fi - - -# add additional params -extra_params=( ) - -if [ ! -z "\\$meta_cpus" ]; then - extra_params+=( "--localcores=\\$meta_cpus" ) -fi -if [ ! -z "\\$meta_memory_gb" ]; then - # always keep 2gb for the OS itself - memory_gb=\\`python -c "print(int('\\$meta_memory_gb') - 2)"\\` - extra_params+=( "--localmem=\\$memory_gb" ) -fi - - -echo "Running cellranger demux" - -id=myoutput - -cellranger mkfastq \\\\ - --id "\\$id" \\\\ - --csv "\\$par_sample_sheet" \\\\ - --run "\\$par_input" \\\\ - "\\${extra_params[@]}" \\\\ - --disable-ui \\\\ - --output-dir "\\$par_output" - -# Move reports to their own output location -if [ ! 
-z "\\$par_reports" ]; then - echo "Moving reports its own location" - mv "\\$par_output"/Reports "\\$par_reports" -else - echo "Leaving reports alone" -fi -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/demux_cellranger_mkfastq", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. 
- // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/demux/cellranger_mkfastq/nextflow.config b/target/nextflow/demux/cellranger_mkfastq/nextflow.config deleted file mode 100644 index 
8b66f628cc6..00000000000 --- a/target/nextflow/demux/cellranger_mkfastq/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'cellranger_mkfastq' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Demultiplex raw sequencing data' - author = 'Angela Oliveira Pisco, Samuel D\'Souza, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - 
withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml b/target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml deleted file mode 100644 index 7da67817572..00000000000 --- a/target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Arguments -input: # please fill in - example: "/path/to/bcl" -sample_sheet: # please fill in - example: "SampleSheet.csv" -# output: "$id.$key.output.output" -# reports: "$id.$key.reports.reports" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json b/target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json deleted file mode 100644 index 3f9dc4ee76d..00000000000 --- a/target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "cellranger_mkfastq", - "description": "Demultiplex raw sequencing data", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No 
description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `/path/to/bcl`. Path to the (untarred) BCL files", - "help_text": "Type: `file`, required, example: `/path/to/bcl`. Path to the (untarred) BCL files. Expects \u0027RunParameters.xml\u0027 at \u0027./\u0027." - }, - - "sample_sheet": { - "type": "string", - "description": "Type: `file`, required, example: `SampleSheet.csv`. The path to the sample sheet", - "help_text": "Type: `file`, required, example: `SampleSheet.csv`. The path to the sample sheet." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the demux results", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the demux results", - "default": "$id.$key.output.output" - }, - - "reports": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", - "help_text": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", - "default": "$id.$key.reports.reports" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/demux/cellranger_mkfastq/setup_logger.py b/target/nextflow/demux/cellranger_mkfastq/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/demux/cellranger_mkfastq/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/dimred/pca/.config.vsh.yaml b/target/nextflow/dimred/pca/.config.vsh.yaml deleted file mode 100644 index 37ef15ac31d..00000000000 --- a/target/nextflow/dimred/pca/.config.vsh.yaml +++ /dev/null @@ -1,253 +0,0 @@ -functionality: - name: "pca" - namespace: "dimred" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - 
type: "string" - name: "--layer" - description: "Use specified layer for expression values instead of the .X object\ - \ from the modality." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_input" - description: "Column name in .var matrix that will be used to select which genes\ - \ to run the PCA on." - info: null - example: - - "filter_with_hvg" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting embedding." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--varm_output" - description: "In which .varm slot to store the resulting loadings matrix." - info: null - default: - - "pca_loadings" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_output" - description: "In which .uns slot to store the resulting variance objects." - info: null - default: - - "pca_variance" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_components" - description: "Number of principal components to compute. 
Defaults to 50, or 1\ - \ - minimum dimension size of selected representation." - info: null - example: - - 25 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--overwrite" - description: "Allow overwriting .obsm, .varm and .uns slots." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Computes PCA coordinates, loadings and variance decomposition. Uses\ - \ the implementation of scikit-learn [Pedregosa11].\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - 
mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dimred/pca/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/pca" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/pca/pca" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/dimred/pca/main.nf b/target/nextflow/dimred/pca/main.nf deleted file mode 100644 index 4a6f2229c27..00000000000 --- a/target/nextflow/dimred/pca/main.nf +++ /dev/null @@ -1,2718 +0,0 @@ -// pca 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "pca", - "namespace" : "dimred", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer", - "description" : "Use specified layer for expression values instead of the .X object from the modality.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_input", - "description" : "Column name in .var matrix that will be used to select which genes to run the PCA on.", - "example" : [ - "filter_with_hvg" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - 
"description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_output", - "description" : "In which .obsm slot to store the resulting embedding.", - "default" : [ - "X_pca" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--varm_output", - "description" : "In which .varm slot to store the resulting loadings matrix.", - "default" : [ - "pca_loadings" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--uns_output", - "description" : "In which .uns slot to store the resulting variance objects.", - "default" : [ - "pca_variance" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--num_components", - "description" : "Number of principal components to compute. 
Defaults to 50, or 1 - minimum dimension size of selected representation.", - "example" : [ - 25 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--overwrite", - "description" : "Allow overwriting .obsm, .varm and .uns slots.", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/pca/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/pca/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - 
"type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highcpu", - "highmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/dimred/pca/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/pca", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import scanpy as sc -import mudata as mu -import sys -from anndata import AnnData - -## VIASH START -# The following code has been 
auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par["input"]) -mdata = mu.read_h5mu(par["input"]) - -logger.info("Computing PCA components for modality '%s'", par['modality']) -data = mdata.mod[par['modality']] -if par['layer'] and par['layer'] not in data.layers: - raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.") -layer = data.X if not par['layer'] else data.layers[par['layer']] -adata_input_layer = AnnData(layer) -adata_input_layer.var.index = data.var.index - -use_highly_variable = False -if par["var_input"]: - if not par["var_input"] in data.var.columns: - raise ValueError(f"Requested to use .var column {par['var_input']} " - "as a selection of genes to run the PCA on, " - f"but the column is not available for modality {par['modality']}") - use_highly_variable = True - adata_input_layer.var['highly_variable'] = data.var[par["var_input"]] - -# run pca -output_adata = sc.tl.pca( - adata_input_layer, - n_comps=par["num_components"], - copy=True, - use_highly_variable=use_highly_variable -) - -# store output in specific objects - -check_exist_dict = { - "obsm_output": ("obs"), - "varm_output": ("varm"), - "uns_output": ("uns") -} -for parameter_name, field in check_exist_dict.items(): - if par[parameter_name] in 
getattr(data, field): - if not par["overwrite"]: - raise ValueError(f"Requested to create field {par[parameter_name]} in .{field} " - f"for modality {par['modality']}, but field already exists.") - del getattr(data, field)[par[parameter_name]] - -data.obsm[par["obsm_output"]] = output_adata.obsm['X_pca'] -data.varm[par["varm_output"]] = output_adata.varm['PCs'] -data.uns[par["uns_output"]] = { "variance": output_adata.uns['pca']['variance'], - "variance_ratio": output_adata.uns['pca']['variance_ratio'] } - - -logger.info("Writing to %s.", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/dimred_pca", - "tag" : "0.12.0" - }, - "label" : [ - "highcpu", - "highmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/dimred/pca/nextflow.config b/target/nextflow/dimred/pca/nextflow.config deleted file mode 100644 index a5da02e0032..00000000000 --- 
a/target/nextflow/dimred/pca/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'pca' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n' - author = 'Dries De Maeyer' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: 
mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/dimred/pca/nextflow_params.yaml b/target/nextflow/dimred/pca/nextflow_params.yaml deleted file mode 100644 index 52d5f4d55bc..00000000000 --- a/target/nextflow/dimred/pca/nextflow_params.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# layer: "foo" -# var_input: "filter_with_hvg" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -obsm_output: "X_pca" -varm_output: "pca_loadings" -uns_output: "pca_variance" -# num_components: 25 -overwrite: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/dimred/pca/nextflow_schema.json b/target/nextflow/dimred/pca/nextflow_schema.json deleted file mode 100644 index 9b38ace3170..00000000000 --- a/target/nextflow/dimred/pca/nextflow_schema.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "pca", - "description": "Computes PCA coordinates, loadings and variance decomposition. 
Uses the implementation of scikit-learn [Pedregosa11].\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "layer": { - "type": "string", - "description": "Type: `string`. Use specified layer for expression values instead of the ", - "help_text": "Type: `string`. Use specified layer for expression values instead of the .X object from the modality." - }, - - "var_input": { - "type": "string", - "description": "Type: `string`, example: `filter_with_hvg`. Column name in ", - "help_text": "Type: `string`, example: `filter_with_hvg`. Column name in .var matrix that will be used to select which genes to run the PCA on." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "obsm_output": { - "type": "string", - "description": "Type: `string`, default: `X_pca`. In which ", - "help_text": "Type: `string`, default: `X_pca`. 
In which .obsm slot to store the resulting embedding.", - "default": "X_pca" - }, - - "varm_output": { - "type": "string", - "description": "Type: `string`, default: `pca_loadings`. In which ", - "help_text": "Type: `string`, default: `pca_loadings`. In which .varm slot to store the resulting loadings matrix.", - "default": "pca_loadings" - }, - - "uns_output": { - "type": "string", - "description": "Type: `string`, default: `pca_variance`. In which ", - "help_text": "Type: `string`, default: `pca_variance`. In which .uns slot to store the resulting variance objects.", - "default": "pca_variance" - }, - - "num_components": { - "type": "integer", - "description": "Type: `integer`, example: `25`. Number of principal components to compute", - "help_text": "Type: `integer`, example: `25`. Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation." - }, - - "overwrite": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Allow overwriting ", - "help_text": "Type: `boolean_true`, default: `false`. Allow overwriting .obsm, .varm and .uns slots.", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/dimred/pca/setup_logger.py b/target/nextflow/dimred/pca/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/dimred/pca/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/dimred/umap/.config.vsh.yaml b/target/nextflow/dimred/umap/.config.vsh.yaml deleted file mode 100644 index ea4349c006f..00000000000 --- a/target/nextflow/dimred/umap/.config.vsh.yaml +++ /dev/null @@ -1,312 +0,0 @@ -functionality: - name: "umap" - namespace: "dimred" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: 
"--uns_neighbors" - description: "The `.uns` neighbors slot as output by the `find_neighbors` component." - info: null - default: - - "neighbors" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "The pre/postfix under which to store the UMAP results." - info: null - default: - - "umap" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "double" - name: "--min_dist" - description: "The effective minimum distance between embedded points. Smaller\ - \ values will result in a more clustered/clumped embedding where nearby points\ - \ on the manifold are drawn closer together, while larger values will result\ - \ on a more even dispersal of points. The value should be set relative to\ - \ the spread value, which determines the scale at which embedded points will\ - \ be spread out." - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--spread" - description: "The effective scale of embedded points. In combination with `min_dist`\ - \ this determines how clustered/clumped the embedded points are." 
- info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_components" - description: "The number of dimensions of the embedding." - info: null - default: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_iter" - description: "The number of iterations (epochs) of the optimization. Called\ - \ `n_epochs` in the original UMAP. Default is set to 500 if neighbors['connectivities'].shape[0]\ - \ <= 10000, else 200." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alpha" - description: "The initial learning rate for the embedding optimization." - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--gamma" - description: "Weighting applied to negative samples in low dimensional embedding\ - \ optimization. Values higher than one will result in greater weight being\ - \ given to negative samples." - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--negative_sample_rate" - description: "The number of negative edge/1-simplex samples to use per positive\ - \ edge/1-simplex sample in optimizing the low dimensional embedding." - info: null - default: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--init_pos" - description: "How to initialize the low dimensional embedding. Called `init`\ - \ in the original UMAP. 
Options are:\n\n* Any key from `.obsm`\n* `'paga'`:\ - \ positions from `paga()`\n* `'spectral'`: use a spectral embedding of the\ - \ graph\n* `'random'`: assign initial embedding positions at random.\n" - info: null - default: - - "spectral" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "UMAP (Uniform Manifold Approximation and Projection) is a manifold\ - \ learning technique suitable for visualizing high-dimensional data. Besides tending\ - \ to be faster than tSNE, it optimizes the embedding such that it best reflects\ - \ the topology of the data, which we represent throughout Scanpy using a neighborhood\ - \ graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances\ - \ in the embedding such that these best match the distribution of distances in\ - \ the high-dimensional space. 
We use the implementation of umap-learn [McInnes18].\ - \ For a few comparisons of UMAP with tSNE, see this preprint.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 
500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/dimred/umap/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/umap" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/umap/umap" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/dimred/umap/main.nf b/target/nextflow/dimred/umap/main.nf deleted file mode 100644 index 6938628bc36..00000000000 --- a/target/nextflow/dimred/umap/main.nf +++ /dev/null @@ -1,2778 +0,0 @@ -// umap 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "umap", - "namespace" : "dimred", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--uns_neighbors", - "description" : "The `.uns` neighbors slot as output by the `find_neighbors` component.", - "default" : [ - "neighbors" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - 
"dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_output", - "description" : "The pre/postfix under which to store the UMAP results.", - "default" : [ - "umap" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "double", - "name" : "--min_dist", - "description" : "The effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out.", - "default" : [ - 0.5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--spread", - "description" : "The effective scale of embedded points. In combination with `min_dist` this determines how clustered/clumped the embedded points are.", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--num_components", - "description" : "The number of dimensions of the embedding.", - "default" : [ - 2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--max_iter", - "description" : "The number of iterations (epochs) of the optimization. 
Called `n_epochs` in the original UMAP. Default is set to 500 if neighbors['connectivities'].shape[0] <= 10000, else 200.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--alpha", - "description" : "The initial learning rate for the embedding optimization.", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--gamma", - "description" : "Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples.", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--negative_sample_rate", - "description" : "The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding.", - "default" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--init_pos", - "description" : "How to initialize the low dimensional embedding. Called `init` in the original UMAP. 
Options are:\n\n* Any key from `.obsm`\n* `'paga'`: positions from `paga()`\n* `'spectral'`: use a spectral embedding of the graph\n* `'random'`: assign initial embedding positions at random.\n", - "default" : [ - "spectral" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/umap/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. 
For a few comparisons of UMAP with tSNE, see this preprint.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/umap/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highcpu", - "midmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 
4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/dimred/umap/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/umap", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import scanpy as sc -import mudata as mu -import sys -import anndata as ad - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'uns_neighbors': $( if [ ! -z ${VIASH_PAR_UNS_NEIGHBORS+x} ]; then echo "r'${VIASH_PAR_UNS_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! 
-z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'min_dist': $( if [ ! -z ${VIASH_PAR_MIN_DIST+x} ]; then echo "float(r'${VIASH_PAR_MIN_DIST//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'spread': $( if [ ! -z ${VIASH_PAR_SPREAD+x} ]; then echo "float(r'${VIASH_PAR_SPREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_iter': $( if [ ! -z ${VIASH_PAR_MAX_ITER+x} ]; then echo "int(r'${VIASH_PAR_MAX_ITER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'gamma': $( if [ ! -z ${VIASH_PAR_GAMMA+x} ]; then echo "float(r'${VIASH_PAR_GAMMA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'negative_sample_rate': $( if [ ! -z ${VIASH_PAR_NEGATIVE_SAMPLE_RATE+x} ]; then echo "int(r'${VIASH_PAR_NEGATIVE_SAMPLE_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'init_pos': $( if [ ! -z ${VIASH_PAR_INIT_POS+x} ]; then echo "r'${VIASH_PAR_INIT_POS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s", par["input"]) -mdata = mu.read_h5mu(par["input"]) - -logger.info("Computing UMAP for modality '%s'", par['modality']) -data = mdata.mod[par['modality']] - -if par['uns_neighbors'] not in data.uns: - raise 
ValueError(f"'{par['uns_neighbors']}' was not found in .mod['{par['modality']}'].uns.") - -# create temporary AnnData -# ... because sc.tl.umap doesn't allow to choose -# the obsm output slot -# ... also we can see scanpy is a data format dependency hell -neigh_key = par["uns_neighbors"] -temp_uns = { neigh_key: data.uns[neigh_key] } -conn_key = temp_uns[neigh_key]['connectivities_key'] -dist_key = temp_uns[neigh_key]['distances_key'] -temp_obsp = { - conn_key: data.obsp[conn_key], - dist_key: data.obsp[dist_key], -} -pca_key = temp_uns[neigh_key]['params']['use_rep'] -temp_obsm = { - pca_key: data.obsm[pca_key] -} - -temp_adata = ad.AnnData( - obsm=temp_obsm, - obsp=temp_obsp, - uns=temp_uns, - shape=data.shape -) - -sc.tl.umap( - temp_adata, - min_dist=par["min_dist"], - spread=par["spread"], - n_components=par["num_components"], - maxiter=par["max_iter"], - alpha=par["alpha"], - gamma=par["gamma"], - negative_sample_rate=par["negative_sample_rate"], - init_pos=par["init_pos"], - neighbors_key=neigh_key -) - -data.obsm[par['obsm_output']] = temp_adata.obsm['X_umap'] - -logger.info("Writing to %s.", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/dimred_umap", - "tag" : "0.12.0" - }, - "label" : [ - "highcpu", - "midmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` 
- map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
- // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/dimred/umap/nextflow.config b/target/nextflow/dimred/umap/nextflow.config deleted file mode 100644 index 413c403a89a..00000000000 --- 
a/target/nextflow/dimred/umap/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'umap' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n' - author = 'Dries De Maeyer' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - 
charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/dimred/umap/nextflow_params.yaml b/target/nextflow/dimred/umap/nextflow_params.yaml deleted file mode 100644 index fc7cc388975..00000000000 --- a/target/nextflow/dimred/umap/nextflow_params.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Inputs -input: # please fill in - example: "input.h5mu" -modality: "rna" -uns_neighbors: "neighbors" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -obsm_output: "umap" - -# Arguments -min_dist: 0.5 -spread: 1.0 -num_components: 2 -# max_iter: 123 -alpha: 1.0 -gamma: 1.0 -negative_sample_rate: 5 -init_pos: "spectral" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: 
"output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/dimred/umap/nextflow_schema.json b/target/nextflow/dimred/umap/nextflow_schema.json deleted file mode 100644 index 452fd29a65d..00000000000 --- a/target/nextflow/dimred/umap/nextflow_schema.json +++ /dev/null @@ -1,164 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "umap", - "description": "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "uns_neighbors": { - "type": "string", - "description": "Type: `string`, default: `neighbors`. The `", - "help_text": "Type: `string`, default: `neighbors`. 
The `.uns` neighbors slot as output by the `find_neighbors` component.", - "default": "neighbors" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "obsm_output": { - "type": "string", - "description": "Type: `string`, default: `umap`. The pre/postfix under which to store the UMAP results", - "help_text": "Type: `string`, default: `umap`. The pre/postfix under which to store the UMAP results.", - "default": "umap" - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "min_dist": { - "type": "number", - "description": "Type: `double`, default: `0.5`. The effective minimum distance between embedded points", - "help_text": "Type: `double`, default: `0.5`. The effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out.", - "default": "0.5" - }, - - "spread": { - "type": "number", - "description": "Type: `double`, default: `1.0`. 
The effective scale of embedded points", - "help_text": "Type: `double`, default: `1.0`. The effective scale of embedded points. In combination with `min_dist` this determines how clustered/clumped the embedded points are.", - "default": "1.0" - }, - - "num_components": { - "type": "integer", - "description": "Type: `integer`, default: `2`. The number of dimensions of the embedding", - "help_text": "Type: `integer`, default: `2`. The number of dimensions of the embedding.", - "default": "2" - }, - - "max_iter": { - "type": "integer", - "description": "Type: `integer`. The number of iterations (epochs) of the optimization", - "help_text": "Type: `integer`. The number of iterations (epochs) of the optimization. Called `n_epochs` in the original UMAP. Default is set to 500 if neighbors[\u0027connectivities\u0027].shape[0] \u003c= 10000, else 200." - }, - - "alpha": { - "type": "number", - "description": "Type: `double`, default: `1.0`. The initial learning rate for the embedding optimization", - "help_text": "Type: `double`, default: `1.0`. The initial learning rate for the embedding optimization.", - "default": "1.0" - }, - - "gamma": { - "type": "number", - "description": "Type: `double`, default: `1.0`. Weighting applied to negative samples in low dimensional embedding optimization", - "help_text": "Type: `double`, default: `1.0`. Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples.", - "default": "1.0" - }, - - "negative_sample_rate": { - "type": "integer", - "description": "Type: `integer`, default: `5`. The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding", - "help_text": "Type: `integer`, default: `5`. 
The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding.", - "default": "5" - }, - - "init_pos": { - "type": "string", - "description": "Type: `string`, default: `spectral`. How to initialize the low dimensional embedding", - "help_text": "Type: `string`, default: `spectral`. How to initialize the low dimensional embedding. Called `init` in the original UMAP. Options are:\n\n* Any key from `.obsm`\n* `\u0027paga\u0027`: positions from `paga()`\n* `\u0027spectral\u0027`: use a spectral embedding of the graph\n* `\u0027random\u0027`: assign initial embedding positions at random.\n", - "default": "spectral" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/dimred/umap/setup_logger.py b/target/nextflow/dimred/umap/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/dimred/umap/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/download/download_file/.config.vsh.yaml b/target/nextflow/download/download_file/.config.vsh.yaml deleted file mode 100644 index da764f92807..00000000000 --- a/target/nextflow/download/download_file/.config.vsh.yaml +++ /dev/null @@ -1,138 +0,0 @@ -functionality: - name: "download_file" - namespace: "download" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "string" - name: "--input" - description: "URL to a file to download." 
- info: null - example: - - "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Path where to store output." - info: null - example: - - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--verbose" - alternatives: - - "-v" - description: "Increase verbosity" - info: null - direction: "input" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Download a file.\n" - usage: "download_file \\\n --input https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5\ - \ \\\n --output output_rna.h5\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "bash:5.1.16" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 
512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/download/download_file/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/download_file" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/download_file/download_file" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/download/download_file/main.nf b/target/nextflow/download/download_file/main.nf deleted file mode 100644 index 4bb1fd5ef59..00000000000 --- a/target/nextflow/download/download_file/main.nf +++ /dev/null @@ -1,2492 +0,0 @@ -// download_file 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "download_file", - "namespace" : "download", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "string", - "name" : "--input", - "description" : "URL to a file to download.", - "example" : [ - "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Path where to store output.", - "example" : [ - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--verbose", - "alternatives" : [ - "-v" - ], - "description" : "Increase verbosity", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : 
"file:/home/runner/work/openpipeline/openpipeline/src/download/download_file/" - } - ], - "description" : "Download a file.\n", - "usage" : "download_file \\\\\n --input https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\\\n --output output_rna.h5\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "run_test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/download/download_file/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "bash:5.1.16", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - 
"cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/download/download_file/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/download_file", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=() - -if [ "\\$par_verbose" != "true" ]; then - extra_params+=("--quiet") -fi - -wget "\\$par_input" -O "\\$par_output" "\\${extra_params[@]}" -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/download_download_file", - "tag" : "0.12.0" - }, - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
- // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/download/download_file/nextflow.config b/target/nextflow/download/download_file/nextflow.config deleted file mode 100644 index 
39bd7596806..00000000000 --- a/target/nextflow/download/download_file/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'download_file' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Download a file.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { 
memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/download/download_file/nextflow_params.yaml b/target/nextflow/download/download_file/nextflow_params.yaml deleted file mode 100644 index d3120622209..00000000000 --- a/target/nextflow/download/download_file/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Arguments -input: # please fill in - example: "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" -# output: "$id.$key.output.h5" -verbose: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/download/download_file/nextflow_schema.json b/target/nextflow/download/download_file/nextflow_schema.json deleted file mode 100644 index e7510ef7c3b..00000000000 --- a/target/nextflow/download/download_file/nextflow_schema.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "download_file", - "description": "Download a file.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": 
"Type: `string`, required, example: `https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. URL to a file to download", - "help_text": "Type: `string`, required, example: `https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. URL to a file to download." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. Path where to store output", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. Path where to store output.", - "default": "$id.$key.output.h5" - }, - - "verbose": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Increase verbosity", - "help_text": "Type: `boolean_true`, default: `false`. Increase verbosity", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/download/sync_test_resources/.config.vsh.yaml b/target/nextflow/download/sync_test_resources/.config.vsh.yaml deleted file mode 100644 index 059e4e8040c..00000000000 --- a/target/nextflow/download/sync_test_resources/.config.vsh.yaml +++ /dev/null @@ -1,170 +0,0 @@ -functionality: - name: "sync_test_resources" - namespace: "download" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "string" - name: "--input" - alternatives: - - "-i" - description: "Path to the S3 bucket to sync from." - info: null - default: - - "s3://openpipelines-data" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Path to the test resource directory." - info: null - default: - - "resources_test" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--quiet" - description: "Displays the operations that would be performed using the specified\ - \ command without actually running them." - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--dryrun" - description: "Does not display the operations performed from the specified command." 
- info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--delete" - description: "Files that exist in the destination but not in the source are deleted\ - \ during sync." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--exclude" - description: "Exclude all files or objects from the command that matches the specified\ - \ pattern." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Synchronise the test resources from s3://openpipelines-data to resources_test" - usage: "sync_test_resources\nsync_test_resources --input s3://openpipelines-data\ - \ --output resources_test\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "amazon/aws-cli:2.11.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "yum" - packages: - - "procps" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 
8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/sync_test_resources" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/sync_test_resources/sync_test_resources" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/download/sync_test_resources/main.nf b/target/nextflow/download/sync_test_resources/main.nf deleted file mode 100644 index 11ec7f69c86..00000000000 --- a/target/nextflow/download/sync_test_resources/main.nf +++ /dev/null @@ -1,2554 +0,0 @@ -// sync_test_resources 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "sync_test_resources", - "namespace" : "download", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "string", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Path to the S3 bucket to sync from.", - "default" : [ - "s3://openpipelines-data" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Path to the test resource directory.", - "default" : [ - "resources_test" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--quiet", - "description" : "Displays the operations that would be performed using the specified command without actually running them.", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--dryrun", - "description" : "Does not display the operations performed from the specified command.", 
- "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--delete", - "description" : "Files that exist in the destination but not in the source are deleted during sync.", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--exclude", - "description" : "Exclude all files or objects from the command that matches the specified pattern.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/" - } - ], - "description" : "Synchronise the test resources from s3://openpipelines-data to resources_test", - "usage" : "sync_test_resources\nsync_test_resources --input s3://openpipelines-data --output resources_test\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "run_test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "amazon/aws-cli:2.11.0", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "yum", - "packages" : [ - "procps" - ] - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, 
- "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/sync_test_resources", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! 
-z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) -$( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "${VIASH_PAR_DRYRUN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dryrun='&'#" ; else echo "# par_dryrun="; fi ) -$( if [ ! -z ${VIASH_PAR_DELETE+x} ]; then echo "${VIASH_PAR_DELETE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_delete='&'#" ; else echo "# par_delete="; fi ) -$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=( ) - -if [ "\\$par_quiet" == "true" ]; then - extra_params+=( "--quiet" ) -fi -if [ "\\$par_dryrun" == "true" ]; then - extra_params+=( "--dryrun" ) -fi -if [ "\\$par_delete" == "true" ]; then - extra_params+=( "--delete" ) -fi - -if [ ! -z \\${par_exclude+x} ]; then - IFS=":" - for var in \\$par_exclude; do - unset IFS - extra_params+=( "--exclude" "\\$var" ) - done -fi - - -# Disable the use of the Amazon EC2 instance metadata service (IMDS). 
-# see https://florian.ec/blog/github-actions-awscli-errors/ -# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 -export AWS_EC2_METADATA_DISABLED=true - -aws s3 sync "\\$par_input" "\\$par_output" --no-sign-request "\\${extra_params[@]}" -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/download_sync_test_resources", - "tag" : "0.12.0" - }, - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. 
- // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/download/sync_test_resources/nextflow.config b/target/nextflow/download/sync_test_resources/nextflow.config deleted file mode 100644 index 
bea4d45e6e1..00000000000 --- a/target/nextflow/download/sync_test_resources/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'sync_test_resources' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Synchronise the test resources from s3://openpipelines-data to resources_test' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 
128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/download/sync_test_resources/nextflow_params.yaml b/target/nextflow/download/sync_test_resources/nextflow_params.yaml deleted file mode 100644 index 7c714c2f36c..00000000000 --- a/target/nextflow/download/sync_test_resources/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: "s3://openpipelines-data" -# output: "$id.$key.output.output" -quiet: false -dryrun: false -delete: false -# exclude: ["foo"] - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/download/sync_test_resources/nextflow_schema.json b/target/nextflow/download/sync_test_resources/nextflow_schema.json deleted file mode 100644 index fba797e3653..00000000000 --- a/target/nextflow/download/sync_test_resources/nextflow_schema.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "sync_test_resources", - "description": "Synchronise the test resources from s3://openpipelines-data to resources_test", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": 
"object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `string`, default: `s3://openpipelines-data`. Path to the S3 bucket to sync from", - "help_text": "Type: `string`, default: `s3://openpipelines-data`. Path to the S3 bucket to sync from.", - "default": "s3://openpipelines-data" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.output`. Path to the test resource directory", - "help_text": "Type: `file`, default: `$id.$key.output.output`. Path to the test resource directory.", - "default": "$id.$key.output.output" - }, - - "quiet": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Displays the operations that would be performed using the specified command without actually running them", - "help_text": "Type: `boolean_true`, default: `false`. Displays the operations that would be performed using the specified command without actually running them.", - "default": "False" - }, - - "dryrun": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Does not display the operations performed from the specified command", - "help_text": "Type: `boolean_true`, default: `false`. Does not display the operations performed from the specified command.", - "default": "False" - }, - - "delete": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Files that exist in the destination but not in the source are deleted during sync", - "help_text": "Type: `boolean_true`, default: `false`. Files that exist in the destination but not in the source are deleted during sync.", - "default": "False" - }, - - "exclude": { - "type": "string", - "description": "Type: List of `string`, multiple_sep: `\":\"`. Exclude all files or objects from the command that matches the specified pattern", - "help_text": "Type: List of `string`, multiple_sep: `\":\"`. 
Exclude all files or objects from the command that matches the specified pattern." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/files/make_params/.config.vsh.yaml b/target/nextflow/files/make_params/.config.vsh.yaml deleted file mode 100644 index dd4d69a25d9..00000000000 --- a/target/nextflow/files/make_params/.config.vsh.yaml +++ /dev/null @@ -1,220 +0,0 @@ -functionality: - name: "make_params" - namespace: "files" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--base_dir" - description: "Base directory to search recursively" - info: null - example: - - "/path/to/dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: 
"string" - name: "--pattern" - description: "An optional regular expression. Only file names which match the\ - \ regular expression will be matched." - info: null - example: - - "*.fastq.gz" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_dirname_drop" - description: "For every matched file, the parent directory will be traversed N\ - \ times." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_basename_id" - description: "The unique identifiers will consist of at least N dirnames." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--id_name" - description: "The name for storing the identifier field in the yaml." - info: null - default: - - "id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--path_name" - description: "The name for storing the path field in the yaml." - info: null - default: - - "path" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--group_name" - description: "Top level name for the group of entries." - info: null - example: - - "param_list" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output YAML file." - info: null - example: - - "params.yaml" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "r_script" - path: "script.R" - is_executable: true - description: "Looks for files in a directory and turn it in a params file." 
- test_resources: - - type: "bash_script" - path: "test_make_params.sh" - is_executable: true - - type: "file" - path: "../../../src" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/randpy:r4.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/files/make_params/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/files/make_params" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/files/make_params/make_params" - 
viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/files/make_params/main.nf b/target/nextflow/files/make_params/main.nf deleted file mode 100644 index a7bb9012e26..00000000000 --- a/target/nextflow/files/make_params/main.nf +++ /dev/null @@ -1,2663 +0,0 @@ -// make_params 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (maintainer, author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "make_params", - "namespace" : "files", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer", - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" 
: "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--base_dir", - "description" : "Base directory to search recursively", - "example" : [ - "/path/to/dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--pattern", - "description" : "An optional regular expression. Only file names which match the regular expression will be matched.", - "example" : [ - "*.fastq.gz" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_dirname_drop", - "description" : "For every matched file, the parent directory will be traversed N times.", - "default" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_basename_id", - "description" : "The unique identifiers will consist of at least N dirnames.", - "default" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--id_name", - "description" : "The name for storing the identifier field in the yaml.", - "default" : [ - "id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--path_name", - "description" : "The name for storing the path field in the yaml.", - "default" : [ - "path" - ], - "required" : false, - 
"direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--group_name", - "description" : "Top level name for the group of entries.", - "example" : [ - "param_list" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output YAML file.", - "example" : [ - "params.yaml" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "r_script", - "path" : "script.R", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/files/make_params/" - } - ], - "description" : "Looks for files in a directory and turn it in a params file.", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test_make_params.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/files/make_params/" - }, - { - "type" : "file", - "path" : "../../../src", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/files/make_params/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/randpy:r4.0", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - 
"simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/files/make_params/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/files/make_params", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -library(dplyr) -library(purrr) - -## VIASH START -# The following code has been auto-generated by Viash. -# treat warnings as errors -.viash_orig_warn <- options(warn = 2) - -par <- list( - "base_dir" = $( if [ ! -z ${VIASH_PAR_BASE_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_BASE_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "pattern" = $( if [ ! 
-z ${VIASH_PAR_PATTERN+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATTERN" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "n_dirname_drop" = $( if [ ! -z ${VIASH_PAR_N_DIRNAME_DROP+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_DIRNAME_DROP" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "n_basename_id" = $( if [ ! -z ${VIASH_PAR_N_BASENAME_ID+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_BASENAME_ID" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "id_name" = $( if [ ! -z ${VIASH_PAR_ID_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "path_name" = $( if [ ! -z ${VIASH_PAR_PATH_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATH_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "group_name" = $( if [ ! -z ${VIASH_PAR_GROUP_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_GROUP_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ) -) -meta <- list( - "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "temp_dir" = $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_pb" = $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) -) - - -# restore original warn setting -options(.viash_orig_warn) -rm(.viash_orig_warn) - -## VIASH END - -cat("> Listing files of base dir ", par\\$base_dir, "\\\\n", sep = "") -paths <- list.files( - normalizePath(par\\$base_dir), - pattern = par\\$pattern, - recursive = TRUE, - full.names = TRUE -) - -cat("> Traversing up ", par\\$n_dirname_apply, " times\\\\n", sep = "") -for (i in seq_len(par\\$n_dirname_drop)) { - paths <- dirname(paths) %>% unique() -} - -# removing /viash_automount in case we're inside a docker container -paths <- gsub("^/viash_automount", "", paths) - -cat("> Checking whether basenames are unique\\\\n") -i <- par\\$n_basename_id -maxi <- strsplit(paths, "/") %>% map_int(length) %>% max - -regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\\$") -ids <- gsub("/", "_", gsub(regex, "\\\\\\\\1", paths)) - -cat("> Printing first five rows\\\\n") -print(tibble(id = ids, path = paths) %>% head(5)) -cat("\\\\n") - -while (i < maxi && any(duplicated(ids))) { - i <- i + 1 - cat("Duplicated ids detected, combining with ", i, " dirnames in an attempt to get unique ids.\\\\n") - regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\\$") - ids <- gsub("/", "_", gsub(regex, "\\\\\\\\1", paths)) - - cat("> Printing first five rows\\\\n") - print(tibble(id = ids, path = paths) %>% head(5)) - cat("\\\\n") -} - -cat("> Transforming into list of items\\\\n") -par_list <- map2( - ids, paths, - function(id, input) { - setNames(list(id, input), c(par\\$id_name, par\\$path_name)) - } -) - -if (!is.null(par\\$group_name)) { - par_list <- setNames(list(par_list), par\\$group_name) -} - -cat("> Writing as YAML\\\\n") -yaml::write_yaml(par_list, par\\$output) -VIASHMAIN -Rscript "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process 
and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/files_make_params", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. 
- // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = 
Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. 
- def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? 
config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/files/make_params/nextflow.config b/target/nextflow/files/make_params/nextflow.config deleted file mode 100644 index 
5a1cb159823..00000000000 --- a/target/nextflow/files/make_params/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'make_params' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Looks for files in a directory and turn it in a params file.' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - 
withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/files/make_params/nextflow_params.yaml b/target/nextflow/files/make_params/nextflow_params.yaml deleted file mode 100644 index 9dbc54ad0db..00000000000 --- a/target/nextflow/files/make_params/nextflow_params.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Arguments -base_dir: # please fill in - example: "/path/to/dir" -pattern: # please fill in - example: "*.fastq.gz" -n_dirname_drop: 0 -n_basename_id: 0 -id_name: "id" -path_name: "path" -# group_name: "param_list" -# output: "$id.$key.output.yaml" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/files/make_params/nextflow_schema.json b/target/nextflow/files/make_params/nextflow_schema.json deleted file mode 100644 index 677ee7397c4..00000000000 --- a/target/nextflow/files/make_params/nextflow_schema.json +++ /dev/null @@ -1,98 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "make_params", - "description": "Looks for files in a directory and turn it in a params file.", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": 
"object", - "description": "No description", - "properties": { - - "base_dir": { - "type": "string", - "description": "Type: `file`, required, example: `/path/to/dir`. Base directory to search recursively", - "help_text": "Type: `file`, required, example: `/path/to/dir`. Base directory to search recursively" - }, - - "pattern": { - "type": "string", - "description": "Type: `string`, required, example: `*.fastq.gz`. An optional regular expression", - "help_text": "Type: `string`, required, example: `*.fastq.gz`. An optional regular expression. Only file names which match the regular expression will be matched." - }, - - "n_dirname_drop": { - "type": "integer", - "description": "Type: `integer`, default: `0`. For every matched file, the parent directory will be traversed N times", - "help_text": "Type: `integer`, default: `0`. For every matched file, the parent directory will be traversed N times.", - "default": "0" - }, - - "n_basename_id": { - "type": "integer", - "description": "Type: `integer`, default: `0`. The unique identifiers will consist of at least N dirnames", - "help_text": "Type: `integer`, default: `0`. The unique identifiers will consist of at least N dirnames.", - "default": "0" - }, - - "id_name": { - "type": "string", - "description": "Type: `string`, default: `id`. The name for storing the identifier field in the yaml", - "help_text": "Type: `string`, default: `id`. The name for storing the identifier field in the yaml.", - "default": "id" - }, - - "path_name": { - "type": "string", - "description": "Type: `string`, default: `path`. The name for storing the path field in the yaml", - "help_text": "Type: `string`, default: `path`. The name for storing the path field in the yaml.", - "default": "path" - }, - - "group_name": { - "type": "string", - "description": "Type: `string`, example: `param_list`. Top level name for the group of entries", - "help_text": "Type: `string`, example: `param_list`. Top level name for the group of entries." 
- }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.yaml`, example: `params.yaml`. Output YAML file", - "help_text": "Type: `file`, required, default: `$id.$key.output.yaml`, example: `params.yaml`. Output YAML file.", - "default": "$id.$key.output.yaml" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/delimit_fraction/.config.vsh.yaml b/target/nextflow/filter/delimit_fraction/.config.vsh.yaml deleted file mode 100644 index 2e6fea8bf10..00000000000 --- a/target/nextflow/filter/delimit_fraction/.config.vsh.yaml +++ /dev/null @@ -1,241 +0,0 @@ -functionality: - name: "delimit_fraction" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - 
type: "string" - name: "--layer" - info: null - example: - - "raw_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_fraction_column" - description: "Name of column from .var dataframe selecting\na column that contains\ - \ floating point values between 0 and 1.\n" - info: null - example: - - "fraction_mitochondrial" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_name_filter" - description: "In which .obs slot to store a boolean array corresponding to which\ - \ observations should be removed." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "double" - name: "--min_fraction" - description: "Min fraction for an observation to be retained (True in output)." - info: null - default: - - 0.0 - required: false - min: 0.0 - max: 1.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_fraction" - description: "Max fraction for an observation to be retained (True in output)." 
- info: null - default: - - 1.0 - required: false - min: 0.0 - max: 1.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Turns a column containing values between 0 and 1 into a boolean column\ - \ based on thresholds.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: 
"memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/delimit_fraction" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/delimit_fraction/delimit_fraction" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/filter/delimit_fraction/main.nf b/target/nextflow/filter/delimit_fraction/main.nf deleted file mode 100644 index 6d98952a370..00000000000 --- a/target/nextflow/filter/delimit_fraction/main.nf +++ /dev/null @@ -1,2713 +0,0 @@ -// delimit_fraction 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "delimit_fraction", - "namespace" : "filter", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer", - "example" : [ - "raw_counts" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_fraction_column", - "description" : "Name of column from .var dataframe selecting\na column that contains floating point values between 0 and 1.\n", - "example" : [ - "fraction_mitochondrial" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", 
- "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_name_filter", - "description" : "In which .obs slot to store a boolean array corresponding to which observations should be removed.", - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "double", - "name" : "--min_fraction", - "description" : "Min fraction for an observation to be retained (True in output).", - "default" : [ - 0.0 - ], - "required" : false, - "min" : 0.0, - "max" : 1.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--max_fraction", - "description" : "Max fraction for an observation to be retained (True in output).", - "default" : [ - 1.0 - ], - "required" : false, - "min" : 0.0, - "max" : 1.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Turns a column containing 
values between 0 and 1 into a boolean column based on thresholds.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 
4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/delimit_fraction", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN - -import mudata as mu -import numpy as np -import sys -from operator import le, ge -from pandas.api.types import is_float_dtype - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_fraction_column': $( if [ ! -z ${VIASH_PAR_OBS_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OBS_FRACTION_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'min_fraction': $( if [ ! -z ${VIASH_PAR_MIN_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MIN_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_fraction': $( if [ ! -z ${VIASH_PAR_MAX_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MAX_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input data") -mdata = mu.read_h5mu(par["input"]) - -mdata.var_names_make_unique() - -mod = par['modality'] -logger.info("Processing modality %s.", mod) -data = mdata.mod[mod] - -logger.info("\\\\tUnfiltered data: %s", data) - -logger.info("\\\\tComputing aggregations.") - -def apply_filter_to_mask(mask, base, filter, comparator): - new_filt = np.ravel(comparator(base, filter)) - num_removed = np.sum(np.invert(new_filt) & mask) - mask &= new_filt - return num_removed, mask - -try: - fraction = data.obs[par['obs_fraction_column']] -except KeyError: - raise ValueError(f"Could not find column '{par['obs_fraction_column']}'") -if not is_float_dtype(fraction): - raise ValueError(f"Column '{par['obs_fraction_column']}' does not contain float datatype.") -if fraction.max() > 1: - 
raise ValueError(f"Column '{par['obs_fraction_column']}' contains values > 1.") -if fraction.min() < 0: - raise ValueError(f"Column '{par['obs_fraction_column']}' contains values < 0.") - - -# Filter cells -filters = (("min_fraction", fraction, ge, "\\\\tRemoving %s cells with <%s percentage mitochondrial reads."), - ("max_fraction", fraction, le, "\\\\tRemoving %s cells with >%s percentage mitochondrial reads."), - ) - -keep_cells = np.repeat(True, data.n_obs) -for filter_name_or_value, base, comparator, message in filters: - try: - filter = par[filter_name_or_value] - except KeyError: - filter = filter_name_or_value - if filter is not None: - num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, filter, comparator) - logger.info(message, num_removed, filter) - -data.obs[par["obs_name_filter"]] = keep_cells - -logger.info("\\\\tFiltered data: %s", data) -logger.info("Writing output data to %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/filter_delimit_fraction", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. 
the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/filter/delimit_fraction/nextflow.config b/target/nextflow/filter/delimit_fraction/nextflow.config deleted file mode 100644 index 
4305d46b190..00000000000 --- a/target/nextflow/filter/delimit_fraction/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'delimit_fraction' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Turns a column containing values between 0 and 1 into a boolean column based on thresholds.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory 
= 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/filter/delimit_fraction/nextflow_params.yaml b/target/nextflow/filter/delimit_fraction/nextflow_params.yaml deleted file mode 100644 index b3df3adef10..00000000000 --- a/target/nextflow/filter/delimit_fraction/nextflow_params.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Inputs -input: # please fill in - example: "input.h5mu" -modality: "rna" -# layer: "raw_counts" -obs_fraction_column: # please fill in - example: "fraction_mitochondrial" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -obs_name_filter: # please fill in - example: "foo" - -# Arguments -min_fraction: 0 -max_fraction: 1 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/delimit_fraction/nextflow_schema.json b/target/nextflow/filter/delimit_fraction/nextflow_schema.json deleted file mode 100644 index 6f5bb0e68d4..00000000000 --- a/target/nextflow/filter/delimit_fraction/nextflow_schema.json +++ /dev/null @@ -1,127 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "delimit_fraction", - "description": 
"Turns a column containing values between 0 and 1 into a boolean column based on thresholds.\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "layer": { - "type": "string", - "description": "Type: `string`, example: `raw_counts`. ", - "help_text": "Type: `string`, example: `raw_counts`. " - }, - - "obs_fraction_column": { - "type": "string", - "description": "Type: `string`, required, example: `fraction_mitochondrial`. Name of column from ", - "help_text": "Type: `string`, required, example: `fraction_mitochondrial`. Name of column from .var dataframe selecting\na column that contains floating point values between 0 and 1.\n" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "obs_name_filter": { - "type": "string", - "description": "Type: `string`, required. 
In which ", - "help_text": "Type: `string`, required. In which .obs slot to store a boolean array corresponding to which observations should be removed." - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "min_fraction": { - "type": "number", - "description": "Type: `double`, default: `0`. Min fraction for an observation to be retained (True in output)", - "help_text": "Type: `double`, default: `0`. Min fraction for an observation to be retained (True in output).", - "default": "0" - }, - - "max_fraction": { - "type": "number", - "description": "Type: `double`, default: `1`. Max fraction for an observation to be retained (True in output)", - "help_text": "Type: `double`, default: `1`. Max fraction for an observation to be retained (True in output).", - "default": "1" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/delimit_fraction/setup_logger.py b/target/nextflow/filter/delimit_fraction/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/filter/delimit_fraction/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/filter/do_filter/.config.vsh.yaml b/target/nextflow/filter/do_filter/.config.vsh.yaml deleted file mode 100644 index c8212d38c48..00000000000 --- a/target/nextflow/filter/do_filter/.config.vsh.yaml +++ /dev/null @@ -1,202 +0,0 @@ -functionality: - name: "do_filter" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - 
multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_filter" - description: "Which .obs columns to use to filter the observations by." - info: null - example: - - "filter_with_x" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_filter" - description: "Which .var columns to use to filter the observations by." - info: null - example: - - "filter_with_x" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Remove observations and variables based on specified .obs and .var\ - \ columns.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 
128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/do_filter" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/do_filter/do_filter" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/filter/do_filter/main.nf b/target/nextflow/filter/do_filter/main.nf deleted file mode 100644 index 74c3d36b1a4..00000000000 --- a/target/nextflow/filter/do_filter/main.nf +++ /dev/null @@ -1,2634 +0,0 @@ -// do_filter 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer, contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "do_filter", - "namespace" : "filter", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer", - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_filter", - "description" : "Which .obs columns to use to filter the observations by.", - "example" : [ - "filter_with_x" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_filter", - "description" : "Which .var columns to use to filter the observations by.", - "example" : [ - "filter_with_x" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - 
"multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Remove observations and variables based on specified .obs and .var columns.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", 
- "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/do_filter", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - 
-thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -import numpy as np -import sys - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_filter': $( if [ ! -z ${VIASH_PAR_OBS_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_FILTER//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'var_filter': $( if [ ! -z ${VIASH_PAR_VAR_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_FILTER//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s", par['input']) -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Processing modality '%s'", mod) - -obs_filt = np.repeat(True, mdata.mod[mod].n_obs) -var_filt = np.repeat(True, mdata.mod[mod].n_vars) - -par["obs_filter"] = par["obs_filter"] if par["obs_filter"] else [] -par["var_filter"] = par["var_filter"] if par["var_filter"] else [] - -for obs_name in par["obs_filter"]: - logger.info("Filtering modality '%s' observations by .obs['%s']", mod, obs_name) - if 
not obs_name in mdata.mod[mod].obs: - raise ValueError(f".mod[{mod}].obs[{obs_name}] does not exist.") - if obs_name in mdata.mod[mod].obs: - obs_filt &= mdata.mod[mod].obs[obs_name] - -for var_name in par["var_filter"]: - logger.info("Filtering modality '%s' variables by .var['%s']", mod, var_name) - if not var_name in mdata.mod[mod].var: - raise ValueError(f".mod[{mod}].var[{var_name}] does not exist.") - if var_name in mdata.mod[mod].var: - var_filt &= mdata.mod[mod].var[var_name] - -mdata.mod[mod] = mdata.mod[mod][obs_filt, var_filt].copy() - -logger.info("Writing h5mu to file %s.", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/filter_do_filter", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/filter/do_filter/nextflow.config b/target/nextflow/filter/do_filter/nextflow.config deleted file mode 100644 index 39263b51694..00000000000 
--- a/target/nextflow/filter/do_filter/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'do_filter' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Remove observations and variables based on specified .obs and .var columns.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } 
- withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/filter/do_filter/nextflow_params.yaml b/target/nextflow/filter/do_filter/nextflow_params.yaml deleted file mode 100644 index a0d7704cbec..00000000000 --- a/target/nextflow/filter/do_filter/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# obs_filter: ["filter_with_x"] -# var_filter: ["filter_with_x"] -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/do_filter/nextflow_schema.json b/target/nextflow/filter/do_filter/nextflow_schema.json deleted file mode 100644 index 9c0e896ebea..00000000000 --- a/target/nextflow/filter/do_filter/nextflow_schema.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "do_filter", - "description": "Remove observations and variables based on specified .obs and .var columns.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { 
- "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "obs_filter": { - "type": "string", - "description": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which ", - "help_text": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which .obs columns to use to filter the observations by." - }, - - "var_filter": { - "type": "string", - "description": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which ", - "help_text": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which .var columns to use to filter the observations by." - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/do_filter/setup_logger.py b/target/nextflow/filter/do_filter/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/filter/do_filter/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_counts/.config.vsh.yaml b/target/nextflow/filter/filter_with_counts/.config.vsh.yaml deleted file mode 100644 index 55373a7f6b4..00000000000 --- a/target/nextflow/filter/filter_with_counts/.config.vsh.yaml +++ /dev/null @@ -1,295 +0,0 @@ -functionality: - name: "filter_with_counts" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "author" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - 
role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - info: null - example: - - "raw_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--do_subset" - description: "Whether to subset before storing the output." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--obs_name_filter" - description: "In which .obs slot to store a boolean array corresponding to which\ - \ observations should be removed." - info: null - default: - - "filter_with_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_name_filter" - description: "In which .var slot to store a boolean array corresponding to which\ - \ variables should be removed." 
- info: null - default: - - "filter_with_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: "--min_counts" - description: "Minimum number of counts captured per cell." - info: null - example: - - 200 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_counts" - description: "Maximum number of counts captured per cell." - info: null - example: - - 5000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_genes_per_cell" - description: "Minimum of non-zero values per cell." - info: null - example: - - 200 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_genes_per_cell" - description: "Maximum of non-zero values per cell." - info: null - example: - - 1500000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells_per_gene" - description: "Minimum of non-zero values per gene." - info: null - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Filter scRNA-seq data based on the primary QC metrics. 
\nThis is based\ - \ on both the UMI counts, the gene counts \nand the mitochondrial genes (genes\ - \ starting with mt/MT).\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - 
cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_counts" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_counts/filter_with_counts" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/filter/filter_with_counts/main.nf b/target/nextflow/filter/filter_with_counts/main.nf deleted file mode 100644 index b4caa05f754..00000000000 --- a/target/nextflow/filter/filter_with_counts/main.nf +++ /dev/null @@ -1,2796 +0,0 @@ -// filter_with_counts 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (author) -// * Robrecht Cannoodt (maintainer, author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "filter_with_counts", - "namespace" : "filter", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer", - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer", - "example" : [ 
- "raw_counts" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--do_subset", - "description" : "Whether to subset before storing the output.", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_name_filter", - "description" : "In which .obs slot to store a boolean array corresponding to which observations should be removed.", - "default" : [ - "filter_with_counts" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_name_filter", - "description" : "In which .var slot to store a boolean array corresponding to which variables should be removed.", - "default" : [ - "filter_with_counts" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "integer", - "name" : "--min_counts", - "description" : "Minimum number of counts captured per cell.", - "example" : [ - 200 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : 
"--max_counts", - "description" : "Maximum number of counts captured per cell.", - "example" : [ - 5000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_genes_per_cell", - "description" : "Minimum of non-zero values per cell.", - "example" : [ - 200 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--max_genes_per_cell", - "description" : "Maximum of non-zero values per cell.", - "example" : [ - 1500000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_cells_per_gene", - "description" : "Minimum of non-zero values per gene.", - "example" : [ - 3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Filter scRNA-seq data based on the primary QC metrics. 
\nThis is based on both the UMI counts, the gene counts \nand the mitochondrial genes (genes starting with mt/MT).\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - 
"mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_counts", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN - -import mudata as mu -import numpy as np -import sys -from operator import le, ge, gt - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_counts': $( if [ ! -z ${VIASH_PAR_MAX_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MAX_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_CELL//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MAX_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MAX_GENES_PER_CELL//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_cells_per_gene': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_PER_GENE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input data") -mdata = mu.read_h5mu(par["input"]) - -mdata.var_names_make_unique() - -mod = par['modality'] -logger.info("Processing modality %s.", mod) -data = mdata.mod[mod] - -logger.info("\\\\tUnfiltered data: %s", data) - -logger.info("\\\\tComputing aggregations.") -n_counts_per_cell = np.ravel(np.sum(data.X, axis=1)) -n_cells_per_gene = np.sum(data.X > 0, axis=0) -n_genes_per_cell = np.sum(data.X > 0, axis=1) - -def apply_filter_to_mask(mask, base, filter, comparator): - new_filt = np.ravel(comparator(base, filter)) - num_removed = np.sum(np.invert(new_filt) & mask) - mask &= new_filt - return num_removed, mask - -# Filter genes -keep_genes = np.repeat(True, data.n_vars) -if par["min_cells_per_gene"] is not None: - num_removed, keep_genes = apply_filter_to_mask(keep_genes, - n_cells_per_gene, - par['min_cells_per_gene'], - ge) - logger.info("\\\\tRemoving %s genes with non-zero values in <%s cells.", - num_removed, par['min_cells_per_gene']) - -# Filter cells -filters = (("min_genes_per_cell", n_genes_per_cell, ge, "\\\\tRemoving %s cells with non-zero values in <%s genes."), - ("max_genes_per_cell", n_genes_per_cell, le, "\\\\tRemoving %s cells with non-zero values in >%s genes."), - 
("min_counts", n_counts_per_cell, ge, "\\\\tRemoving %s cells with <%s total counts."), - ("max_counts", n_counts_per_cell, le, "\\\\tRemoving %s cells with >%s total counts."), - (0, np.sum(data[:,keep_genes].X, axis=1), gt, "\\\\tRemoving %s cells with %s counts")) - -keep_cells = np.repeat(True, data.n_obs) -for filter_name_or_value, base, comparator, message in filters: - try: - filter = par[filter_name_or_value] - except KeyError: - filter = filter_name_or_value - if filter is not None: - num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, filter, comparator) - logger.info(message, num_removed, filter) - -if par["obs_name_filter"] is not None: - data.obs[par["obs_name_filter"]] = keep_cells -if par["var_name_filter"] is not None: - data.var[par["var_name_filter"]] = keep_genes - -if par["do_subset"]: - mdata.mod[mod] = data[keep_cells, keep_genes] - -logger.info("\\\\tFiltered data: %s", data) -logger.info("Writing output data to %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) - -logger.info("Finished") -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/filter_filter_with_counts", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. 
the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_counts/nextflow.config b/target/nextflow/filter/filter_with_counts/nextflow.config deleted file mode 100644 index 
d49049b4e41..00000000000 --- a/target/nextflow/filter/filter_with_counts/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'filter_with_counts' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Filter scRNA-seq data based on the primary QC metrics. \nThis is based on both the UMI counts, the gene counts \nand the mitochondrial genes (genes starting with mt/MT).\n' - author = 'Dries De Maeyer, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - 
withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/filter/filter_with_counts/nextflow_params.yaml b/target/nextflow/filter/filter_with_counts/nextflow_params.yaml deleted file mode 100644 index bc01a706b90..00000000000 --- a/target/nextflow/filter/filter_with_counts/nextflow_params.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Inputs -input: # please fill in - example: "input.h5mu" -modality: "rna" -# layer: "raw_counts" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -do_subset: false -obs_name_filter: "filter_with_counts" -var_name_filter: "filter_with_counts" - -# Arguments -# min_counts: 200 -# max_counts: 5000000 -# min_genes_per_cell: 200 -# max_genes_per_cell: 1500000 -# min_cells_per_gene: 3 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/filter_with_counts/nextflow_schema.json b/target/nextflow/filter/filter_with_counts/nextflow_schema.json deleted file mode 100644 index 32090b85a08..00000000000 --- 
a/target/nextflow/filter/filter_with_counts/nextflow_schema.json +++ /dev/null @@ -1,152 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "filter_with_counts", - "description": "Filter scRNA-seq data based on the primary QC metrics. \nThis is based on both the UMI counts, the gene counts \nand the mitochondrial genes (genes starting with mt/MT).\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "layer": { - "type": "string", - "description": "Type: `string`, example: `raw_counts`. ", - "help_text": "Type: `string`, example: `raw_counts`. " - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "do_subset": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. 
Whether to subset before storing the output", - "help_text": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output.", - "default": "False" - }, - - "obs_name_filter": { - "type": "string", - "description": "Type: `string`, default: `filter_with_counts`. In which ", - "help_text": "Type: `string`, default: `filter_with_counts`. In which .obs slot to store a boolean array corresponding to which observations should be removed.", - "default": "filter_with_counts" - }, - - "var_name_filter": { - "type": "string", - "description": "Type: `string`, default: `filter_with_counts`. In which ", - "help_text": "Type: `string`, default: `filter_with_counts`. In which .var slot to store a boolean array corresponding to which variables should be removed.", - "default": "filter_with_counts" - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "min_counts": { - "type": "integer", - "description": "Type: `integer`, example: `200`. Minimum number of counts captured per cell", - "help_text": "Type: `integer`, example: `200`. Minimum number of counts captured per cell." - }, - - "max_counts": { - "type": "integer", - "description": "Type: `integer`, example: `5000000`. Maximum number of counts captured per cell", - "help_text": "Type: `integer`, example: `5000000`. Maximum number of counts captured per cell." - }, - - "min_genes_per_cell": { - "type": "integer", - "description": "Type: `integer`, example: `200`. Minimum of non-zero values per cell", - "help_text": "Type: `integer`, example: `200`. Minimum of non-zero values per cell." - }, - - "max_genes_per_cell": { - "type": "integer", - "description": "Type: `integer`, example: `1500000`. Maximum of non-zero values per cell", - "help_text": "Type: `integer`, example: `1500000`. Maximum of non-zero values per cell." - }, - - "min_cells_per_gene": { - "type": "integer", - "description": "Type: `integer`, example: `3`. 
Minimum of non-zero values per gene", - "help_text": "Type: `integer`, example: `3`. Minimum of non-zero values per gene." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/filter_with_counts/setup_logger.py b/target/nextflow/filter/filter_with_counts/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/filter/filter_with_counts/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_hvg/.config.vsh.yaml b/target/nextflow/filter/filter_with_hvg/.config.vsh.yaml deleted file mode 100644 index b0b39549609..00000000000 --- a/target/nextflow/filter/filter_with_hvg/.config.vsh.yaml +++ /dev/null @@ -1,352 +0,0 @@ -functionality: - name: "filter_with_hvg" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: 
"Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - description: "use adata.layers[layer] for expression values instead of adata.X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_name_filter" - description: "In which .var slot to store a boolean array corresponding to which\ - \ observations should be filtered out." 
- info: null - default: - - "filter_with_hvg" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--varm_name" - description: "In which .varm slot to store additional metadata." - info: null - default: - - "hvg" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--do_subset" - description: "Whether to subset before storing the output." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--flavor" - description: "Choose the flavor for identifying highly variable genes. For the\ - \ dispersion based methods\nin their default workflows, Seurat passes the cutoffs\ - \ whereas Cell Ranger passes n_top_genes.\n" - info: null - default: - - "seurat" - required: false - choices: - - "seurat" - - "cell_ranger" - - "seurat_v3" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_top_genes" - description: "Number of highly-variable genes to keep. Mandatory if flavor='seurat_v3'." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_mean" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." - info: null - default: - - 0.0125 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_mean" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." 
- info: null - default: - - 3.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_disp" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_disp" - description: "If n_top_genes is defined, this and all other cutoffs for the means\ - \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.\ - \ Default is +inf." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--span" - description: "The fraction of the data (cells) used when estimating the variance\ - \ in the loess model fit if flavor='seurat_v3'." - info: null - default: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_bins" - description: "Number of bins for binning the mean gene expression. Normalization\ - \ is done with respect to each bin. If just a single gene falls into a bin,\ - \ the normalized dispersion is artificially set to 1." - info: null - default: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch_key" - description: "If specified, highly-variable genes are selected within each batch\ - \ separately and merged. This simple \nprocess avoids the selection of batch-specific\ - \ genes and acts as a lightweight batch correction method. \nFor all flavors,\ - \ genes are first sorted by how many batches they are a HVG. For dispersion-based\ - \ flavors \nties are broken by normalized dispersion. 
If flavor = 'seurat_v3',\ - \ ties are broken by the median (across\nbatches) rank based on within-batch\ - \ normalized variance.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\n\ - Expects logarithmized data, except when flavor='seurat_v3' in which count data\ - \ is expected.\n\nDepending on flavor, this reproduces the R-implementations of\ - \ Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the\ - \ dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion\ - \ is obtained by scaling with the mean and standard deviation of the dispersions\ - \ for genes falling into a given bin for mean expression of genes. This means\ - \ that for each bin of mean expression, highly variable genes are selected.\n\n\ - For [Stuart19], a normalized variance for each gene is computed. First, the data\ - \ are standardized (i.e., z-score normalization per feature) with a regularized\ - \ standard deviation. Next, the normalized variance is computed as the variance\ - \ of each gene after the transformation. 
Genes are ranked by the normalized variance.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scikit-misc" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: 
"/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_hvg" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_hvg/filter_with_hvg" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/filter/filter_with_hvg/main.nf b/target/nextflow/filter/filter_with_hvg/main.nf deleted file mode 100644 index feafe9151b2..00000000000 --- a/target/nextflow/filter/filter_with_hvg/main.nf +++ /dev/null @@ -1,2856 +0,0 @@ -// filter_with_hvg 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (contributor) -// * Robrecht Cannoodt (maintainer, contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "filter_with_hvg", - "namespace" : "filter", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer", - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer", - "description" : "use adata.layers[layer] for 
expression values instead of adata.X.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_name_filter", - "description" : "In which .var slot to store a boolean array corresponding to which observations should be filtered out.", - "default" : [ - "filter_with_hvg" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--varm_name", - "description" : "In which .varm slot to store additional metadata.", - "default" : [ - "hvg" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--do_subset", - "description" : "Whether to subset before storing the output.", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--flavor", - "description" : "Choose the flavor for identifying highly variable genes. 
For the dispersion based methods\nin their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes n_top_genes.\n", - "default" : [ - "seurat" - ], - "required" : false, - "choices" : [ - "seurat", - "cell_ranger", - "seurat_v3" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_top_genes", - "description" : "Number of highly-variable genes to keep. Mandatory if flavor='seurat_v3'.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--min_mean", - "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.", - "default" : [ - 0.0125 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--max_mean", - "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.", - "default" : [ - 3.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--min_disp", - "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.", - "default" : [ - 0.5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--max_disp", - "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'. 
Default is +inf.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--span", - "description" : "The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor='seurat_v3'.", - "default" : [ - 0.3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_bins", - "description" : "Number of bins for binning the mean gene expression. Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1.", - "default" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_batch_key", - "description" : "If specified, highly-variable genes are selected within each batch separately and merged. This simple \nprocess avoids the selection of batch-specific genes and acts as a lightweight batch correction method. \nFor all flavors, genes are first sorted by how many batches they are a HVG. For dispersion-based flavors \nties are broken by normalized dispersion. 
If flavor = 'seurat_v3', ties are broken by the median (across\nbatches) rank based on within-batch normalized variance.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor='seurat_v3' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.\n\nFor [Stuart19], a normalized variance for each gene is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each gene after the transformation. 
Genes are ranked by the normalized variance.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "scikit-misc" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - 
"mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_hvg", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import scanpy as sc -import mudata as mu -import numpy as np -import sys -import re - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_name_filter': $( if [ ! 
-z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'varm_name': $( if [ ! -z ${VIASH_PAR_VARM_NAME+x} ]; then echo "r'${VIASH_PAR_VARM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'flavor': $( if [ ! -z ${VIASH_PAR_FLAVOR+x} ]; then echo "r'${VIASH_PAR_FLAVOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_mean': $( if [ ! -z ${VIASH_PAR_MIN_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MIN_MEAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_mean': $( if [ ! -z ${VIASH_PAR_MAX_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MAX_MEAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_disp': $( if [ ! -z ${VIASH_PAR_MIN_DISP+x} ]; then echo "float(r'${VIASH_PAR_MIN_DISP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_disp': $( if [ ! -z ${VIASH_PAR_MAX_DISP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DISP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'span': $( if [ ! -z ${VIASH_PAR_SPAN+x} ]; then echo "float(r'${VIASH_PAR_SPAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_bins': $( if [ ! -z ${VIASH_PAR_N_BINS+x} ]; then echo "int(r'${VIASH_PAR_N_BINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'obs_batch_key': $( if [ ! -z ${VIASH_PAR_OBS_BATCH_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -mod = par['modality'] -logger.info(f"Processing modality '%s'", mod) -data = mdata.mod[mod] - -# Workaround for issue -# https://github.com/scverse/scanpy/issues/2239 -# https://github.com/scverse/scanpy/issues/2181 -if par['flavor'] != "seurat_v3": - # This component requires log normalized data when flavor is not seurat_v3 - # We assume that the data is correctly normalized but scanpy will look at - # .uns to check the transformations performed on the data. - # To prevent scanpy from automatically tranforming the counts when they are - # already transformed, we set the appropriate values to .uns. - if 'log1p' not in data.uns: - logger.warning("When flavor is not set to 'seurat_v3', " - "the input data for this component must be log-transformed. " - "However, the 'log1p' dictionairy in .uns has not been set. " - "This is fine if you did not log transform your data with scanpy." 
- "Otherwise, please check if you are providing log transformed " - "data using --layer.") - data.uns['log1p'] = {'base': None} - elif 'log1p' in data.uns and 'base' not in data.uns['log1p']: - data.uns['log1p']['base'] = None - -logger.info("\\\\tUnfiltered data: %s", data) - -logger.info("\\\\tComputing hvg") -# construct arguments -hvg_args = { - 'adata': data, - 'n_top_genes': par["n_top_genes"], - 'min_mean': par["min_mean"], - 'max_mean': par["max_mean"], - 'min_disp': par["min_disp"], - 'span': par["span"], - 'n_bins': par["n_bins"], - 'flavor': par["flavor"], - 'subset': False, - 'inplace': False, - 'layer': par['layer'], -} - -optional_parameters = { - "max_disp": "max_disp", - "obs_batch_key": "batch_key", - "n_top_genes": "n_top_genes" -} -# only add parameter if it's passed -for par_name, dest_name in optional_parameters.items(): - if par.get(par_name): - hvg_args[dest_name] = par[par_name] - -# scanpy does not do this check, although it is stated in the documentation -if par['flavor'] == "seurat_v3" and not par['n_top_genes']: - raise ValueError("When flavor is set to 'seurat_v3', you are required to set 'n_top_genes'.") - -if par["layer"] and not par['layer'] in data.layers: - raise ValueError(f"Layer '{par['layer']}' not found in layers for modality '{mod}'. " - f"Found layers are: {','.join(data.layers)}") -# call function -try: - out = sc.pp.highly_variable_genes(**hvg_args) - if par['obs_batch_key'] is not None: - assert (out.index == data.var.index).all(), "Expected output index values to be equivalent to the input index" -except ValueError as err: - if str(err) == "cannot specify integer \\`bins\\` when input data contains infinity": - err.args = ("Cannot specify integer \\`bins\\` when input data contains infinity. " - "Perhaps input data has not been log normalized?",) - if re.search("Bin edges must be unique:", str(err)): - raise RuntimeError("Scanpy failed to calculate hvg. 
The error " - "returned by scanpy (see above) could be the " - "result from trying to use this component on unfiltered data.") from err - raise err - -out.index = data.var.index -logger.info("\\\\tStoring output into .var") -if par.get("var_name_filter", None) is not None: - data.var[par["var_name_filter"]] = out["highly_variable"] - -if par.get("varm_name", None) is not None and 'mean_bin' in out: - # drop mean_bin as mudata/anndata doesn't support tuples - data.varm[par["varm_name"]] = out.drop("mean_bin", axis=1) - -if par["do_subset"]: - keep_feats = np.ravel(data.var[par["var_name_filter"]]) - mdata.mod[mod] = data[:,keep_feats] - -logger.info("Writing h5mu to file") -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/filter_filter_with_hvg", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_hvg/nextflow.config b/target/nextflow/filter/filter_with_hvg/nextflow.config deleted file mode 100644 index 
15803ba13b5..00000000000 --- a/target/nextflow/filter/filter_with_hvg/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'filter_with_hvg' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\'seurat_v3\' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.\n\nFor [Stuart19], a normalized variance for each gene is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each gene after the transformation. 
Genes are ranked by the normalized variance.\n' - author = 'Dries De Maeyer, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - 
withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/filter/filter_with_hvg/nextflow_params.yaml b/target/nextflow/filter/filter_with_hvg/nextflow_params.yaml deleted file mode 100644 index 1f5eaec7a25..00000000000 --- a/target/nextflow/filter/filter_with_hvg/nextflow_params.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# layer: "foo" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -var_name_filter: "filter_with_hvg" -varm_name: "hvg" -do_subset: false -flavor: "seurat" -# n_top_genes: 123 -min_mean: 0.0125 -max_mean: 3 -min_disp: 0.5 -# max_disp: 123.0 -span: 0.3 -n_bins: 20 -# obs_batch_key: "foo" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/filter_with_hvg/nextflow_schema.json b/target/nextflow/filter/filter_with_hvg/nextflow_schema.json deleted file mode 100644 index a7b6156f7ca..00000000000 --- a/target/nextflow/filter/filter_with_hvg/nextflow_schema.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "filter_with_hvg", - "description": "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\u0027seurat_v3\u0027 in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger 
[Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.\n\nFor [Stuart19], a normalized variance for each gene is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each gene after the transformation. Genes are ranked by the normalized variance.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "layer": { - "type": "string", - "description": "Type: `string`. use adata", - "help_text": "Type: `string`. use adata.layers[layer] for expression values instead of adata.X." - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "var_name_filter": { - "type": "string", - "description": "Type: `string`, default: `filter_with_hvg`. In which ", - "help_text": "Type: `string`, default: `filter_with_hvg`. In which .var slot to store a boolean array corresponding to which observations should be filtered out.", - "default": "filter_with_hvg" - }, - - "varm_name": { - "type": "string", - "description": "Type: `string`, default: `hvg`. In which ", - "help_text": "Type: `string`, default: `hvg`. In which .varm slot to store additional metadata.", - "default": "hvg" - }, - - "do_subset": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output", - "help_text": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output.", - "default": "False" - }, - - "flavor": { - "type": "string", - "description": "Type: `string`, default: `seurat`, choices: ``seurat`, `cell_ranger`, `seurat_v3``. Choose the flavor for identifying highly variable genes", - "help_text": "Type: `string`, default: `seurat`, choices: ``seurat`, `cell_ranger`, `seurat_v3``. Choose the flavor for identifying highly variable genes. For the dispersion based methods\nin their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes n_top_genes.\n", - "enum": ["seurat", "cell_ranger", "seurat_v3"] - , - "default": "seurat" - }, - - "n_top_genes": { - "type": "integer", - "description": "Type: `integer`. Number of highly-variable genes to keep", - "help_text": "Type: `integer`. Number of highly-variable genes to keep. Mandatory if flavor=\u0027seurat_v3\u0027." - }, - - "min_mean": { - "type": "number", - "description": "Type: `double`, default: `0.0125`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", - "help_text": "Type: `double`, default: `0.0125`. 
If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027.", - "default": "0.0125" - }, - - "max_mean": { - "type": "number", - "description": "Type: `double`, default: `3`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", - "help_text": "Type: `double`, default: `3`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027.", - "default": "3" - }, - - "min_disp": { - "type": "number", - "description": "Type: `double`, default: `0.5`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", - "help_text": "Type: `double`, default: `0.5`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027.", - "default": "0.5" - }, - - "max_disp": { - "type": "number", - "description": "Type: `double`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", - "help_text": "Type: `double`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027. Default is +inf." - }, - - "span": { - "type": "number", - "description": "Type: `double`, default: `0.3`. The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor=\u0027seurat_v3\u0027", - "help_text": "Type: `double`, default: `0.3`. The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor=\u0027seurat_v3\u0027.", - "default": "0.3" - }, - - "n_bins": { - "type": "integer", - "description": "Type: `integer`, default: `20`. 
Number of bins for binning the mean gene expression", - "help_text": "Type: `integer`, default: `20`. Number of bins for binning the mean gene expression. Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1.", - "default": "20" - }, - - "obs_batch_key": { - "type": "string", - "description": "Type: `string`. If specified, highly-variable genes are selected within each batch separately and merged", - "help_text": "Type: `string`. If specified, highly-variable genes are selected within each batch separately and merged. This simple \nprocess avoids the selection of batch-specific genes and acts as a lightweight batch correction method. \nFor all flavors, genes are first sorted by how many batches they are a HVG. For dispersion-based flavors \nties are broken by normalized dispersion. If flavor = \u0027seurat_v3\u0027, ties are broken by the median (across\nbatches) rank based on within-batch normalized variance.\n" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/filter_with_hvg/setup_logger.py b/target/nextflow/filter/filter_with_hvg/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/filter/filter_with_hvg/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml b/target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml deleted file mode 100644 index 1d6bb387537..00000000000 --- a/target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml +++ /dev/null @@ -1,304 +0,0 @@ -functionality: - name: "filter_with_scrublet" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - 
href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_name_filter" - description: "In which .obs slot to store a boolean array corresponding to which\ - \ observations should be filtered out." - info: null - default: - - "filter_with_scrublet" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--do_subset" - description: "Whether to subset before storing the output." - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--obs_name_doublet_score" - description: "Name of the doublet scores column in the obs slot of the returned\ - \ object." - info: null - default: - - "scrublet_doublet_score" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_counts" - description: "The number of minimal UMI counts per cell that have to be present\ - \ for initial cell detection." 
- info: null - default: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells" - description: "The number of cells in which UMIs for a gene were detected." - info: null - default: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_gene_variablity_percent" - description: "Used for gene filtering prior to PCA. Keep the most highly variable\ - \ genes (in the top min_gene_variability_pctl percentile), as measured by the\ - \ v-statistic [Klein et al., Cell 2015]." - info: null - default: - - 85.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_pca_components" - description: "Number of principal components to use during PCA dimensionality\ - \ reduction." - info: null - default: - - 30 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--distance_metric" - description: "The distance metric used for computing similarities." 
- info: null - default: - - "euclidean" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--allow_automatic_threshold_detection_fail" - description: "When scrublet fails to automatically determine the double score\ - \ threshold, \nallow the component to continue and set the output columns to\ - \ NA.\n" - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Doublet detection using the Scrublet method (Wolock, Lopez and Klein,\ - \ 2019).\nThe method tests for potential doublets by using the expression profiles\ - \ of\ncells to generate synthetic potential doubles which are tested against cells.\ - \ \nThe method returns a \"doublet score\" on which it calls for potential doublets.\n\ - \nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\ - \nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells\ - \ Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6%\ - \ ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000\ - \ ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n\ - \ ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - run_args: - - "--env NUMBA_CACHE_DIR=/tmp" - target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scrublet" - - "annoy==1.16.3" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_scrublet" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_scrublet/filter_with_scrublet" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git 
a/target/nextflow/filter/filter_with_scrublet/main.nf b/target/nextflow/filter/filter_with_scrublet/main.nf deleted file mode 100644 index 99f3b551e8d..00000000000 --- a/target/nextflow/filter/filter_with_scrublet/main.nf +++ /dev/null @@ -1,2769 +0,0 @@ -// filter_with_scrublet 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries De Maeyer (contributor) -// * Robrecht Cannoodt (maintainer, contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "filter_with_scrublet", - "namespace" : "filter", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer", - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data 
Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_name_filter", - "description" : "In which .obs slot to store a boolean array corresponding to which observations should be filtered out.", - "default" : [ - "filter_with_scrublet" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--do_subset", - "description" : "Whether to subset before storing the output.", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_name_doublet_score", - "description" : "Name of the doublet scores column in the obs slot of the returned object.", - "default" : [ - "scrublet_doublet_score" - ], - "required" : false, - 
"direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_counts", - "description" : "The number of minimal UMI counts per cell that have to be present for initial cell detection.", - "default" : [ - 2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_cells", - "description" : "The number of cells in which UMIs for a gene were detected.", - "default" : [ - 3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--min_gene_variablity_percent", - "description" : "Used for gene filtering prior to PCA. Keep the most highly variable genes (in the top min_gene_variability_pctl percentile), as measured by the v-statistic [Klein et al., Cell 2015].", - "default" : [ - 85.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--num_pca_components", - "description" : "Number of principal components to use during PCA dimensionality reduction.", - "default" : [ - 30 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--distance_metric", - "description" : "The distance metric used for computing similarities.", - "default" : [ - "euclidean" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--allow_automatic_threshold_detection_fail", - "description" : "When scrublet fails to automatically determine the double score threshold, \nallow the component to continue and set the output columns to NA.\n", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - 
"type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019).\nThe method tests for potential doublets by using the expression profiles of\ncells to generate synthetic potential doubles which are tested against cells. \nThe method returns a \\"doublet score\\" on which it calls for potential doublets.\n\nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6% ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000 ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "run_args" : [ - "--env NUMBA_CACHE_DIR=/tmp" - ], - "target_image_source" : 
"https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "build-essential" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "scrublet", - "annoy==1.16.3" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highcpu", - "midmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_scrublet", - "viash_version" : "0.7.5", 
- "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import scrublet as scr -import mudata as mu -import numpy as np -import sys -import pandas as pd - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'obs_name_doublet_score': $( if [ ! -z ${VIASH_PAR_OBS_NAME_DOUBLET_SCORE+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_DOUBLET_SCORE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_gene_variablity_percent': $( if [ ! 
-z ${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT+x} ]; then echo "float(r'${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'num_pca_components': $( if [ ! -z ${VIASH_PAR_NUM_PCA_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_PCA_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'distance_metric': $( if [ ! -z ${VIASH_PAR_DISTANCE_METRIC+x} ]; then echo "r'${VIASH_PAR_DISTANCE_METRIC//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'allow_automatic_threshold_detection_fail': $( if [ ! -z ${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL+x} ]; then echo "r'${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading %s.", par['input']) -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Processing modality '%s'.", mod) -data = mdata.mod[mod] - -logger.info("\\\\tRunning scrublet") -scrub = scr.Scrublet(data.X) - -doublet_scores, predicted_doublets = scrub.scrub_doublets( - min_counts=par["min_counts"], - min_cells=par["min_cells"], - min_gene_variability_pctl=par["min_gene_variablity_percent"], - n_prin_comps=par["num_pca_components"], - distance_metric=par["distance_metric"], - use_approx_neighbors=False -) - -try: - keep_cells = np.invert(predicted_doublets) -except TypeError: - if par['allow_automatic_threshold_detection_fail']: - # Scrublet might not throw an error and return None if it fails to detect doublets... 
- logger.info("\\\\tScrublet could not automatically detect the doublet score threshold. Setting output columns to NA.") - keep_cells = np.nan - doublet_scores = np.nan - else: - raise RuntimeError("Scrublet could not automatically detect the doublet score threshold. " - "--allow_automatic_threshold_detection_fail can be used to ignore this failure " - "and set the corresponding output columns to NA.") - -logger.info("\\\\tStoring output into .obs") -if par["obs_name_doublet_score"] is not None: - data.obs[par["obs_name_doublet_score"]] = doublet_scores - data.obs[par["obs_name_doublet_score"]] = data.obs[par["obs_name_doublet_score"]].astype("float64") -if par["obs_name_filter"] is not None: - data.obs[par["obs_name_filter"]] = keep_cells - data.obs[par["obs_name_filter"]] = data.obs[par["obs_name_filter"]].astype(pd.BooleanDtype()) - -if par["do_subset"]: - if pd.api.types.is_scalar(keep_cells) and pd.isna(keep_cells): - logger.warning("Not subsetting beacuse doublets were not predicted") - else: - mdata.mod[mod] = data[keep_cells, :] - -logger.info("Writing h5mu to %s", par["output"]) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/filter_filter_with_scrublet", - "tag" : "0.12.0" - }, - "label" : [ - "highcpu", - "midmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID 
element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_scrublet/nextflow.config b/target/nextflow/filter/filter_with_scrublet/nextflow.config deleted file mode 100644 index 
eaaa6adac90..00000000000 --- a/target/nextflow/filter/filter_with_scrublet/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'filter_with_scrublet' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019).\nThe method tests for potential doublets by using the expression profiles of\ncells to generate synthetic potential doubles which are tested against cells. \nThe method returns a "doublet score" on which it calls for potential doublets.\n\nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6% ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000 ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n' - author = 'Dries De Maeyer, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - 
shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml b/target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml deleted file mode 100644 index 33af31b238d..00000000000 --- a/target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -obs_name_filter: 
"filter_with_scrublet" -do_subset: false -obs_name_doublet_score: "scrublet_doublet_score" -min_counts: 2 -min_cells: 3 -min_gene_variablity_percent: 85 -num_pca_components: 30 -distance_metric: "euclidean" -allow_automatic_threshold_detection_fail: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/filter_with_scrublet/nextflow_schema.json b/target/nextflow/filter/filter_with_scrublet/nextflow_schema.json deleted file mode 100644 index ed54b2ce403..00000000000 --- a/target/nextflow/filter/filter_with_scrublet/nextflow_schema.json +++ /dev/null @@ -1,136 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "filter_with_scrublet", - "description": "Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019).\nThe method tests for potential doublets by using the expression profiles of\ncells to generate synthetic potential doubles which are tested against cells. \nThe method returns a \"doublet score\" on which it calls for potential doublets.\n\nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6% ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000 ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. 
", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "obs_name_filter": { - "type": "string", - "description": "Type: `string`, default: `filter_with_scrublet`. In which ", - "help_text": "Type: `string`, default: `filter_with_scrublet`. In which .obs slot to store a boolean array corresponding to which observations should be filtered out.", - "default": "filter_with_scrublet" - }, - - "do_subset": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output", - "help_text": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output.", - "default": "False" - }, - - "obs_name_doublet_score": { - "type": "string", - "description": "Type: `string`, default: `scrublet_doublet_score`. Name of the doublet scores column in the obs slot of the returned object", - "help_text": "Type: `string`, default: `scrublet_doublet_score`. Name of the doublet scores column in the obs slot of the returned object.", - "default": "scrublet_doublet_score" - }, - - "min_counts": { - "type": "integer", - "description": "Type: `integer`, default: `2`. The number of minimal UMI counts per cell that have to be present for initial cell detection", - "help_text": "Type: `integer`, default: `2`. 
The number of minimal UMI counts per cell that have to be present for initial cell detection.", - "default": "2" - }, - - "min_cells": { - "type": "integer", - "description": "Type: `integer`, default: `3`. The number of cells in which UMIs for a gene were detected", - "help_text": "Type: `integer`, default: `3`. The number of cells in which UMIs for a gene were detected.", - "default": "3" - }, - - "min_gene_variablity_percent": { - "type": "number", - "description": "Type: `double`, default: `85`. Used for gene filtering prior to PCA", - "help_text": "Type: `double`, default: `85`. Used for gene filtering prior to PCA. Keep the most highly variable genes (in the top min_gene_variability_pctl percentile), as measured by the v-statistic [Klein et al., Cell 2015].", - "default": "85" - }, - - "num_pca_components": { - "type": "integer", - "description": "Type: `integer`, default: `30`. Number of principal components to use during PCA dimensionality reduction", - "help_text": "Type: `integer`, default: `30`. Number of principal components to use during PCA dimensionality reduction.", - "default": "30" - }, - - "distance_metric": { - "type": "string", - "description": "Type: `string`, default: `euclidean`. The distance metric used for computing similarities", - "help_text": "Type: `string`, default: `euclidean`. The distance metric used for computing similarities.", - "default": "euclidean" - }, - - "allow_automatic_threshold_detection_fail": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. When scrublet fails to automatically determine the double score threshold, \nallow the component to continue and set the output columns to NA", - "help_text": "Type: `boolean_true`, default: `false`. 
When scrublet fails to automatically determine the double score threshold, \nallow the component to continue and set the output columns to NA.\n", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/filter_with_scrublet/setup_logger.py b/target/nextflow/filter/filter_with_scrublet/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/filter/filter_with_scrublet/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/filter/remove_modality/.config.vsh.yaml b/target/nextflow/filter/remove_modality/.config.vsh.yaml deleted file mode 100644 index 98d5c843268..00000000000 --- a/target/nextflow/filter/remove_modality/.config.vsh.yaml +++ /dev/null @@ -1,171 +0,0 @@ -functionality: - name: "remove_modality" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - description: 
"Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Remove a modality from a .h5mu file\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - 
directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/remove_modality" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/remove_modality/remove_modality" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/filter/remove_modality/main.nf b/target/nextflow/filter/remove_modality/main.nf deleted file mode 100644 index 1d98b535470..00000000000 --- a/target/nextflow/filter/remove_modality/main.nf +++ /dev/null @@ -1,2550 +0,0 @@ -// remove_modality 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. 
-// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "remove_modality", - "namespace" : "filter", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - 
"example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/" - } - ], - "description" : "Remove a modality from a .h5mu file\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - 
"config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/remove_modality", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from mudata import read_h5mu, MuData - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - - -input_mudata = read_h5mu(par['input']) -new_mods = {mod_name: mod for mod_name, mod - in input_mudata.mod.items() - if mod_name not in par['modality']} - -new_mudata = MuData(new_mods) -new_mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/filter_remove_modality", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/filter/remove_modality/nextflow.config b/target/nextflow/filter/remove_modality/nextflow.config deleted file mode 100644 index 
9b3ea4864fc..00000000000 --- a/target/nextflow/filter/remove_modality/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'remove_modality' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Remove a modality from a .h5mu file\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - 
withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/filter/remove_modality/nextflow_params.yaml b/target/nextflow/filter/remove_modality/nextflow_params.yaml deleted file mode 100644 index 886189a0005..00000000000 --- a/target/nextflow/filter/remove_modality/nextflow_params.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: # please fill in - example: ["foo"] -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/remove_modality/nextflow_schema.json b/target/nextflow/filter/remove_modality/nextflow_schema.json deleted file mode 100644 index e136f142cfe..00000000000 --- a/target/nextflow/filter/remove_modality/nextflow_schema.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "remove_modality", - "description": "Remove a modality from a .h5mu file\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": 
"Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: List of `string`, required, multiple_sep: `\":\"`. ", - "help_text": "Type: List of `string`, required, multiple_sep: `\":\"`. " - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/subset_h5mu/.config.vsh.yaml b/target/nextflow/filter/subset_h5mu/.config.vsh.yaml deleted file mode 100644 index cdfe7d13abb..00000000000 --- a/target/nextflow/filter/subset_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,187 +0,0 @@ -functionality: - name: "subset_h5mu" - namespace: "filter" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--number_of_observations" - description: "Number of observations to be selected from the h5mu file." - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Create a subset of a mudata file by selecting the first number of\ - \ observations\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 
64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/subset_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/subset_h5mu/subset_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/filter/subset_h5mu/main.nf b/target/nextflow/filter/subset_h5mu/main.nf deleted file mode 100644 index 0def3d04067..00000000000 --- a/target/nextflow/filter/subset_h5mu/main.nf +++ /dev/null @@ -1,2575 +0,0 @@ -// subset_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "subset_h5mu", - "namespace" : "filter", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : 
"--number_of_observations", - "description" : "Number of observations to be selected from the h5mu file.", - "example" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Create a subset of a mudata file by selecting the first number of observations\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : 
"nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/subset_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'number_of_observations': $( if [ ! -z ${VIASH_PAR_NUMBER_OF_OBSERVATIONS+x} ]; then echo "int(r'${VIASH_PAR_NUMBER_OF_OBSERVATIONS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -if __name__ == "__main__": - # read data - data = mudata.read(par["input"]) - - # subset data - if par["modality"]: - data.mod[par["modality"]] = data.mod[par["modality"]][:par["number_of_observations"]] - else: - data = data[:par["number_of_observations"]] - - # write data - data.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/filter_subset_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/filter/subset_h5mu/nextflow.config b/target/nextflow/filter/subset_h5mu/nextflow.config deleted file mode 100644 index 
5c5245bb5ad..00000000000 --- a/target/nextflow/filter/subset_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'subset_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Create a subset of a mudata file by selecting the first number of observations\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: 
mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/filter/subset_h5mu/nextflow_params.yaml b/target/nextflow/filter/subset_h5mu/nextflow_params.yaml deleted file mode 100644 index 8300904f6c0..00000000000 --- a/target/nextflow/filter/subset_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -# number_of_observations: 5 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/subset_h5mu/nextflow_schema.json b/target/nextflow/filter/subset_h5mu/nextflow_schema.json deleted file mode 100644 index b75335fbeae..00000000000 --- a/target/nextflow/filter/subset_h5mu/nextflow_schema.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "subset_h5mu", - "description": "Create a subset of a mudata file by selecting the first number of observations\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - 
"input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "number_of_observations": { - "type": "integer", - "description": "Type: `integer`, example: `5`. Number of observations to be selected from the h5mu file", - "help_text": "Type: `integer`, example: `5`. Number of observations to be selected from the h5mu file." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/filter/subset_h5mu/setup_logger.py b/target/nextflow/filter/subset_h5mu/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/filter/subset_h5mu/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/integrate/harmonypy/.config.vsh.yaml b/target/nextflow/integrate/harmonypy/.config.vsh.yaml deleted file mode 100644 index 14fe5ee1c33..00000000000 --- a/target/nextflow/integrate/harmonypy/.config.vsh.yaml +++ /dev/null @@ -1,240 +0,0 @@ -functionality: - name: "harmonypy" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - 
role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "Which .obsm slot to use as a starting PCA embedding." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_pca_integrated" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--theta" - description: "Diversity clustering penalty parameter. Specify for each variable\ - \ in group.by.vars. theta=0 does not encourage any diversity. Larger values\ - \ of theta result in more diverse clusters." - info: null - default: - - 2.0 - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_covariates" - description: "The .obs field(s) that define the covariate(s) to regress out." 
- info: null - example: - - "batch" - - "sample" - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Performs Harmony integration based as described in https://github.com/immunogenomics/harmony.\ - \ Based on an implementation in python from https://github.com/slowkow/harmonypy" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "harmonypy~=0.0.6" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: 
"memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/harmonypy" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/harmonypy/harmonypy" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/integrate/harmonypy/main.nf b/target/nextflow/integrate/harmonypy/main.nf deleted file mode 100644 index 8fd0689c681..00000000000 --- a/target/nextflow/integrate/harmonypy/main.nf +++ /dev/null @@ -1,2646 +0,0 @@ -// harmonypy 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (maintainer) -// * Robrecht Cannoodt (contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "harmonypy", - "namespace" : "integrate", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" 
: "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_input", - "description" : "Which .obsm slot to use as a starting PCA embedding.", - "default" : [ - "X_pca" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_output", - "description" : "In which .obsm slot to store the resulting integrated embedding.", - "default" : [ - "X_pca_integrated" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--theta", - "description" : "Diversity clustering penalty parameter. Specify for each variable in group.by.vars. theta=0 does not encourage any diversity. 
Larger values of theta result in more diverse clusters.", - "default" : [ - 2.0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_covariates", - "description" : "The .obs field(s) that define the covariate(s) to regress out.", - "example" : [ - "batch", - "sample" - ], - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/" - } - ], - "description" : "Performs Harmony integration based as described in https://github.com/immunogenomics/harmony. Based on an implementation in python from https://github.com/slowkow/harmonypy", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - 
"scanpy~=1.9.5", - "harmonypy~=0.0.6" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/harmonypy", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata -from 
harmonypy import run_harmony - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'theta': $( if [ ! -z ${VIASH_PAR_THETA+x} ]; then echo "list(map(float, r'${VIASH_PAR_THETA//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), - 'obs_covariates': $( if [ ! -z ${VIASH_PAR_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_OBS_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - - -def main(): - mdata = mudata.read(par["input"].strip()) - mod_name = par['modality'] - mod = mdata.mod[mod_name] - pca_embedding = mod.obsm[par['obsm_input']] - metadata = mod.obs - ho = run_harmony(pca_embedding, metadata, par['obs_covariates'], theta=par['theta']) - mod.obsm[par["obsm_output"]] = ho.Z_corr.T - mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/integrate_harmonypy", - "tag" : "0.12.0" - }, - 
"label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
- // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/integrate/harmonypy/nextflow.config b/target/nextflow/integrate/harmonypy/nextflow.config deleted file mode 100644 index 
115cc75021f..00000000000 --- a/target/nextflow/integrate/harmonypy/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'harmonypy' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs Harmony integration based as described in https://github.com/immunogenomics/harmony. Based on an implementation in python from https://github.com/slowkow/harmonypy' - author = 'Dries Schaumont, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { 
memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/integrate/harmonypy/nextflow_params.yaml b/target/nextflow/integrate/harmonypy/nextflow_params.yaml deleted file mode 100644 index 011b93747e1..00000000000 --- a/target/nextflow/integrate/harmonypy/nextflow_params.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Arguments -input: # please fill in - example: "path/to/file" -# output: "$id.$key.output.output" -# output_compression: "gzip" -modality: "rna" -obsm_input: "X_pca" -obsm_output: "X_pca_integrated" -theta: [2] -obs_covariates: # please fill in - example: ["batch", "sample"] - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/harmonypy/nextflow_schema.json b/target/nextflow/integrate/harmonypy/nextflow_schema.json deleted file mode 100644 index b46b9ac7451..00000000000 --- a/target/nextflow/integrate/harmonypy/nextflow_schema.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "harmonypy", - "description": "Performs Harmony 
integration based as described in https://github.com/immunogenomics/harmony. Based on an implementation in python from https://github.com/slowkow/harmonypy", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file", - "help_text": "Type: `file`, required. Input h5mu file" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file.", - "default": "$id.$key.output.output" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "obsm_input": { - "type": "string", - "description": "Type: `string`, default: `X_pca`. Which ", - "help_text": "Type: `string`, default: `X_pca`. Which .obsm slot to use as a starting PCA embedding.", - "default": "X_pca" - }, - - "obsm_output": { - "type": "string", - "description": "Type: `string`, default: `X_pca_integrated`. In which ", - "help_text": "Type: `string`, default: `X_pca_integrated`. In which .obsm slot to store the resulting integrated embedding.", - "default": "X_pca_integrated" - }, - - "theta": { - "type": "string", - "description": "Type: List of `double`, default: `2`, multiple_sep: `\":\"`. 
Diversity clustering penalty parameter", - "help_text": "Type: List of `double`, default: `2`, multiple_sep: `\":\"`. Diversity clustering penalty parameter. Specify for each variable in group.by.vars. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters.", - "default": "2" - }, - - "obs_covariates": { - "type": "string", - "description": "Type: List of `string`, required, example: `batch:sample`, multiple_sep: `\":\"`. The ", - "help_text": "Type: List of `string`, required, example: `batch:sample`, multiple_sep: `\":\"`. The .obs field(s) that define the covariate(s) to regress out." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/integrate/scanorama/.config.vsh.yaml b/target/nextflow/integrate/scanorama/.config.vsh.yaml deleted file mode 100644 index 556f08c99b6..00000000000 --- a/target/nextflow/integrate/scanorama/.config.vsh.yaml +++ /dev/null @@ -1,283 +0,0 @@ -functionality: - name: "scanorama" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - 
arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output .h5mu file" - info: null - default: - - "output.h5ad" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: "Column name discriminating between your batches." - info: null - default: - - "batch" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "Basis obsm slot to run scanorama on." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "The name of the field in adata.obsm where the integrated embeddings\ - \ will be stored after running this function. Defaults to X_scanorama." - info: null - default: - - "X_scanorama" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--knn" - description: "Number of nearest neighbors to use for matching." 
- info: null - default: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--batch_size" - description: "The batch size used in the alignment vector computation. Useful\ - \ when integrating very large (>100k samples) datasets. Set to large value that\ - \ runs within available memory." - info: null - default: - - 5000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--sigma" - description: "Correction smoothing parameter on Gaussian kernel." - info: null - default: - - 15.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--approx" - description: "Use approximate nearest neighbors with Python annoy; greatly speeds\ - \ up matching runtime." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alpha" - description: "Alignment score minimum cutoff" - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Use Scanorama to integrate different experiments.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - 
type: "apt" - packages: - - "procps" - - "build-essential" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "scanorama" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scanorama" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scanorama/scanorama" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/integrate/scanorama/main.nf b/target/nextflow/integrate/scanorama/main.nf deleted file mode 100644 
index ba8863ed99a..00000000000 --- a/target/nextflow/integrate/scanorama/main.nf +++ /dev/null @@ -1,2702 +0,0 @@ -// scanorama 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries De Maeyer (author) -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "scanorama", - "namespace" : "integrate", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "must_exist" : true, - 
"create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output .h5mu file", - "default" : [ - "output.h5ad" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_batch", - "description" : "Column name discriminating between your batches.", - "default" : [ - "batch" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_input", - "description" : "Basis obsm slot to run scanorama on.", - "default" : [ - "X_pca" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_output", - "description" : "The name of the field in adata.obsm where the integrated embeddings will be stored after running this function. 
Defaults to X_scanorama.", - "default" : [ - "X_scanorama" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--knn", - "description" : "Number of nearest neighbors to use for matching.", - "default" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--batch_size", - "description" : "The batch size used in the alignment vector computation. Useful when integrating very large (>100k samples) datasets. Set to large value that runs within available memory.", - "default" : [ - 5000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--sigma", - "description" : "Correction smoothing parameter on Gaussian kernel.", - "default" : [ - 15.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--approx", - "description" : "Use approximate nearest neighbors with Python annoy; greatly speeds up matching runtime.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--alpha", - "description" : "Alignment score minimum cutoff", - "default" : [ - 0.1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/" - } - ], - "description" : "Use Scanorama to integrate different experiments.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" 
: true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "build-essential" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "scanorama" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midcpu", - "highmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 
32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scanorama", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! 
-z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'knn': $( if [ ! -z ${VIASH_PAR_KNN+x} ]; then echo "int(r'${VIASH_PAR_KNN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'batch_size': $( if [ ! -z ${VIASH_PAR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_BATCH_SIZE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sigma': $( if [ ! -z ${VIASH_PAR_SIGMA+x} ]; then echo "float(r'${VIASH_PAR_SIGMA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'approx': $( if [ ! -z ${VIASH_PAR_APPROX+x} ]; then echo "r'${VIASH_PAR_APPROX//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -from scanpy.external.pp import scanorama_integrate -from mudata import read_h5mu - -mdata = read_h5mu(par["input"]) - -mod_name = par["modality"] -mod = mdata.mod[mod_name] - -# Integration. -scanorama_integrate(mod, - key=par["obs_batch"], - basis=par["obsm_input"], - adjusted_basis=par["obsm_output"], - knn=par["knn"], - alpha=par["alpha"], - sigma=par["sigma"], - approx=par["approx"], - batch_size=par["batch_size"] ) - -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/integrate_scanorama", - "tag" : "0.12.0" - }, - "label" : [ - "midcpu", - "highmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. 
the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/integrate/scanorama/nextflow.config b/target/nextflow/integrate/scanorama/nextflow.config deleted file mode 100644 index 
543c17e00db..00000000000 --- a/target/nextflow/integrate/scanorama/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'scanorama' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Use Scanorama to integrate different experiments.\n' - author = 'Dries De Maeyer, Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { 
memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/integrate/scanorama/nextflow_params.yaml b/target/nextflow/integrate/scanorama/nextflow_params.yaml deleted file mode 100644 index 0a9714d0cc5..00000000000 --- a/target/nextflow/integrate/scanorama/nextflow_params.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Arguments -input: # please fill in - example: "path/to/file" -modality: "rna" -# output: "$id.$key.output.h5ad" -# output_compression: "gzip" -obs_batch: "batch" -obsm_input: "X_pca" -obsm_output: "X_scanorama" -knn: 20 -batch_size: 5000 -sigma: 15 -approx: true -alpha: 0.1 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/scanorama/nextflow_schema.json b/target/nextflow/integrate/scanorama/nextflow_schema.json deleted file mode 100644 index 9ff3afff470..00000000000 --- a/target/nextflow/integrate/scanorama/nextflow_schema.json +++ /dev/null @@ -1,129 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "scanorama", - "description": "Use Scanorama to integrate different experiments.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - 
"type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file", - "help_text": "Type: `file`, required. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5ad`. Output ", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5ad`. Output .h5mu file", - "default": "$id.$key.output.h5ad" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "obs_batch": { - "type": "string", - "description": "Type: `string`, default: `batch`. Column name discriminating between your batches", - "help_text": "Type: `string`, default: `batch`. Column name discriminating between your batches.", - "default": "batch" - }, - - "obsm_input": { - "type": "string", - "description": "Type: `string`, default: `X_pca`. Basis obsm slot to run scanorama on", - "help_text": "Type: `string`, default: `X_pca`. Basis obsm slot to run scanorama on.", - "default": "X_pca" - }, - - "obsm_output": { - "type": "string", - "description": "Type: `string`, default: `X_scanorama`. The name of the field in adata", - "help_text": "Type: `string`, default: `X_scanorama`. The name of the field in adata.obsm where the integrated embeddings will be stored after running this function. Defaults to X_scanorama.", - "default": "X_scanorama" - }, - - "knn": { - "type": "integer", - "description": "Type: `integer`, default: `20`. 
Number of nearest neighbors to use for matching", - "help_text": "Type: `integer`, default: `20`. Number of nearest neighbors to use for matching.", - "default": "20" - }, - - "batch_size": { - "type": "integer", - "description": "Type: `integer`, default: `5000`. The batch size used in the alignment vector computation", - "help_text": "Type: `integer`, default: `5000`. The batch size used in the alignment vector computation. Useful when integrating very large (\u003e100k samples) datasets. Set to large value that runs within available memory.", - "default": "5000" - }, - - "sigma": { - "type": "number", - "description": "Type: `double`, default: `15`. Correction smoothing parameter on Gaussian kernel", - "help_text": "Type: `double`, default: `15`. Correction smoothing parameter on Gaussian kernel.", - "default": "15" - }, - - "approx": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Use approximate nearest neighbors with Python annoy; greatly speeds up matching runtime", - "help_text": "Type: `boolean`, default: `true`. Use approximate nearest neighbors with Python annoy; greatly speeds up matching runtime.", - "default": "True" - }, - - "alpha": { - "type": "number", - "description": "Type: `double`, default: `0.1`. Alignment score minimum cutoff", - "help_text": "Type: `double`, default: `0.1`. Alignment score minimum cutoff", - "default": "0.1" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
- }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/integrate/scarches/.config.vsh.yaml b/target/nextflow/integrate/scarches/.config.vsh.yaml deleted file mode 100644 index dd77332a214..00000000000 --- a/target/nextflow/integrate/scarches/.config.vsh.yaml +++ /dev/null @@ -1,331 +0,0 @@ -functionality: - name: "scarches" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file to use as a query" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - description: "Path to the directory with reference model or a web link. For\ - \ HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--dataset_name" - description: "Name of query dataset to use as a batch name. 
If not set, name\ - \ of the input file is used" - info: null - default: - - "test_dataset" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--model_output" - description: "Output directory for model" - info: null - default: - - "model" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Early stopping arguments" - arguments: - - type: "boolean" - name: "--early_stopping" - description: "Whether to perform early stopping with respect to the validation\ - \ set." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--early_stopping_monitor" - description: "Metric logged during validation set epoch." 
- info: null - default: - - "elbo_validation" - required: false - choices: - - "elbo_validation" - - "reconstruction_loss_validation" - - "kl_local_validation" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--early_stopping_patience" - description: "Number of validation epochs with no improvement after which training\ - \ will be stopped." - info: null - default: - - 45 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--early_stopping_min_delta" - description: "Minimum change in the monitored quantity to qualify as an improvement,\ - \ i.e. an absolute change of less than min_delta, will count as no improvement." - info: null - default: - - 0.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset, defaults to (20000 / number\ - \ of cells) * 400 or 400; whichever is smallest." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--reduce_lr_on_plateau" - description: "Whether to monitor validation loss and reduce learning rate when\ - \ validation set `lr_scheduler_metric` plateaus." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_factor" - description: "Factor to reduce learning rate." - info: null - default: - - 0.6 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_patience" - description: "Number of epochs with no improvement after which learning rate\ - \ will be reduced." 
- info: null - default: - - 30.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs reference mapping with scArches" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - - type: "file" - path: "resources_test/HLCA_reference_model/HLCA_reference_model.zip" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/pytorch:23.09-py3" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scvi-tools~=1.0.3" - - "pandas~=2.1.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" 
- mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scarches" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scarches/scarches" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/integrate/scarches/main.nf b/target/nextflow/integrate/scarches/main.nf deleted file mode 100644 index 4b928080a66..00000000000 --- a/target/nextflow/integrate/scarches/main.nf +++ /dev/null @@ -1,2962 +0,0 @@ -// scarches 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Vladimir Shitov - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "scarches", - "namespace" : "integrate", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Vladimir Shitov", - "info" : { - "role" : "Contributor", - "links" : { - "email" : "vladimir.shitov@helmholtz-muenchen.de", - "github" : "vladimirshitov", - "orcid" : "0000-0002-1960-8812", - "linkedin" : "vladimir-shitov-9a659513b" - }, - "organizations" : [ - { - "name" : "Helmholtz Munich", - "href" : "https://www.helmholtz-munich.de", - "role" : "PhD Candidate" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file to use as a query", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "alternatives" : [ - "-r" - ], - "description" : "Path to the directory with reference model or a web link. For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--dataset_name", - "description" : "Name of query dataset to use as a batch name. 
If not set, name of the input file is used", - "default" : [ - "test_dataset" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--model_output", - "description" : "Output directory for model", - "default" : [ - "model" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_output", - "description" : "In which .obsm slot to store the resulting integrated embedding.", - "default" : [ - "X_integrated_scanvi" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Early stopping arguments", - "arguments" : [ - { - "type" : "boolean", - "name" : "--early_stopping", - "description" : "Whether to perform early stopping with respect to the validation set.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--early_stopping_monitor", - "description" : "Metric logged during validation set epoch.", - "default" : [ - "elbo_validation" - ], - "required" : false, - "choices" : [ - 
"elbo_validation", - "reconstruction_loss_validation", - "kl_local_validation" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--early_stopping_patience", - "description" : "Number of validation epochs with no improvement after which training will be stopped.", - "default" : [ - 45 - ], - "required" : false, - "min" : 1, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--early_stopping_min_delta", - "description" : "Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.", - "default" : [ - 0.0 - ], - "required" : false, - "min" : 0.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Learning parameters", - "arguments" : [ - { - "type" : "integer", - "name" : "--max_epochs", - "description" : "Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest.", - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--reduce_lr_on_plateau", - "description" : "Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--lr_factor", - "description" : "Factor to reduce learning rate.", - "default" : [ - 0.6 - ], - "required" : false, - "min" : 0.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--lr_patience", - "description" : "Number of epochs with no improvement after which learning rate 
will be reduced.", - "default" : [ - 30.0 - ], - "required" : false, - "min" : 0.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Performs reference mapping with scArches", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/HLCA_reference_model/HLCA_reference_model.zip", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "nvcr.io/nvidia/pytorch:23.09-py3", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scvi-tools~=1.0.3", - "pandas~=2.1.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { 
- "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - }, - { - "type" : "native", - "id" : "native" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scarches", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import mudata -import scvi -from torch.cuda import is_available as cuda_is_available -try: - from torch.backends.mps import is_available as mps_is_available -except 
ModuleNotFoundError: - # Older pytorch versions - # MacOS GPUs - def mps_is_available(): - return False - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'dataset_name': $( if [ ! -z ${VIASH_PAR_DATASET_NAME+x} ]; then echo "r'${VIASH_PAR_DATASET_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'early_stopping_min_delta': $( if [ ! 
-z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def _read_model_name_from_registry(model_path) -> str: - """Read registry with information about the model, return the model name""" - registry = scvi.model.base.BaseModelClass.load_registry(model_path) - return registry["model_name"] - - -def _detect_base_model(model_path): - """Read from the model's file which scvi_tools model it contains""" - - names_to_models_map = { - "AUTOZI": scvi.model.AUTOZI, - "CondSCVI": scvi.model.CondSCVI, - "DestVI": scvi.model.DestVI, - "LinearSCVI": scvi.model.LinearSCVI, - "PEAKVI": scvi.model.PEAKVI, - "SCANVI": scvi.model.SCANVI, - "SCVI": scvi.model.SCVI, - "TOTALVI": scvi.model.TOTALVI, - "MULTIVI": scvi.model.MULTIVI, - "AmortizedLDA": scvi.model.AmortizedLDA, - "JaxSCVI": scvi.model.JaxSCVI, - } - - return 
names_to_models_map[_read_model_name_from_registry(model_path)] - - -def extract_file_name(file_path): - """Return the name of the file from path to this file - - Examples - -------- - >>> extract_file_name("resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu") - pbmc_1k_protein_v3_mms - """ - slash_position = file_path.rfind("/") - dot_position = file_path.rfind(".") - - return file_path[slash_position + 1: dot_position] - - -def map_to_existing_reference(adata_query, model_path, check_val_every_n_epoch=1): - """ - A function to map the query data to the reference atlas - - Input: - * adata_query: An AnnData object with the query - * model_path: The reference model directory - - Output: - * vae_query: the trained scvi_tools model - * adata_query: The AnnData object with the query preprocessed for the mapping to the reference - """ - model = _detect_base_model(model_path) - - try: - model.prepare_query_anndata(adata_query, model_path) - except ValueError: - logger.warning("ValueError thrown when preparing adata for mapping. 
Clearing .varm field to prevent it") - adata_query.varm.clear() - model.prepare_query_anndata(adata_query, model_path) - - # Load query data into the model - vae_query = model.load_query_data( - adata_query, - model_path, - freeze_dropout=True - ) - - # Train scArches model for query mapping - vae_query.train( - max_epochs=par["max_epochs"], - early_stopping=par['early_stopping'], - early_stopping_monitor=par['early_stopping_monitor'], - early_stopping_patience=par['early_stopping_patience'], - early_stopping_min_delta=par['early_stopping_min_delta'], - check_val_every_n_epoch=check_val_every_n_epoch, - use_gpu=(cuda_is_available() or mps_is_available()) - ) - - return vae_query, adata_query - - -def _convert_object_dtypes_to_strings(adata): - """Convert object dtypes in .var and .obs to string to prevent error when saving file""" - def convert_cols(df): - object_cols = df.columns[df.dtypes == "object"] - for col in object_cols: - df[col] = df[col].astype(str) - return df - - adata.var = convert_cols(adata.var) - adata.obs = convert_cols(adata.obs) - - return adata - - -def _get_model_path(model_path: str): - """Obtain path to the directory with reference model. If the proposed \\`model_path\\` is a .zip archive, unzip it. 
If nesessary, convert model to the new format - - Parameters - ---------- - model_path : str - Path to a directory, where to search for the model or to a zip file containing the model - - Returns - ------- - Path to a directory with reference model in format of scvi-tools>=0.15 - """ - import os - import zipfile - import tempfile - from pathlib import Path - - if os.path.isdir(model_path) and "model.pt" in os.listdir(model_path): - # Probably, the \\`model_path\\` already contains model in the output format of scvi-tools>=0.15 - return model_path - - # The model either has old format or is a zip file downloaded from Zenodo - new_directory = Path(tempfile.TemporaryDirectory().name) - - if zipfile.is_zipfile(model_path): - with zipfile.ZipFile(model_path) as archive: - archive.extractall(new_directory) - model_dir = next(new_directory.glob("**/*.pt")).parent - - else: - model_dir = next(Path(model_path).glob("**/*.pt")).parent - - if "model_params.pt" in os.listdir(model_dir): - # The model is in the \\`directory\\`, but it was generated with scvi-tools<0.15 - # TODO: for new references (that could not be SCANVI based), we need to check the base class somehow. Reading registry does not work with models generated by scvi-tools<0.15 - # Here I assume that the reference model is for HLCA and thus is SCANVI based - converted_model_path = os.path.join(model_dir, "converted") - scvi.model.SCANVI.convert_legacy_save(model_dir, converted_model_path) - return converted_model_path - - elif "model.pt" in os.listdir(model_dir): - # Archive contained model in the new format, so just return the directory - return model_dir - - else: - raise ValueError("Cannot find model in the provided reference path. Please, provide a path or a link to the directory with reference model. 
For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip") - - -def main(): - - mdata_query = mudata.read(par["input"].strip()) - adata_query = mdata_query.mod[par["modality"]].copy() - - if "dataset" not in adata_query.obs.columns: - # Write name of the dataset as batch variable - if par["dataset_name"] is None: - logger.info("Detecting dataset name") - par["dataset_name"] = extract_file_name(par["input"]) - logger.info(f"Detected {par['dataset_name']}") - - adata_query.obs["dataset"] = par["dataset_name"] - - model_path = _get_model_path(par["reference"]) - vae_query, adata_query = map_to_existing_reference(adata_query, model_path=model_path) - model_name = _read_model_name_from_registry(model_path) - - # Save info about the used model - mdata_query.mod[par["modality"]].uns["integration_method"] = model_name - - logger.info("Trying to write latent representation") - output_key = par["obsm_output"].format(model_name=model_name) - mdata_query.mod[par["modality"]].obsm[output_key] = vae_query.get_latent_representation() - - logger.info("Converting dtypes") - mdata_query.mod[par["modality"]] = _convert_object_dtypes_to_strings(mdata_query.mod[par["modality"]]) - - logger.info("Updating mudata") - try: - mdata_query.update() # Without that error might be thrown during file saving - except KeyError: - # Sometimes this error is thrown, but then everything is magically fixed, and the file gets saved normally - # This is discussed here a bit: https://github.com/scverse/mudata/issues/27 - logger.warning("KeyError was thrown during updating mudata. 
Probably, the file is fixed after that, but be careful") - - logger.info("Saving h5mu file") - mdata_query.write_h5mu(par["output"].strip(), compression=par["output_compression"]) - - logger.info("Saving model") - vae_query.save(par["model_output"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/integrate_scarches", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
- // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/integrate/scarches/nextflow.config b/target/nextflow/integrate/scarches/nextflow.config deleted file mode 100644 index 
9de392f5763..00000000000 --- a/target/nextflow/integrate/scarches/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'scarches' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs reference mapping with scArches' - author = 'Vladimir Shitov' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: 
mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/integrate/scarches/nextflow_params.yaml b/target/nextflow/integrate/scarches/nextflow_params.yaml deleted file mode 100644 index e8b0b098625..00000000000 --- a/target/nextflow/integrate/scarches/nextflow_params.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Inputs -input: # please fill in - example: "path/to/file" -modality: "rna" -reference: # please fill in - example: "path/to/file" -dataset_name: "test_dataset" - -# Outputs -# output: "$id.$key.output.output" -# output_compression: "gzip" -# model_output: "$id.$key.model_output.model_output" -obsm_output: "X_integrated_scanvi" - -# Early stopping arguments -# early_stopping: true -early_stopping_monitor: "elbo_validation" -early_stopping_patience: 45 -early_stopping_min_delta: 0.0 - -# Learning parameters -max_epochs: # please fill in - example: 123 -reduce_lr_on_plateau: true -lr_factor: 0.6 -lr_patience: 30 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/scarches/nextflow_schema.json b/target/nextflow/integrate/scarches/nextflow_schema.json deleted file mode 100644 index 6d7ad3e97e4..00000000000 --- 
a/target/nextflow/integrate/scarches/nextflow_schema.json +++ /dev/null @@ -1,189 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "scarches", - "description": "Performs reference mapping with scArches", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file to use as a query", - "help_text": "Type: `file`, required. Input h5mu file to use as a query" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "reference": { - "type": "string", - "description": "Type: `file`, required. Path to the directory with reference model or a web link", - "help_text": "Type: `file`, required. Path to the directory with reference model or a web link. For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" - }, - - "dataset_name": { - "type": "string", - "description": "Type: `string`, default: `test_dataset`. Name of query dataset to use as a batch name", - "help_text": "Type: `string`, default: `test_dataset`. Name of query dataset to use as a batch name. If not set, name of the input file is used", - "default": "test_dataset" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file.", - "default": "$id.$key.output.output" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "model_output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model", - "help_text": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model", - "default": "$id.$key.model_output.model_output" - }, - - "obsm_output": { - "type": "string", - "description": "Type: `string`, default: `X_integrated_scanvi`. In which ", - "help_text": "Type: `string`, default: `X_integrated_scanvi`. In which .obsm slot to store the resulting integrated embedding.", - "default": "X_integrated_scanvi" - } - - } - }, - "early stopping arguments" : { - "title": "Early stopping arguments", - "type": "object", - "description": "No description", - "properties": { - - "early_stopping": { - "type": "boolean", - "description": "Type: `boolean`. Whether to perform early stopping with respect to the validation set", - "help_text": "Type: `boolean`. Whether to perform early stopping with respect to the validation set." - }, - - "early_stopping_monitor": { - "type": "string", - "description": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. Metric logged during validation set epoch", - "help_text": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. Metric logged during validation set epoch.", - "enum": ["elbo_validation", "reconstruction_loss_validation", "kl_local_validation"] - , - "default": "elbo_validation" - }, - - "early_stopping_patience": { - "type": "integer", - "description": "Type: `integer`, default: `45`. 
Number of validation epochs with no improvement after which training will be stopped", - "help_text": "Type: `integer`, default: `45`. Number of validation epochs with no improvement after which training will be stopped.", - "default": "45" - }, - - "early_stopping_min_delta": { - "type": "number", - "description": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i", - "help_text": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.", - "default": "0.0" - } - - } - }, - "learning parameters" : { - "title": "Learning parameters", - "type": "object", - "description": "No description", - "properties": { - - "max_epochs": { - "type": "integer", - "description": "Type: `integer`, required. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest", - "help_text": "Type: `integer`, required. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest." - }, - - "reduce_lr_on_plateau": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus", - "help_text": "Type: `boolean`, default: `true`. Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus.", - "default": "True" - }, - - "lr_factor": { - "type": "number", - "description": "Type: `double`, default: `0.6`. Factor to reduce learning rate", - "help_text": "Type: `double`, default: `0.6`. Factor to reduce learning rate.", - "default": "0.6" - }, - - "lr_patience": { - "type": "number", - "description": "Type: `double`, default: `30`. 
Number of epochs with no improvement after which learning rate will be reduced", - "help_text": "Type: `double`, default: `30`. Number of epochs with no improvement after which learning rate will be reduced.", - "default": "30" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/early stopping arguments" - }, - { - "$ref": "#/definitions/learning parameters" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/integrate/scarches/setup_logger.py b/target/nextflow/integrate/scarches/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/integrate/scarches/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/integrate/scvi/.config.vsh.yaml b/target/nextflow/integrate/scvi/.config.vsh.yaml deleted file mode 100644 index b0336fa962b..00000000000 --- a/target/nextflow/integrate/scvi/.config.vsh.yaml +++ /dev/null @@ -1,591 +0,0 @@ -functionality: - name: "scvi" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Malte D. 
Luecken" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "malte.luecken@helmholtz-muenchen.de" - github: "LuckyMD" - orcid: "0000-0001-7464-7921" - linkedin: "malte-l%C3%BCcken-b8b21049" - twitter: "MDLuecken" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "Group Leader" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - - name: "Matthias Beyens" - roles: - - "contributor" - info: - role: "Contributor" - links: - github: "MatthiasBeyens" - orcid: "0000-0003-3304-0706" - email: "matthias.beyens@gmail.com" - linkedin: "mbeyens" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. If None, X is used" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: "Column name discriminating between your batches." 
- info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_input" - description: ".var column containing highly variable genes. By default, do not\ - \ subset genes." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_labels" - description: "Key in adata.obs for label information. Categories will automatically\ - \ be \nconverted into integer categories and saved to adata.obs['_scvi_labels'].\n\ - If None, assigns the same label to all the data.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_size_factor" - description: "Key in adata.obs for size factor information. Instead of using\ - \ library size as a size factor,\nthe provided size factor column will be\ - \ used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_categorical_covariate" - description: "Keys in adata.obs that correspond to categorical data. These covariates\ - \ can be added in\naddition to the batch covariate and are also treated as\ - \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ - \ latent space).\nThus, these should not be used for biologically-relevant\ - \ factors that you do _not_ want to correct for.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_continuous_covariate" - description: "Keys in adata.obs that correspond to continuous data. 
These covariates\ - \ can be added in\naddition to the batch covariate and are also treated as\ - \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ - \ latent space). Thus, these should not be\nused for biologically-relevant\ - \ factors that you do _not_ want to correct for.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_model" - description: "Folder where the state of the trained model will be saved to." - info: null - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_scvi_integrated" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "SCVI options" - arguments: - - type: "integer" - name: "--n_hidden_nodes" - description: "Number of nodes per hidden layer." - info: null - default: - - 128 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_dimensions_latent_space" - description: "Dimensionality of the latent space." 
- info: null - default: - - 30 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_hidden_layers" - description: "Number of hidden layers used for encoder and decoder neural-networks." - info: null - default: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--dropout_rate" - description: "Dropout rate for the neural networks." - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--dispersion" - description: "Set the behavior for the dispersion for negative binomial distributions:\n\ - - gene: dispersion parameter of negative binomial is constant per gene across\ - \ cells\n- gene-batch: dispersion can differ between different batches\n-\ - \ gene-label: dispersion can differ between different labels\n- gene-cell:\ - \ dispersion can differ for every gene in every cell\n" - info: null - default: - - "gene" - required: false - choices: - - "gene" - - "gene-batch" - - "gene-label" - - "gene-cell" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--gene_likelihood" - description: "Model used to generate the expression data from a count-based\ - \ likelihood distribution.\n- nb: Negative binomial distribution\n- zinb:\ - \ Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n" - info: null - default: - - "nb" - required: false - choices: - - "nb" - - "zinb" - - "poisson" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variational auto-encoder model options" - arguments: - - type: "string" - name: "--use_layer_normalization" - description: "Neural networks for which to enable layer normalization. 
\n" - info: null - default: - - "both" - required: false - choices: - - "encoder" - - "decoder" - - "none" - - "both" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--use_batch_normalization" - description: "Neural networks for which to enable batch normalization. \n" - info: null - default: - - "none" - required: false - choices: - - "encoder" - - "decoder" - - "none" - - "both" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_false" - name: "--encode_covariates" - description: "Whether to concatenate covariates to expression in encoder" - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--deeply_inject_covariates" - description: "Whether to concatenate covariates into output of hidden layers\ - \ in encoder/decoder. \nThis option only applies when n_layers > 1. The covariates\ - \ are concatenated to\nthe input of subsequent hidden layers.\n" - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--use_observed_lib_size" - description: "Use observed library size for RNA as scaling factor in mean of\ - \ conditional distribution.\n" - info: null - direction: "input" - dest: "par" - - name: "Early stopping arguments" - arguments: - - type: "boolean" - name: "--early_stopping" - description: "Whether to perform early stopping with respect to the validation\ - \ set." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--early_stopping_monitor" - description: "Metric logged during validation set epoch." 
- info: null - default: - - "elbo_validation" - required: false - choices: - - "elbo_validation" - - "reconstruction_loss_validation" - - "kl_local_validation" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--early_stopping_patience" - description: "Number of validation epochs with no improvement after which training\ - \ will be stopped." - info: null - default: - - 45 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--early_stopping_min_delta" - description: "Minimum change in the monitored quantity to qualify as an improvement,\ - \ i.e. an absolute change of less than min_delta, will count as no improvement." - info: null - default: - - 0.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset, defaults to (20000 / number\ - \ of cells) * 400 or 400; whichever is smallest." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--reduce_lr_on_plateau" - description: "Whether to monitor validation loss and reduce learning rate when\ - \ validation set `lr_scheduler_metric` plateaus." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_factor" - description: "Factor to reduce learning rate." - info: null - default: - - 0.6 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--lr_patience" - description: "Number of epochs with no improvement after which learning rate\ - \ will be reduced." 
- info: null - default: - - 30.0 - required: false - min: 0.0 - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Data validition" - arguments: - - type: "integer" - name: "--n_obs_min_count" - description: "Minimum number of cells threshold ensuring that every obs_batch\ - \ category has sufficient observations (cells) for model training." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_var_min_count" - description: "Minimum number of genes threshold ensuring that every var_input\ - \ filter has sufficient observations (genes) for model training." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../../utils/subset_vars.py" - description: "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "../../utils/subset_vars.py" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "nvcr.io/nvidia/pytorch:23.06-py3" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libpng-dev" - - "libjpeg-dev" - interactive: false - - type: "docker" - run: - - "pip install \"jax[cuda]\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n" - 
- type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - - type: "python" - user: false - packages: - - "numba~=0.57.1" - - "scvi-tools~=1.0.0" - upgrade: false - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midcpu" - - "midmem" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scvi" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scvi/scvi" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/integrate/scvi/main.nf b/target/nextflow/integrate/scvi/main.nf deleted file mode 100644 index 
be868125220..00000000000 --- a/target/nextflow/integrate/scvi/main.nf +++ /dev/null @@ -1,3174 +0,0 @@ -// scvi 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Malte D. Luecken (author) -// * Dries Schaumont (maintainer) -// * Matthias Beyens (contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "scvi", - "namespace" : "integrate", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Malte D. 
Luecken", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "malte.luecken@helmholtz-muenchen.de", - "github" : "LuckyMD", - "orcid" : "0000-0001-7464-7921", - "linkedin" : "malte-l%C3%BCcken-b8b21049", - "twitter" : "MDLuecken" - }, - "organizations" : [ - { - "name" : "Helmholtz Munich", - "href" : "https://www.helmholtz-munich.de", - "role" : "Group Leader" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - }, - { - "name" : "Matthias Beyens", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "MatthiasBeyens", - "orcid" : "0000-0003-3304-0706", - "email" : "matthias.beyens@gmail.com", - "linkedin" : "mbeyens" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_layer", - "description" : "Input layer to use. 
If None, X is used", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_batch", - "description" : "Column name discriminating between your batches.", - "default" : [ - "sample_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_input", - "description" : ".var column containing highly variable genes. By default, do not subset genes.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_labels", - "description" : "Key in adata.obs for label information. Categories will automatically be \nconverted into integer categories and saved to adata.obs['_scvi_labels'].\nIf None, assigns the same label to all the data.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_size_factor", - "description" : "Key in adata.obs for size factor information. Instead of using library size as a size factor,\nthe provided size factor column will be used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_categorical_covariate", - "description" : "Keys in adata.obs that correspond to categorical data. 
These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space).\nThus, these should not be used for biologically-relevant factors that you do _not_ want to correct for.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_continuous_covariate", - "description" : "Keys in adata.obs that correspond to continuous data. These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space). Thus, these should not be\nused for biologically-relevant factors that you do _not_ want to correct for.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output_model", - "description" : "Folder where the state of the trained model will be saved to.", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_output", - "description" : "In which .obsm slot to 
store the resulting integrated embedding.", - "default" : [ - "X_scvi_integrated" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "SCVI options", - "arguments" : [ - { - "type" : "integer", - "name" : "--n_hidden_nodes", - "description" : "Number of nodes per hidden layer.", - "default" : [ - 128 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_dimensions_latent_space", - "description" : "Dimensionality of the latent space.", - "default" : [ - 30 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_hidden_layers", - "description" : "Number of hidden layers used for encoder and decoder neural-networks.", - "default" : [ - 2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--dropout_rate", - "description" : "Dropout rate for the neural networks.", - "default" : [ - 0.1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--dispersion", - "description" : "Set the behavior for the dispersion for negative binomial distributions:\n- gene: dispersion parameter of negative binomial is constant per gene across cells\n- gene-batch: dispersion can differ between different batches\n- gene-label: dispersion can differ between different labels\n- gene-cell: dispersion can differ for every gene in every cell\n", - "default" : [ - "gene" - ], - "required" : false, - "choices" : [ - "gene", - "gene-batch", - "gene-label", - "gene-cell" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - 
"name" : "--gene_likelihood", - "description" : "Model used to generate the expression data from a count-based likelihood distribution.\n- nb: Negative binomial distribution\n- zinb: Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n", - "default" : [ - "nb" - ], - "required" : false, - "choices" : [ - "nb", - "zinb", - "poisson" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Variational auto-encoder model options", - "arguments" : [ - { - "type" : "string", - "name" : "--use_layer_normalization", - "description" : "Neural networks for which to enable layer normalization. \n", - "default" : [ - "both" - ], - "required" : false, - "choices" : [ - "encoder", - "decoder", - "none", - "both" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--use_batch_normalization", - "description" : "Neural networks for which to enable batch normalization. \n", - "default" : [ - "none" - ], - "required" : false, - "choices" : [ - "encoder", - "decoder", - "none", - "both" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_false", - "name" : "--encode_covariates", - "description" : "Whether to concatenate covariates to expression in encoder", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--deeply_inject_covariates", - "description" : "Whether to concatenate covariates into output of hidden layers in encoder/decoder. \nThis option only applies when n_layers > 1. 
The covariates are concatenated to\nthe input of subsequent hidden layers.\n", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--use_observed_lib_size", - "description" : "Use observed library size for RNA as scaling factor in mean of conditional distribution.\n", - "direction" : "input", - "dest" : "par" - } - ] - }, - { - "name" : "Early stopping arguments", - "arguments" : [ - { - "type" : "boolean", - "name" : "--early_stopping", - "description" : "Whether to perform early stopping with respect to the validation set.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--early_stopping_monitor", - "description" : "Metric logged during validation set epoch.", - "default" : [ - "elbo_validation" - ], - "required" : false, - "choices" : [ - "elbo_validation", - "reconstruction_loss_validation", - "kl_local_validation" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--early_stopping_patience", - "description" : "Number of validation epochs with no improvement after which training will be stopped.", - "default" : [ - 45 - ], - "required" : false, - "min" : 1, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--early_stopping_min_delta", - "description" : "Minimum change in the monitored quantity to qualify as an improvement, i.e. 
an absolute change of less than min_delta, will count as no improvement.", - "default" : [ - 0.0 - ], - "required" : false, - "min" : 0.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Learning parameters", - "arguments" : [ - { - "type" : "integer", - "name" : "--max_epochs", - "description" : "Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--reduce_lr_on_plateau", - "description" : "Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--lr_factor", - "description" : "Factor to reduce learning rate.", - "default" : [ - 0.6 - ], - "required" : false, - "min" : 0.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--lr_patience", - "description" : "Number of epochs with no improvement after which learning rate will be reduced.", - "default" : [ - 30.0 - ], - "required" : false, - "min" : 0.0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Data validition", - "arguments" : [ - { - "type" : "integer", - "name" : "--n_obs_min_count", - "description" : "Minimum number of cells threshold ensuring that every obs_batch category has sufficient observations (cells) for model training.", - "default" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_var_min_count", - "description" : 
"Minimum number of genes threshold ensuring that every var_input filter has sufficient observations (genes) for model training.", - "default" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" - }, - { - "type" : "file", - "path" : "../../utils/subset_vars.py", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" - } - ], - "description" : "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" - }, - { - "type" : "file", - "path" : "../../utils/subset_vars.py", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "nvcr.io/nvidia/pytorch:23.06-py3", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "libpng-dev", - "libjpeg-dev" - ], - "interactive" : false - }, - { - "type" : "docker", - "run" : [ - "pip install 
\\"jax[cuda]\\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n" - ] - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "numba~=0.57.1", - "scvi-tools~=1.0.0" - ], - "upgrade" : false - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midcpu", - "midmem", - "gpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scvi", - "viash_version" : 
"0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from scanpy._utils import check_nonnegative_integers -import mudata -import scvi - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_labels': $( if [ ! -z ${VIASH_PAR_OBS_LABELS+x} ]; then echo "r'${VIASH_PAR_OBS_LABELS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_size_factor': $( if [ ! -z ${VIASH_PAR_OBS_SIZE_FACTOR+x} ]; then echo "r'${VIASH_PAR_OBS_SIZE_FACTOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_categorical_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CATEGORICAL_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CATEGORICAL_COVARIATE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'obs_continuous_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CONTINUOUS_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CONTINUOUS_COVARIATE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_model': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_MODEL+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MODEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'n_hidden_nodes': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_NODES+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_NODES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_dimensions_latent_space': $( if [ ! -z ${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE+x} ]; then echo "int(r'${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_hidden_layers': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_LAYERS+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_LAYERS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'dropout_rate': $( if [ ! -z ${VIASH_PAR_DROPOUT_RATE+x} ]; then echo "float(r'${VIASH_PAR_DROPOUT_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'dispersion': $( if [ ! -z ${VIASH_PAR_DISPERSION+x} ]; then echo "r'${VIASH_PAR_DISPERSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'gene_likelihood': $( if [ ! -z ${VIASH_PAR_GENE_LIKELIHOOD+x} ]; then echo "r'${VIASH_PAR_GENE_LIKELIHOOD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'use_layer_normalization': $( if [ ! -z ${VIASH_PAR_USE_LAYER_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_LAYER_NORMALIZATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'use_batch_normalization': $( if [ ! -z ${VIASH_PAR_USE_BATCH_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_BATCH_NORMALIZATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'encode_covariates': $( if [ ! -z ${VIASH_PAR_ENCODE_COVARIATES+x} ]; then echo "r'${VIASH_PAR_ENCODE_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'deeply_inject_covariates': $( if [ ! 
-z ${VIASH_PAR_DEEPLY_INJECT_COVARIATES+x} ]; then echo "r'${VIASH_PAR_DEEPLY_INJECT_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'use_observed_lib_size': $( if [ ! -z ${VIASH_PAR_USE_OBSERVED_LIB_SIZE+x} ]; then echo "r'${VIASH_PAR_USE_OBSERVED_LIB_SIZE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_obs_min_count': $( if [ ! -z ${VIASH_PAR_N_OBS_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_OBS_MIN_COUNT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_var_min_count': $( if [ ! 
-z ${VIASH_PAR_N_VAR_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_VAR_MIN_COUNT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -import sys -sys.path.append(meta['resources_dir']) - -# START TEMPORARY WORKAROUND subset_vars -# reason: resources aren't available when using Nextflow fusion -# from subset_vars import subset_vars -def subset_vars(adata, subset_col): - return adata[:, adata.var[subset_col]].copy() - -# END TEMPORARY WORKAROUND subset_vars - -#TODO: optionally, move to qa -# https://github.com/openpipelines-bio/openpipeline/issues/435 -def check_validity_anndata(adata, layer, obs_batch, - n_obs_min_count, n_var_min_count): - assert check_nonnegative_integers( - adata.layers[layer] if layer else adata.X - ), f"Make sure input adata contains raw_counts" - - assert len(set(adata.var_names)) == len( - adata.var_names - ), f"Dataset contains multiple genes with same gene name." - - # Ensure every obs_batch category has sufficient observations - assert min(adata.obs[[obs_batch]].value_counts()) > n_obs_min_count, \\\\ - f"Anndata has fewer than {n_obs_min_count} cells." - - assert adata.n_vars > n_var_min_count, \\\\ - f"Anndata has fewer than {n_var_min_count} genes." 
- - - -def main(): - mdata = mudata.read(par["input"].strip()) - adata = mdata.mod[par['modality']] - - if par['var_input']: - # Subset to HVG - adata_subset = subset_vars(adata, subset_col=par["var_input"]).copy() - else: - adata_subset = adata.copy() - - check_validity_anndata( - adata_subset, par['input_layer'], par['obs_batch'], - par["n_obs_min_count"], par["n_var_min_count"] - ) - # Set up the data - scvi.model.SCVI.setup_anndata( - adata_subset, - batch_key=par['obs_batch'], - layer=par['input_layer'], - labels_key=par['obs_labels'], - size_factor_key=par['obs_size_factor'], - categorical_covariate_keys=par['obs_categorical_covariate'], - continuous_covariate_keys=par['obs_continuous_covariate'], - ) - - # Set up the model - vae_uns = scvi.model.SCVI( - adata_subset, - n_hidden=par["n_hidden_nodes"], - n_latent=par["n_dimensions_latent_space"], - n_layers=par["n_hidden_layers"], - dropout_rate=par["dropout_rate"], - dispersion=par["dispersion"], - gene_likelihood=par["gene_likelihood"], - use_layer_norm=par["use_layer_normalization"], - use_batch_norm=par["use_batch_normalization"], - encode_covariates=par["encode_covariates"], # Default (True) is for better scArches performance -> maybe don't use this always? - deeply_inject_covariates=par["deeply_inject_covariates"], # Default (False) for better scArches performance -> maybe don't use this always? 
- use_observed_lib_size=par["use_observed_lib_size"], # When size_factors are not passed - ) - - plan_kwargs = { - "reduce_lr_on_plateau": par['reduce_lr_on_plateau'], - "lr_patience": par['lr_patience'], - "lr_factor": par['lr_factor'], - } - - - # Train the model - vae_uns.train( - max_epochs=par['max_epochs'], - early_stopping=par['early_stopping'], - early_stopping_monitor=par['early_stopping_monitor'], - early_stopping_patience=par['early_stopping_patience'], - early_stopping_min_delta=par['early_stopping_min_delta'], - plan_kwargs=plan_kwargs, - check_val_every_n_epoch=1, - accelerator="auto", - ) - # Note: train_size=1.0 should give better results, but then can't do early_stopping on validation set - - # Get the latent output - adata.obsm[par['obsm_output']] = vae_uns.get_latent_representation() - - mdata.mod[par['modality']] = adata - mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) - if par["output_model"]: - vae_uns.save(par["output_model"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/integrate_scvi", - "tag" : "0.12.0" - }, - "label" : [ - "midcpu", - "midmem", - "gpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. 
the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/integrate/scvi/nextflow.config b/target/nextflow/integrate/scvi/nextflow.config deleted file mode 100644 index d71811418ee..00000000000 --- 
a/target/nextflow/integrate/scvi/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'scvi' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA' - author = 'Malte D. Luecken, Dries Schaumont, Matthias Beyens' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB 
} - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/integrate/scvi/nextflow_params.yaml b/target/nextflow/integrate/scvi/nextflow_params.yaml deleted file mode 100644 index a62879b7b90..00000000000 --- a/target/nextflow/integrate/scvi/nextflow_params.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# Inputs -input: # please fill in - example: "path/to/file" -modality: "rna" -# input_layer: "foo" -obs_batch: "sample_id" -# var_input: "foo" -# obs_labels: "foo" -# obs_size_factor: "foo" -# obs_categorical_covariate: ["foo"] -# obs_continuous_covariate: ["foo"] - -# Outputs -# output: "$id.$key.output.output" -# output_model: "$id.$key.output_model.output_model" -# output_compression: "gzip" -obsm_output: "X_scvi_integrated" - -# SCVI options -n_hidden_nodes: 128 -n_dimensions_latent_space: 30 -n_hidden_layers: 2 -dropout_rate: 0.1 -dispersion: "gene" -gene_likelihood: "nb" - -# Variational auto-encoder model options -use_layer_normalization: "both" -use_batch_normalization: "none" -encode_covariates: true -deeply_inject_covariates: false -use_observed_lib_size: false - -# Early stopping arguments -# early_stopping: true -early_stopping_monitor: "elbo_validation" -early_stopping_patience: 45 
-early_stopping_min_delta: 0.0 - -# Learning parameters -# max_epochs: 123 -reduce_lr_on_plateau: true -lr_factor: 0.6 -lr_patience: 30 - -# Data validition -n_obs_min_count: 0 -n_var_min_count: 0 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/scvi/nextflow_schema.json b/target/nextflow/integrate/scvi/nextflow_schema.json deleted file mode 100644 index bf382a44e1f..00000000000 --- a/target/nextflow/integrate/scvi/nextflow_schema.json +++ /dev/null @@ -1,351 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "scvi", - "description": "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file", - "help_text": "Type: `file`, required. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "input_layer": { - "type": "string", - "description": "Type: `string`. Input layer to use", - "help_text": "Type: `string`. Input layer to use. If None, X is used" - }, - - "obs_batch": { - "type": "string", - "description": "Type: `string`, default: `sample_id`. Column name discriminating between your batches", - "help_text": "Type: `string`, default: `sample_id`. Column name discriminating between your batches.", - "default": "sample_id" - }, - - "var_input": { - "type": "string", - "description": "Type: `string`. ", - "help_text": "Type: `string`. .var column containing highly variable genes. By default, do not subset genes." - }, - - "obs_labels": { - "type": "string", - "description": "Type: `string`. 
Key in adata", - "help_text": "Type: `string`. Key in adata.obs for label information. Categories will automatically be \nconverted into integer categories and saved to adata.obs[\u0027_scvi_labels\u0027].\nIf None, assigns the same label to all the data.\n" - }, - - "obs_size_factor": { - "type": "string", - "description": "Type: `string`. Key in adata", - "help_text": "Type: `string`. Key in adata.obs for size factor information. Instead of using library size as a size factor,\nthe provided size factor column will be used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n" - }, - - "obs_categorical_covariate": { - "type": "string", - "description": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata", - "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata.obs that correspond to categorical data. These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space).\nThus, these should not be used for biologically-relevant factors that you do _not_ want to correct for.\n" - }, - - "obs_continuous_covariate": { - "type": "string", - "description": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata", - "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata.obs that correspond to continuous data. These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space). Thus, these should not be\nused for biologically-relevant factors that you do _not_ want to correct for.\n" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. 
Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file.", - "default": "$id.$key.output.output" - }, - - "output_model": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output_model.output_model`. Folder where the state of the trained model will be saved to", - "help_text": "Type: `file`, default: `$id.$key.output_model.output_model`. Folder where the state of the trained model will be saved to.", - "default": "$id.$key.output_model.output_model" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "obsm_output": { - "type": "string", - "description": "Type: `string`, default: `X_scvi_integrated`. In which ", - "help_text": "Type: `string`, default: `X_scvi_integrated`. In which .obsm slot to store the resulting integrated embedding.", - "default": "X_scvi_integrated" - } - - } - }, - "scvi options" : { - "title": "SCVI options", - "type": "object", - "description": "No description", - "properties": { - - "n_hidden_nodes": { - "type": "integer", - "description": "Type: `integer`, default: `128`. Number of nodes per hidden layer", - "help_text": "Type: `integer`, default: `128`. Number of nodes per hidden layer.", - "default": "128" - }, - - "n_dimensions_latent_space": { - "type": "integer", - "description": "Type: `integer`, default: `30`. Dimensionality of the latent space", - "help_text": "Type: `integer`, default: `30`. Dimensionality of the latent space.", - "default": "30" - }, - - "n_hidden_layers": { - "type": "integer", - "description": "Type: `integer`, default: `2`. 
Number of hidden layers used for encoder and decoder neural-networks", - "help_text": "Type: `integer`, default: `2`. Number of hidden layers used for encoder and decoder neural-networks.", - "default": "2" - }, - - "dropout_rate": { - "type": "number", - "description": "Type: `double`, default: `0.1`. Dropout rate for the neural networks", - "help_text": "Type: `double`, default: `0.1`. Dropout rate for the neural networks.", - "default": "0.1" - }, - - "dispersion": { - "type": "string", - "description": "Type: `string`, default: `gene`, choices: ``gene`, `gene-batch`, `gene-label`, `gene-cell``. Set the behavior for the dispersion for negative binomial distributions:\n- gene: dispersion parameter of negative binomial is constant per gene across cells\n- gene-batch: dispersion can differ between different batches\n- gene-label: dispersion can differ between different labels\n- gene-cell: dispersion can differ for every gene in every cell\n", - "help_text": "Type: `string`, default: `gene`, choices: ``gene`, `gene-batch`, `gene-label`, `gene-cell``. Set the behavior for the dispersion for negative binomial distributions:\n- gene: dispersion parameter of negative binomial is constant per gene across cells\n- gene-batch: dispersion can differ between different batches\n- gene-label: dispersion can differ between different labels\n- gene-cell: dispersion can differ for every gene in every cell\n", - "enum": ["gene", "gene-batch", "gene-label", "gene-cell"] - , - "default": "gene" - }, - - "gene_likelihood": { - "type": "string", - "description": "Type: `string`, default: `nb`, choices: ``nb`, `zinb`, `poisson``. Model used to generate the expression data from a count-based likelihood distribution", - "help_text": "Type: `string`, default: `nb`, choices: ``nb`, `zinb`, `poisson``. 
Model used to generate the expression data from a count-based likelihood distribution.\n- nb: Negative binomial distribution\n- zinb: Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n", - "enum": ["nb", "zinb", "poisson"] - , - "default": "nb" - } - - } - }, - "variational auto-encoder model options" : { - "title": "Variational auto-encoder model options", - "type": "object", - "description": "No description", - "properties": { - - "use_layer_normalization": { - "type": "string", - "description": "Type: `string`, default: `both`, choices: ``encoder`, `decoder`, `none`, `both``. Neural networks for which to enable layer normalization", - "help_text": "Type: `string`, default: `both`, choices: ``encoder`, `decoder`, `none`, `both``. Neural networks for which to enable layer normalization. \n", - "enum": ["encoder", "decoder", "none", "both"] - , - "default": "both" - }, - - "use_batch_normalization": { - "type": "string", - "description": "Type: `string`, default: `none`, choices: ``encoder`, `decoder`, `none`, `both``. Neural networks for which to enable batch normalization", - "help_text": "Type: `string`, default: `none`, choices: ``encoder`, `decoder`, `none`, `both``. Neural networks for which to enable batch normalization. \n", - "enum": ["encoder", "decoder", "none", "both"] - , - "default": "none" - }, - - "encode_covariates": { - "type": "boolean", - "description": "Type: `boolean_false`, default: `true`. Whether to concatenate covariates to expression in encoder", - "help_text": "Type: `boolean_false`, default: `true`. Whether to concatenate covariates to expression in encoder", - "default": "True" - }, - - "deeply_inject_covariates": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Whether to concatenate covariates into output of hidden layers in encoder/decoder", - "help_text": "Type: `boolean_true`, default: `false`. 
Whether to concatenate covariates into output of hidden layers in encoder/decoder. \nThis option only applies when n_layers \u003e 1. The covariates are concatenated to\nthe input of subsequent hidden layers.\n", - "default": "False" - }, - - "use_observed_lib_size": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Use observed library size for RNA as scaling factor in mean of conditional distribution", - "help_text": "Type: `boolean_true`, default: `false`. Use observed library size for RNA as scaling factor in mean of conditional distribution.\n", - "default": "False" - } - - } - }, - "early stopping arguments" : { - "title": "Early stopping arguments", - "type": "object", - "description": "No description", - "properties": { - - "early_stopping": { - "type": "boolean", - "description": "Type: `boolean`. Whether to perform early stopping with respect to the validation set", - "help_text": "Type: `boolean`. Whether to perform early stopping with respect to the validation set." - }, - - "early_stopping_monitor": { - "type": "string", - "description": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. Metric logged during validation set epoch", - "help_text": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. Metric logged during validation set epoch.", - "enum": ["elbo_validation", "reconstruction_loss_validation", "kl_local_validation"] - , - "default": "elbo_validation" - }, - - "early_stopping_patience": { - "type": "integer", - "description": "Type: `integer`, default: `45`. Number of validation epochs with no improvement after which training will be stopped", - "help_text": "Type: `integer`, default: `45`. 
Number of validation epochs with no improvement after which training will be stopped.", - "default": "45" - }, - - "early_stopping_min_delta": { - "type": "number", - "description": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i", - "help_text": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.", - "default": "0.0" - } - - } - }, - "learning parameters" : { - "title": "Learning parameters", - "type": "object", - "description": "No description", - "properties": { - - "max_epochs": { - "type": "integer", - "description": "Type: `integer`. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest", - "help_text": "Type: `integer`. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest." - }, - - "reduce_lr_on_plateau": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus", - "help_text": "Type: `boolean`, default: `true`. Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus.", - "default": "True" - }, - - "lr_factor": { - "type": "number", - "description": "Type: `double`, default: `0.6`. Factor to reduce learning rate", - "help_text": "Type: `double`, default: `0.6`. Factor to reduce learning rate.", - "default": "0.6" - }, - - "lr_patience": { - "type": "number", - "description": "Type: `double`, default: `30`. Number of epochs with no improvement after which learning rate will be reduced", - "help_text": "Type: `double`, default: `30`. 
Number of epochs with no improvement after which learning rate will be reduced.", - "default": "30" - } - - } - }, - "data validition" : { - "title": "Data validition", - "type": "object", - "description": "No description", - "properties": { - - "n_obs_min_count": { - "type": "integer", - "description": "Type: `integer`, default: `0`. Minimum number of cells threshold ensuring that every obs_batch category has sufficient observations (cells) for model training", - "help_text": "Type: `integer`, default: `0`. Minimum number of cells threshold ensuring that every obs_batch category has sufficient observations (cells) for model training.", - "default": "0" - }, - - "n_var_min_count": { - "type": "integer", - "description": "Type: `integer`, default: `0`. Minimum number of genes threshold ensuring that every var_input filter has sufficient observations (genes) for model training", - "help_text": "Type: `integer`, default: `0`. Minimum number of genes threshold ensuring that every var_input filter has sufficient observations (genes) for model training.", - "default": "0" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/scvi options" - }, - { - "$ref": "#/definitions/variational auto-encoder model options" - }, - { - "$ref": "#/definitions/early stopping arguments" - }, - { - "$ref": "#/definitions/learning parameters" - }, - { - "$ref": "#/definitions/data validition" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/integrate/scvi/subset_vars.py b/target/nextflow/integrate/scvi/subset_vars.py deleted file mode 100644 index 10011c8fcca..00000000000 --- a/target/nextflow/integrate/scvi/subset_vars.py +++ /dev/null @@ -1,16 +0,0 @@ -def subset_vars(adata, subset_col): - """Subset highly variable genes from AnnData object - - Parameters - ---------- - adata : AnnData - Annotated data object - subset_col : str - Name of the boolean column in `adata.var` that contains the information if features should be used or not - - Returns - ------- - AnnData - Copy of `adata` with subsetted features - """ - return adata[:, adata.var[subset_col]].copy() diff --git a/target/nextflow/integrate/totalvi/.config.vsh.yaml b/target/nextflow/integrate/totalvi/.config.vsh.yaml deleted file mode 100644 index b8dbc1cb07b..00000000000 --- a/target/nextflow/integrate/totalvi/.config.vsh.yaml +++ /dev/null @@ -1,348 +0,0 @@ -functionality: - name: "totalvi" - namespace: "integrate" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - 
name: "--input" - alternatives: - - "-i" - description: "Input h5mu file with query data to integrate with reference." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - description: "Input h5mu file with reference data to train the TOTALVI model." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--force_retrain" - alternatives: - - "-f" - description: "If true, retrain the model and save it to reference_model_path" - info: null - direction: "input" - dest: "par" - - type: "string" - name: "--query_modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--query_proteins_modality" - description: "Name of the modality in the input (query) h5mu file containing\ - \ protein data" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_proteins_modality" - description: "Name of the modality containing proteins in the reference" - info: null - default: - - "prot" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. If None, X is used" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: "Column name discriminating between your batches." 
- info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_input" - description: ".var column containing highly variable genes. By default, do not\ - \ subset genes." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_output" - description: "In which .obsm slot to store the resulting integrated embedding." - info: null - default: - - "X_integrated_totalvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_normalized_rna_output" - description: "In which .obsm slot to store the normalized RNA from TOTALVI." - info: null - default: - - "X_totalvi_normalized_rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_normalized_protein_output" - description: "In which .obsm slot to store the normalized protein data from\ - \ TOTALVI." - info: null - default: - - "X_totalvi_normalized_protein" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference_model_path" - description: "Directory with the reference model. 
If not exists, trained model\ - \ will be saved there" - info: null - default: - - "totalvi_model_reference" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--query_model_path" - description: "Directory, where the query model will be saved" - info: null - default: - - "totalvi_model_query" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--max_epochs" - description: "Number of passes through the dataset" - info: null - default: - - 400 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_query_epochs" - description: "Number of passes through the dataset, when fine-tuning model for\ - \ query" - info: null - default: - - 200 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--weight_decay" - description: "Weight decay, when fine-tuning model for query" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9" - target_organization: "openpipelines-bio" 
- target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "torchmetrics~=0.11.0" - - "scvi-tools~=1.0.3" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/totalvi" - executable: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/totalvi/totalvi" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/integrate/totalvi/main.nf b/target/nextflow/integrate/totalvi/main.nf deleted file mode 100644 index df2d508e7ee..00000000000 --- a/target/nextflow/integrate/totalvi/main.nf +++ /dev/null @@ -1,2923 +0,0 @@ -// totalvi 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Vladimir Shitov - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "totalvi", - "namespace" : "integrate", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Vladimir Shitov", - "info" : { - "role" : "Contributor", - "links" : { - "email" : "vladimir.shitov@helmholtz-muenchen.de", - "github" : "vladimirshitov", - "orcid" : "0000-0002-1960-8812", - "linkedin" : "vladimir-shitov-9a659513b" - }, - "organizations" : [ - { - "name" : "Helmholtz Munich", - "href" : "https://www.helmholtz-munich.de", - "role" : "PhD Candidate" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file with query data to integrate with 
reference.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "alternatives" : [ - "-r" - ], - "description" : "Input h5mu file with reference data to train the TOTALVI model.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--force_retrain", - "alternatives" : [ - "-f" - ], - "description" : "If true, retrain the model and save it to reference_model_path", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--query_modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--query_proteins_modality", - "description" : "Name of the modality in the input (query) h5mu file containing protein data", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_proteins_modality", - "description" : "Name of the modality containing proteins in the reference", - "default" : [ - "prot" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_layer", - "description" : "Input layer to use. 
If None, X is used", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_batch", - "description" : "Column name discriminating between your batches.", - "default" : [ - "sample_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_input", - "description" : ".var column containing highly variable genes. By default, do not subset genes.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_output", - "description" : "In which .obsm slot to store the resulting integrated embedding.", - "default" : [ - "X_integrated_totalvi" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_normalized_rna_output", - "description" : "In which .obsm slot to store the normalized RNA from TOTALVI.", - "default" : [ - "X_totalvi_normalized_rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_normalized_protein_output", - "description" : "In which .obsm slot to store the normalized protein data from TOTALVI.", - "default" : [ - "X_totalvi_normalized_protein" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : 
"--reference_model_path", - "description" : "Directory with the reference model. If not exists, trained model will be saved there", - "default" : [ - "totalvi_model_reference" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--query_model_path", - "description" : "Directory, where the query model will be saved", - "default" : [ - "totalvi_model_query" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Learning parameters", - "arguments" : [ - { - "type" : "integer", - "name" : "--max_epochs", - "description" : "Number of passes through the dataset", - "default" : [ - 400 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--max_query_epochs", - "description" : "Number of passes through the dataset, when fine-tuning model for query", - "default" : [ - 200 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--weight_decay", - "description" : "Weight decay, when fine-tuning model for query", - "default" : [ - 0.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Performs mapping to the reference by totalvi model: 
https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "libopenblas-dev", - "liblapack-dev", - "gfortran" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "torchmetrics~=0.11.0", - "scvi-tools~=1.0.3" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - 
"mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - }, - { - "type" : "native", - "id" : "native" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/totalvi", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from typing import Tuple - -import os -import sys -import mudata -from anndata import AnnData # For type hints -from mudata import MuData # For type hints -import numpy as np -import scvi -from scipy.sparse import issparse - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'force_retrain': $( if [ ! 
-z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'query_modality': $( if [ ! -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'query_proteins_modality': $( if [ ! -z ${VIASH_PAR_QUERY_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_PROTEINS_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_proteins_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_PROTEINS_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_normalized_rna_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_normalized_protein_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_model_path': $( if [ ! 
-z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODEL_PATH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'query_model_path': $( if [ ! -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_QUERY_MODEL_PATH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_query_epochs': $( if [ ! -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_QUERY_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'weight_decay': $( if [ ! -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then echo "float(r'${VIASH_PAR_WEIGHT_DECAY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def align_proteins_names(adata_reference: AnnData, mdata_query: MuData, adata_query: AnnData, reference_proteins_key: str, query_proteins_key: str) -> AnnData: - """Make sure that proteins are located in the same .obsm slot in reference and query. 
Pad query proteins with zeros if they are absent""" - proteins_reference = adata_reference.obsm[reference_proteins_key] - - # If query has no protein data, put matrix of zeros - if not query_proteins_key or query_proteins_key not in mdata_query.mod: - adata_query.obsm[reference_proteins_key] = np.zeros((adata_query.n_obs, proteins_reference.shape[1])) - else: - # Make sure that proteins expression has the same key in query and reference - adata_query.obsm[reference_proteins_key] = adata_query.obsm[query_proteins_key] - - return adata_query - - -def extract_proteins_to_anndata(mdata: MuData, rna_modality_key, protein_modality_key, input_layer, hvg_var_key=None) -> AnnData: - """TOTALVI requires data to be stored in AnnData format with protein counts in .obsm slot. This function performs the conversion""" - adata: AnnData = mdata.mod[rna_modality_key].copy() - - if hvg_var_key: - selected_genes = adata.var_names[adata.var[hvg_var_key]] - adata = adata[:, selected_genes].copy() - - if protein_modality_key in mdata.mod: - # Put the proteins modality into .obsm slot - proteins_reference_adata = mdata.mod[protein_modality_key].copy() - - if input_layer is None: - proteins = proteins_reference_adata.X - else: - proteins = proteins_reference_adata.obsm[input_layer] - - if issparse(proteins): - proteins = proteins.toarray() - - adata.obsm[protein_modality_key] = proteins - - return adata - - -def build_reference_model(adata_reference: AnnData, max_train_epochs: int = 400) -> scvi.model.TOTALVI: - - vae_reference = scvi.model.TOTALVI(adata_reference, use_layer_norm="both", use_batch_norm="none") - vae_reference.train(max_train_epochs) - - vae_reference.save(par["reference_model_path"]) - - return vae_reference - -def is_retraining_model() -> bool: - """Decide, whether reference model should be trained. 
It happens when no model exists or force_retrain flag is on""" - - trained_model_exists = os.path.isdir(par["reference_model_path"]) and ("model.pt" in os.listdir(par["reference_model_path"])) - return not trained_model_exists or par["force_retrain"] - - -def map_query_to_reference(mdata_reference: MuData, mdata_query: MuData, adata_query: AnnData) -> Tuple[scvi.model.TOTALVI, AnnData]: - """Build model on the provided reference if necessary, and map query to the reference""" - - adata_reference: AnnData = extract_proteins_to_anndata(mdata_reference, rna_modality_key=par["reference_modality"], protein_modality_key=par["reference_proteins_modality"], - input_layer=par["input_layer"], hvg_var_key=par["var_input"]) - - scvi.model.TOTALVI.setup_anndata( - adata_reference, - batch_key=par["obs_batch"], - protein_expression_obsm_key=par["reference_proteins_modality"] - ) - - if is_retraining_model(): - vae_reference = build_reference_model(adata_reference, max_train_epochs=par["max_epochs"]) - else: - vae_reference = scvi.model.TOTALVI.load(dir_path=par["reference_model_path"], adata=adata_reference) - - adata_query: AnnData = align_proteins_names(adata_reference, mdata_query, adata_query, reference_proteins_key=par["reference_proteins_modality"], - query_proteins_key=par["query_proteins_modality"]) - - # Reorder genes and pad missing genes with 0s - scvi.model.TOTALVI.prepare_query_anndata(adata_query, vae_reference) - - # Train the model for query - vae_query = scvi.model.TOTALVI.load_query_data( - adata_query, - vae_reference - ) - vae_query.train(par["max_query_epochs"], plan_kwargs=dict(weight_decay=par["weight_decay"])) - - return vae_query, adata_query - -def main(): - mdata_query = mudata.read(par["input"].strip()) - adata_query = extract_proteins_to_anndata(mdata_query, - rna_modality_key=par["query_modality"], - protein_modality_key=par["query_proteins_modality"], - input_layer=par["input_layer"], - hvg_var_key=par["var_input"]) - - if 
par["reference"].endswith(".h5mu"): - logger.info("Reading reference") - mdata_reference = mudata.read(par["reference"].strip()) - - logger.info("Mapping query to the reference") - vae_query, adata_query = map_query_to_reference(mdata_reference, mdata_query, adata_query) - else: - raise ValueError("Incorrect format of reference, please provide a .h5mu file") - - adata_query.uns["integration_method"] = "totalvi" - - logger.info("Getting the latent representation of query") - mdata_query.mod[par["query_modality"]].obsm[par["obsm_output"]] = vae_query.get_latent_representation() - - norm_rna, norm_protein = vae_query.get_normalized_expression() - mdata_query.mod[par["query_modality"]].obsm[par["obsm_normalized_rna_output"]] = norm_rna.to_numpy() - - if par["query_proteins_modality"] in mdata_query.mod: - mdata_query.mod[par["query_proteins_modality"]].obsm[par["obsm_normalized_protein_output"]] = norm_protein.to_numpy() - - logger.info("Updating mdata") - mdata_query.update() - - logger.info("Saving updated query data") - mdata_query.write_h5mu(par["output"].strip()) - - logger.info("Saving query model") - vae_query.save(par["query_model_path"], overwrite=True) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/integrate_totalvi", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - 
// Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/integrate/totalvi/nextflow.config b/target/nextflow/integrate/totalvi/nextflow.config deleted file mode 100644 index 
fced150c165..00000000000 --- a/target/nextflow/integrate/totalvi/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'totalvi' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI' - author = 'Vladimir Shitov' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: 
mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/integrate/totalvi/nextflow_params.yaml b/target/nextflow/integrate/totalvi/nextflow_params.yaml deleted file mode 100644 index 5cff159b5a1..00000000000 --- a/target/nextflow/integrate/totalvi/nextflow_params.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Inputs -input: # please fill in - example: "path/to/file" -reference: # please fill in - example: "path/to/file" -force_retrain: false -query_modality: "rna" -# query_proteins_modality: "foo" -reference_modality: "rna" -reference_proteins_modality: "prot" -# input_layer: "foo" -obs_batch: "sample_id" -# var_input: "foo" - -# Outputs -# output: "$id.$key.output.output" -obsm_output: "X_integrated_totalvi" -obsm_normalized_rna_output: "X_totalvi_normalized_rna" -obsm_normalized_protein_output: "X_totalvi_normalized_protein" -# reference_model_path: "$id.$key.reference_model_path.reference_model_path" -# query_model_path: "$id.$key.query_model_path.query_model_path" - -# Learning parameters -max_epochs: 400 -max_query_epochs: 200 -weight_decay: 0.0 - -# Nextflow input-output arguments -publish_dir: # please fill in - 
example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/totalvi/nextflow_schema.json b/target/nextflow/integrate/totalvi/nextflow_schema.json deleted file mode 100644 index 851c672489f..00000000000 --- a/target/nextflow/integrate/totalvi/nextflow_schema.json +++ /dev/null @@ -1,195 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "totalvi", - "description": "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file with query data to integrate with reference", - "help_text": "Type: `file`, required. Input h5mu file with query data to integrate with reference." - }, - - "reference": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file with reference data to train the TOTALVI model", - "help_text": "Type: `file`, required. Input h5mu file with reference data to train the TOTALVI model." - }, - - "force_retrain": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. If true, retrain the model and save it to reference_model_path", - "help_text": "Type: `boolean_true`, default: `false`. If true, retrain the model and save it to reference_model_path", - "default": "False" - }, - - "query_modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "query_proteins_modality": { - "type": "string", - "description": "Type: `string`. Name of the modality in the input (query) h5mu file containing protein data", - "help_text": "Type: `string`. 
Name of the modality in the input (query) h5mu file containing protein data" - }, - - "reference_modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "reference_proteins_modality": { - "type": "string", - "description": "Type: `string`, default: `prot`. Name of the modality containing proteins in the reference", - "help_text": "Type: `string`, default: `prot`. Name of the modality containing proteins in the reference", - "default": "prot" - }, - - "input_layer": { - "type": "string", - "description": "Type: `string`. Input layer to use", - "help_text": "Type: `string`. Input layer to use. If None, X is used" - }, - - "obs_batch": { - "type": "string", - "description": "Type: `string`, default: `sample_id`. Column name discriminating between your batches", - "help_text": "Type: `string`, default: `sample_id`. Column name discriminating between your batches.", - "default": "sample_id" - }, - - "var_input": { - "type": "string", - "description": "Type: `string`. ", - "help_text": "Type: `string`. .var column containing highly variable genes. By default, do not subset genes." - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file.", - "default": "$id.$key.output.output" - }, - - "obsm_output": { - "type": "string", - "description": "Type: `string`, default: `X_integrated_totalvi`. In which ", - "help_text": "Type: `string`, default: `X_integrated_totalvi`. 
In which .obsm slot to store the resulting integrated embedding.", - "default": "X_integrated_totalvi" - }, - - "obsm_normalized_rna_output": { - "type": "string", - "description": "Type: `string`, default: `X_totalvi_normalized_rna`. In which ", - "help_text": "Type: `string`, default: `X_totalvi_normalized_rna`. In which .obsm slot to store the normalized RNA from TOTALVI.", - "default": "X_totalvi_normalized_rna" - }, - - "obsm_normalized_protein_output": { - "type": "string", - "description": "Type: `string`, default: `X_totalvi_normalized_protein`. In which ", - "help_text": "Type: `string`, default: `X_totalvi_normalized_protein`. In which .obsm slot to store the normalized protein data from TOTALVI.", - "default": "X_totalvi_normalized_protein" - }, - - "reference_model_path": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.reference_model_path.reference_model_path`. Directory with the reference model", - "help_text": "Type: `file`, default: `$id.$key.reference_model_path.reference_model_path`. Directory with the reference model. If not exists, trained model will be saved there", - "default": "$id.$key.reference_model_path.reference_model_path" - }, - - "query_model_path": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.query_model_path.query_model_path`. Directory, where the query model will be saved", - "help_text": "Type: `file`, default: `$id.$key.query_model_path.query_model_path`. Directory, where the query model will be saved", - "default": "$id.$key.query_model_path.query_model_path" - } - - } - }, - "learning parameters" : { - "title": "Learning parameters", - "type": "object", - "description": "No description", - "properties": { - - "max_epochs": { - "type": "integer", - "description": "Type: `integer`, default: `400`. Number of passes through the dataset", - "help_text": "Type: `integer`, default: `400`. 
Number of passes through the dataset", - "default": "400" - }, - - "max_query_epochs": { - "type": "integer", - "description": "Type: `integer`, default: `200`. Number of passes through the dataset, when fine-tuning model for query", - "help_text": "Type: `integer`, default: `200`. Number of passes through the dataset, when fine-tuning model for query", - "default": "200" - }, - - "weight_decay": { - "type": "number", - "description": "Type: `double`, default: `0.0`. Weight decay, when fine-tuning model for query", - "help_text": "Type: `double`, default: `0.0`. Weight decay, when fine-tuning model for query", - "default": "0.0" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/learning parameters" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/integrate/totalvi/setup_logger.py b/target/nextflow/integrate/totalvi/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/integrate/totalvi/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/interpret/lianapy/.config.vsh.yaml b/target/nextflow/interpret/lianapy/.config.vsh.yaml deleted file mode 100644 index 1fe32159ae0..00000000000 --- a/target/nextflow/interpret/lianapy/.config.vsh.yaml +++ /dev/null @@ -1,313 +0,0 @@ 
-functionality: - name: "lianapy" - namespace: "interpret" - version: "0.12.3" - authors: - - name: "Mauro Saporita" - roles: - - "author" - info: - role: "Contributor" - links: - email: "maurosaporita@gmail.com" - github: "mauro-saporita" - linkedin: "mauro-saporita-930b06a5" - organizations: - - name: "Ardigen" - href: "https://ardigen.com" - role: "Lead Nextflow Developer" - - name: "Povilas Gibas" - roles: - - "author" - info: - role: "Contributor" - links: - email: "povilasgibas@gmail.com" - github: "PoGibas" - linkedin: "povilas-gibas" - organizations: - - name: "Ardigen" - href: "https://ardigen.com" - role: "Bioinformatician" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - default: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - description: "Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--groupby" - description: "The key of the observations grouping to consider." 
- info: null - default: - - "bulk_labels" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--resource_name" - description: "Name of the resource to be loaded and use for ligand-receptor inference." - info: null - default: - - "consensus" - required: false - choices: - - "baccin2019" - - "cellcall" - - "cellchatdb" - - "cellinker" - - "cellphonedb" - - "celltalkdb" - - "connectomedb2020" - - "consensus" - - "embrace" - - "guide2pharma" - - "hpmr" - - "icellnet" - - "italk" - - "kirouac2010" - - "lrdb" - - "mouseconsensus" - - "ramilowski2015" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--gene_symbol" - description: "Column name in var DataFrame in which gene symbol are stored." - info: null - default: - - "gene_symbol" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--expr_prop" - description: "Minimum expression proportion for the ligands/receptors (and their\ - \ subunits) in the corresponding cell identities. Set to '0', to return unfiltered\ - \ results." - info: null - default: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells" - description: "Minimum cells per cell identity ('groupby') to be considered for\ - \ downstream analysis." - info: null - default: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--aggregate_method" - description: "Method aggregation approach, one of ['mean', 'rra'], where 'mean'\ - \ represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde et\ - \ al., 2014) of the interactions." 
- info: null - default: - - "rra" - required: false - choices: - - "mean" - - "rra" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--return_all_lrs" - description: "Bool whether to return all LRs, or only those that surpass the 'expr_prop'\ - \ threshold. Those interactions that do not pass the 'expr_prop' threshold will\ - \ be assigned to the *worst* score of the ones that do. 'False' by default." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_perms" - description: "Number of permutations for the permutation test. Note that this\ - \ is relevant only for permutation-based methods - e.g. 'CellPhoneDB" - info: null - default: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Performs LIANA integration based as described in https://github.com/saezlab/liana-py" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "liana~=0.1.9" - - "numpy~=1.24.3" - upgrade: true - test_setup: - - type: "python" - user: false - 
packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/interpret/lianapy" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/interpret/lianapy/lianapy" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/interpret/lianapy/main.nf b/target/nextflow/interpret/lianapy/main.nf deleted file mode 100644 index 32a8fe671ae..00000000000 --- a/target/nextflow/interpret/lianapy/main.nf +++ /dev/null @@ -1,2757 +0,0 @@ -// lianapy 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Mauro Saporita (author) -// * Povilas Gibas (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "lianapy", - "namespace" : "interpret", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Mauro Saporita", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "email" : "maurosaporita@gmail.com", - "github" : "mauro-saporita", - "linkedin" : "mauro-saporita-930b06a5" - }, - "organizations" : [ - { - "name" : "Ardigen", - "href" : "https://ardigen.com", - "role" : "Lead Nextflow Developer" - } - ] - } - }, - { - "name" : "Povilas Gibas", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "email" : "povilasgibas@gmail.com", - "github" : "PoGibas", - "linkedin" : "povilas-gibas" - }, - "organizations" : [ - { - "name" : "Ardigen", - "href" : "https://ardigen.com", - "role" : "Bioinformatician" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : 
"output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "default" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer", - "description" : "Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--groupby", - "description" : "The key of the observations grouping to consider.", - "default" : [ - "bulk_labels" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--resource_name", - "description" : "Name of the resource to be loaded and use for ligand-receptor inference.", - "default" : [ - "consensus" - ], - "required" : false, - "choices" : [ - "baccin2019", - "cellcall", - "cellchatdb", - "cellinker", - "cellphonedb", - "celltalkdb", - "connectomedb2020", - "consensus", - "embrace", - "guide2pharma", - "hpmr", - "icellnet", - "italk", - "kirouac2010", - "lrdb", - "mouseconsensus", - "ramilowski2015" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--gene_symbol", - "description" : "Column name in var DataFrame in which gene symbol are stored.", - "default" : [ - "gene_symbol" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--expr_prop", - "description" : "Minimum 
expression proportion for the ligands/receptors (and their subunits) in the corresponding cell identities. Set to '0', to return unfiltered results.", - "default" : [ - 0.1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_cells", - "description" : "Minimum cells per cell identity ('groupby') to be considered for downstream analysis.", - "default" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--aggregate_method", - "description" : "Method aggregation approach, one of ['mean', 'rra'], where 'mean' represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde et al., 2014) of the interactions.", - "default" : [ - "rra" - ], - "required" : false, - "choices" : [ - "mean", - "rra" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--return_all_lrs", - "description" : "Bool whether to return all LRs, or only those that surpass the 'expr_prop' threshold. Those interactions that do not pass the 'expr_prop' threshold will be assigned to the *worst* score of the ones that do. 'False' by default.", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_perms", - "description" : "Number of permutations for the permutation test. Note that this is relevant only for permutation-based methods - e.g. 
'CellPhoneDB", - "default" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/" - } - ], - "description" : "Performs LIANA integration based as described in https://github.com/saezlab/liana-py", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "git" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "liana~=0.1.9", - "numpy~=1.24.3" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "lowcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - 
"publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/interpret/lianapy", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import liana -import mudata -# TODO: Remove when grouping labels exist -# For sign/PCA/ -import numpy as np - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'groupby': $( if [ ! -z ${VIASH_PAR_GROUPBY+x} ]; then echo "r'${VIASH_PAR_GROUPBY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resource_name': $( if [ ! -z ${VIASH_PAR_RESOURCE_NAME+x} ]; then echo "r'${VIASH_PAR_RESOURCE_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'gene_symbol': $( if [ ! -z ${VIASH_PAR_GENE_SYMBOL+x} ]; then echo "r'${VIASH_PAR_GENE_SYMBOL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'expr_prop': $( if [ ! -z ${VIASH_PAR_EXPR_PROP+x} ]; then echo "float(r'${VIASH_PAR_EXPR_PROP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'aggregate_method': $( if [ ! -z ${VIASH_PAR_AGGREGATE_METHOD+x} ]; then echo "r'${VIASH_PAR_AGGREGATE_METHOD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'return_all_lrs': $( if [ ! -z ${VIASH_PAR_RETURN_ALL_LRS+x} ]; then echo "r'${VIASH_PAR_RETURN_ALL_LRS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'n_perms': $( if [ ! -z ${VIASH_PAR_N_PERMS+x} ]; then echo "int(r'${VIASH_PAR_N_PERMS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - - -def main(): - - # Get input data - mdata = mudata.read(par['input'].strip()) - mod = mdata.mod[par['modality']] - - # Add dummy grouping labels when they do not exist - if par['groupby'] not in mod.obs: - foo = mod.obsm.to_df().iloc[:, 0] - mod.obs[par['groupby']] = np.sign(foo).astype('category') - - # Solve gene labels - orig_gene_label = mod.var.index - mod.var_names = mod.var[par['gene_symbol']].astype(str) - mod.var_names_make_unique() - - liana.mt.rank_aggregate( - adata = mod, - groupby = par['groupby'], - resource_name = par["resource_name"], - expr_prop = par["expr_prop"], - min_cells = par["min_cells"], - aggregate_method = par["aggregate_method"], - return_all_lrs = par["return_all_lrs"], - layer = par["layer"], - n_perms = par["n_perms"], - verbose = True, - inplace = True, - use_raw = False - ) - - # Return original gene labels - mod.var_names = orig_gene_label - - # TODO: make sure compression is needed - mdata.write_h5mu(par['output'].strip(), compression=par['output_compression']) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/interpret_lianapy", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "lowcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the 
ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/interpret/lianapy/nextflow.config b/target/nextflow/interpret/lianapy/nextflow.config deleted file mode 100644 index 
a30996e01ef..00000000000 --- a/target/nextflow/interpret/lianapy/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'lianapy' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs LIANA integration based as described in https://github.com/saezlab/liana-py' - author = 'Mauro Saporita, Povilas Gibas' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } 
- withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/interpret/lianapy/nextflow_params.yaml b/target/nextflow/interpret/lianapy/nextflow_params.yaml deleted file mode 100644 index 5d90f8fa00b..00000000000 --- a/target/nextflow/interpret/lianapy/nextflow_params.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Arguments -input: # please fill in - example: "path/to/file" -# output: "$id.$key.output.output" -output_compression: "gzip" -modality: "rna" -# layer: "foo" -groupby: "bulk_labels" -resource_name: "consensus" -gene_symbol: "gene_symbol" -expr_prop: 0.1 -min_cells: 5 -aggregate_method: "rra" -return_all_lrs: false -n_perms: 100 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/interpret/lianapy/nextflow_schema.json b/target/nextflow/interpret/lianapy/nextflow_schema.json deleted file mode 100644 index b9995300f44..00000000000 --- a/target/nextflow/interpret/lianapy/nextflow_schema.json +++ /dev/null @@ -1,140 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "lianapy", - "description": "Performs LIANA integration based as described in 
https://github.com/saezlab/liana-py", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file", - "help_text": "Type: `file`, required. Input h5mu file" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file.", - "default": "$id.$key.output.output" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - , - "default": "gzip" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "layer": { - "type": "string", - "description": "Type: `string`. Layer in anndata", - "help_text": "Type: `string`. Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X." - }, - - "groupby": { - "type": "string", - "description": "Type: `string`, default: `bulk_labels`. The key of the observations grouping to consider", - "help_text": "Type: `string`, default: `bulk_labels`. The key of the observations grouping to consider.", - "default": "bulk_labels" - }, - - "resource_name": { - "type": "string", - "description": "Type: `string`, default: `consensus`, choices: ``baccin2019`, `cellcall`, `cellchatdb`, `cellinker`, `cellphonedb`, `celltalkdb`, `connectomedb2020`, `consensus`, `embrace`, `guide2pharma`, `hpmr`, `icellnet`, `italk`, `kirouac2010`, `lrdb`, `mouseconsensus`, `ramilowski2015``. 
Name of the resource to be loaded and use for ligand-receptor inference", - "help_text": "Type: `string`, default: `consensus`, choices: ``baccin2019`, `cellcall`, `cellchatdb`, `cellinker`, `cellphonedb`, `celltalkdb`, `connectomedb2020`, `consensus`, `embrace`, `guide2pharma`, `hpmr`, `icellnet`, `italk`, `kirouac2010`, `lrdb`, `mouseconsensus`, `ramilowski2015``. Name of the resource to be loaded and use for ligand-receptor inference.", - "enum": ["baccin2019", "cellcall", "cellchatdb", "cellinker", "cellphonedb", "celltalkdb", "connectomedb2020", "consensus", "embrace", "guide2pharma", "hpmr", "icellnet", "italk", "kirouac2010", "lrdb", "mouseconsensus", "ramilowski2015"] - , - "default": "consensus" - }, - - "gene_symbol": { - "type": "string", - "description": "Type: `string`, default: `gene_symbol`. Column name in var DataFrame in which gene symbol are stored", - "help_text": "Type: `string`, default: `gene_symbol`. Column name in var DataFrame in which gene symbol are stored.", - "default": "gene_symbol" - }, - - "expr_prop": { - "type": "number", - "description": "Type: `double`, default: `0.1`. Minimum expression proportion for the ligands/receptors (and their subunits) in the corresponding cell identities", - "help_text": "Type: `double`, default: `0.1`. Minimum expression proportion for the ligands/receptors (and their subunits) in the corresponding cell identities. Set to \u00270\u0027, to return unfiltered results.", - "default": "0.1" - }, - - "min_cells": { - "type": "integer", - "description": "Type: `integer`, default: `5`. Minimum cells per cell identity (\u0027groupby\u0027) to be considered for downstream analysis", - "help_text": "Type: `integer`, default: `5`. Minimum cells per cell identity (\u0027groupby\u0027) to be considered for downstream analysis.", - "default": "5" - }, - - "aggregate_method": { - "type": "string", - "description": "Type: `string`, default: `rra`, choices: ``mean`, `rra``. 
Method aggregation approach, one of [\u0027mean\u0027, \u0027rra\u0027], where \u0027mean\u0027 represents the mean rank, while \u0027rra\u0027 is the RobustRankAggregate (Kolde et al", - "help_text": "Type: `string`, default: `rra`, choices: ``mean`, `rra``. Method aggregation approach, one of [\u0027mean\u0027, \u0027rra\u0027], where \u0027mean\u0027 represents the mean rank, while \u0027rra\u0027 is the RobustRankAggregate (Kolde et al., 2014) of the interactions.", - "enum": ["mean", "rra"] - , - "default": "rra" - }, - - "return_all_lrs": { - "type": "boolean", - "description": "Type: `boolean`, default: `false`. Bool whether to return all LRs, or only those that surpass the \u0027expr_prop\u0027 threshold", - "help_text": "Type: `boolean`, default: `false`. Bool whether to return all LRs, or only those that surpass the \u0027expr_prop\u0027 threshold. Those interactions that do not pass the \u0027expr_prop\u0027 threshold will be assigned to the *worst* score of the ones that do. \u0027False\u0027 by default.", - "default": "False" - }, - - "n_perms": { - "type": "integer", - "description": "Type: `integer`, default: `100`. Number of permutations for the permutation test", - "help_text": "Type: `integer`, default: `100`. Number of permutations for the permutation test. Note that this is relevant only for permutation-based methods - e.g. \u0027CellPhoneDB", - "default": "100" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
- }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/labels_transfer/knn/.config.vsh.yaml b/target/nextflow/labels_transfer/knn/.config.vsh.yaml deleted file mode 100644 index 8f099934250..00000000000 --- a/target/nextflow/labels_transfer/knn/.config.vsh.yaml +++ /dev/null @@ -1,379 +0,0 @@ -functionality: - name: "knn" - namespace: "labels_transfer" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - roles: - - "author" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Input dataset (query) arguments" - arguments: - - type: "file" - name: "--input" - description: "The query data to transfer the labels to. Should be a .h5mu file." - info: - label: "Query" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - slots: - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's inference,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - required: false - description: "The embedding to use for the classifier's inference.\ - \ Override using the `--input_obsm_features` argument. If not\ - \ provided, the `.X` slot will be used instead.\nMake sure that\ - \ embedding was obtained in the same way as the reference embedding\ - \ (e.g. 
by the same model or preprocessing).\n" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to use." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's inference.\ - \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ - \ was obtained in the same way as the reference embedding (e.g. by the same\ - \ model or preprocessing).\n" - info: null - example: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference dataset arguments" - arguments: - - type: "file" - name: "--reference" - description: "The reference data to train classifiers on." - info: - label: "Reference" - file_format: - type: "h5ad" - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's training,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - description: "The embedding to use for the classifier's training. Override\ - \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ - \ was obtained in the same way as the query embedding (e.g. by the same\ - \ model or preprocessing).\n" - required: true - obs: - - type: "string" - name: "targets" - multiple: true - example: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - description: "The target labels to transfer. Override using the `--reference_obs_targets`\ - \ argument." 
- required: true - example: - - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ - Make sure that embedding was obtained in the same way as the query embedding\ - \ (e.g. by the same model or preprocessing).\n" - info: null - default: - - "X_integrated_scanvi" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_targets" - description: "The `.obs` key of the target labels to tranfer." - info: null - default: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The query data in .h5mu format with predicted labels transfered\ - \ from the reference." - info: - label: "Output data" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - obs: - - type: "string" - name: "predictions" - description: "The predicted labels. Override using the `--output_obs_predictions`\ - \ argument." - required: true - - type: "double" - name: "uncertainty" - description: "The uncertainty of the predicted labels. Override using\ - \ the `--output_obs_uncertainty` argument." - required: false - obsm: - - type: "double" - name: "X_integrated_scanvi" - description: "The embedding used for the classifier's inference. 
Could\ - \ have any name, specified by `input_obsm_features` argument.\"" - required: false - uns: - - type: "string" - name: "parameters" - example: "labels_tranfer" - description: "Additional information about the parameters used for\ - \ the label transfer." - required: true - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_predictions" - description: "In which `.obs` slots to store the predicted information.\nIf\ - \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_uncertainty" - description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ - If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_uns_parameters" - description: "The `.uns` key to store additional information about the parameters\ - \ used for the label transfer." 
- info: null - default: - - "labels_transfer" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "integer" - name: "--n_neighbors" - alternatives: - - "-k" - description: "Number of nearest neighbors to use for classification" - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../utils/helper.py" - - type: "file" - path: "../../utils/setup_logger.py" - description: "Performs label transfer from reference to query using KNN classifier" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: - method_id: "KNN_pynndescent" - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "pynndescent~=0.5.8" - - "numba~=0.56.4" - - "numpy~=1.23.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - 
directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/knn" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/knn/knn" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/labels_transfer/knn/helper.py b/target/nextflow/labels_transfer/knn/helper.py deleted file mode 100644 index a90bf59efdb..00000000000 --- a/target/nextflow/labels_transfer/knn/helper.py +++ /dev/null @@ -1,32 +0,0 @@ -def check_arguments(par): - # check output .obs predictions - if not par["output_obs_predictions"]: - par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] - assert len(par["output_obs_predictions"]) == 
len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" - - # check output .obs uncertainty - if not par["output_obs_uncertainty"]: - par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] - assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" - - return par - -def get_reference_features(adata_reference, par, logger): - if par["reference_obsm_features"] is None: - logger.info("Using .X of reference data") - train_data = adata_reference.X - else: - logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") - train_data = adata_reference.obsm[par["reference_obsm_features"]] - - return train_data - -def get_query_features(adata, par, logger): - if par["input_obsm_features"] is None: - logger.info("Using .X of query data") - query_data = adata.X - else: - logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") - query_data = adata.obsm[par["input_obsm_features"]] - - return query_data \ No newline at end of file diff --git a/target/nextflow/labels_transfer/knn/main.nf b/target/nextflow/labels_transfer/knn/main.nf deleted file mode 100644 index c65164cb08a..00000000000 --- a/target/nextflow/labels_transfer/knn/main.nf +++ /dev/null @@ -1,2921 +0,0 @@ -// knn 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Vladimir Shitov (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "knn", - "namespace" : "labels_transfer", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Vladimir Shitov", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "email" : "vladimir.shitov@helmholtz-muenchen.de", - "github" : "vladimirshitov", - "orcid" : "0000-0002-1960-8812", - "linkedin" : "vladimir-shitov-9a659513b" - }, - "organizations" : [ - { - "name" : "Helmholtz Munich", - "href" : "https://www.helmholtz-munich.de", - "role" : "PhD Candidate" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input dataset (query) arguments", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "The query data to transfer the labels to. Should be a .h5mu file.", - "info" : { - "label" : "Query", - "file_format" : { - "type" : "h5mu", - "mod" : { - "rna" : { - "description" : "Modality in AnnData format containing RNA data.", - "required" : true, - "slots" : { - "X" : { - "type" : "double", - "name" : "features", - "required" : false, - "description" : "The expression data to use for the classifier's inference, if `--input_obsm_features` argument is not provided.\n" - }, - "obsm" : [ - { - "type" : "double", - "name" : "features", - "example" : "X_integrated_scanvi", - "required" : false, - "description" : "The embedding to use for the classifier's inference. Override using the `--input_obsm_features` argument. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. 
by the same model or preprocessing).\n" - } - ] - } - } - } - } - }, - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "Which modality to use.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_obsm_features", - "description" : "The `.obsm` key of the embedding to use for the classifier's inference. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. by the same model or preprocessing).\n", - "example" : [ - "X_integrated_scanvi" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Reference dataset arguments", - "arguments" : [ - { - "type" : "file", - "name" : "--reference", - "description" : "The reference data to train classifiers on.", - "info" : { - "label" : "Reference", - "file_format" : { - "type" : "h5ad", - "X" : { - "type" : "double", - "name" : "features", - "required" : false, - "description" : "The expression data to use for the classifier's training, if `--input_obsm_features` argument is not provided.\n" - }, - "obsm" : [ - { - "type" : "double", - "name" : "features", - "example" : "X_integrated_scanvi", - "description" : "The embedding to use for the classifier's training. Override using the `--reference_obsm_features` argument.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. 
by the same model or preprocessing).\n", - "required" : true - } - ], - "obs" : [ - { - "type" : "string", - "name" : "targets", - "multiple" : true, - "example" : [ - "ann_level_1", - "ann_level_2", - "ann_level_3", - "ann_level_4", - "ann_level_5", - "ann_finest_level" - ], - "description" : "The target labels to transfer. Override using the `--reference_obs_targets` argument.", - "required" : true - } - ] - } - }, - "example" : [ - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_obsm_features", - "description" : "The `.obsm` key of the embedding to use for the classifier's training.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. by the same model or preprocessing).\n", - "default" : [ - "X_integrated_scanvi" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_obs_targets", - "description" : "The `.obs` key of the target labels to tranfer.", - "default" : [ - "ann_level_1", - "ann_level_2", - "ann_level_3", - "ann_level_4", - "ann_level_5", - "ann_finest_level" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ",", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "The query data in .h5mu format with predicted labels transfered from the reference.", - "info" : { - "label" : "Output data", - "file_format" : { - "type" : "h5mu", - "mod" : { - "rna" : { - "description" : "Modality in AnnData format containing RNA data.", - "required" : true, - "obs" : [ - { - "type" : "string", - "name" : "predictions", - "description" : "The predicted labels. 
Override using the `--output_obs_predictions` argument.", - "required" : true - }, - { - "type" : "double", - "name" : "uncertainty", - "description" : "The uncertainty of the predicted labels. Override using the `--output_obs_uncertainty` argument.", - "required" : false - } - ], - "obsm" : [ - { - "type" : "double", - "name" : "X_integrated_scanvi", - "description" : "The embedding used for the classifier's inference. Could have any name, specified by `input_obsm_features` argument.\\"", - "required" : false - } - ], - "uns" : [ - { - "type" : "string", - "name" : "parameters", - "example" : "labels_tranfer", - "description" : "Additional information about the parameters used for the label transfer.", - "required" : true - } - ] - } - } - } - }, - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_obs_predictions", - "description" : "In which `.obs` slots to store the predicted information.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_pred\\"` suffix.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_obs_uncertainty", - "description" : "In which `.obs` slots to store the uncertainty of the predictions.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_uncertainty\\"` suffix.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_uns_parameters", - "description" : "The `.uns` key to store additional information about the parameters used for the label transfer.", - "default" : [ - 
"labels_transfer" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Learning parameters", - "arguments" : [ - { - "type" : "integer", - "name" : "--n_neighbors", - "alternatives" : [ - "-k" - ], - "description" : "Number of nearest neighbors to use for classification", - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" - }, - { - "type" : "file", - "path" : "../utils/helper.py", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" - }, - { - "type" : "file", - "path" : "../../utils/setup_logger.py", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" - } - ], - "description" : "Performs label transfer from reference to query using KNN classifier", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" - }, - { - "type" : "file", - "path" : "resources_test/annotation_test_data/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "info" : { - "method_id" : "KNN_pynndescent" - }, - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" 
: "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "git" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - }, - { - "type" : "apt", - "packages" : [ - "libopenblas-dev", - "liblapack-dev", - "gfortran" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "scanpy~=1.9.5", - "pynndescent~=0.5.8", - "numba~=0.56.4", - "numpy~=1.23.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" 
- } - }, - "debug" : false, - "container" : "docker" - }, - { - "type" : "native", - "id" : "native" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/knn", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import warnings - -import mudata -import numpy as np -import scanpy as sc -from scipy.sparse import issparse -import pynndescent -import numba - - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_obs_predictions': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -from helper import check_arguments, get_reference_features, get_query_features -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger - -@numba.njit -def weighted_prediction(weights, ref_cats): - """Get highest weight category.""" - N = len(weights) - predictions = np.zeros((N,), dtype=ref_cats.dtype) - uncertainty = np.zeros((N,)) - for i in range(N): - obs_weights = weights[i] - obs_cats = ref_cats[i] - best_prob = 0 - for c in np.unique(obs_cats): - cand_prob = np.sum(obs_weights[obs_cats == c]) - if cand_prob > best_prob: - best_prob = cand_prob - predictions[i] = c - uncertainty[i] = max(1 - best_prob, 0) - - return predictions, uncertainty - -def distances_to_affinities(distances): - stds = np.std(distances, axis=1) - stds = (2.0 / stds) ** 2 - stds = stds.reshape(-1, 1) - distances_tilda = np.exp(-np.true_divide(distances, stds)) - - return distances_tilda / 
np.sum(distances_tilda, axis=1, keepdims=True) - -def main(par): - logger = setup_logger() - - logger.info("Checking arguments") - par = check_arguments(par) - - logger.info("Reading input (query) data") - mdata = mudata.read(par["input"]) - adata = mdata.mod[par["modality"]] - - logger.info("Reading reference data") - adata_reference = sc.read(par["reference"], backup_url=par["reference"]) - - # fetch feature data - train_data = get_reference_features(adata_reference, par, logger) - query_data = get_query_features(adata, par, logger) - - # pynndescent does not support sparse matrices - if issparse(train_data): - warnings.warn("Converting sparse matrix to dense. This may consume a lot of memory.") - train_data = train_data.toarray() - - logger.debug(f"Shape of train data: {train_data.shape}") - - logger.info("Building NN index") - ref_nn_index = pynndescent.NNDescent(train_data, n_neighbors=par["n_neighbors"]) - ref_nn_index.prepare() - - ref_neighbors, ref_distances = ref_nn_index.query(query_data, k=par["n_neighbors"]) - - weights = distances_to_affinities(ref_distances) - - output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) - - # for each annotation level, get prediction and uncertainty - - for obs_tar, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): - logger.info(f"Predicting labels for {obs_tar}") - ref_cats = adata_reference.obs[obs_tar].cat.codes.to_numpy()[ref_neighbors] - prediction, uncertainty = weighted_prediction(weights, ref_cats) - prediction = np.asarray(adata_reference.obs[obs_tar].cat.categories)[prediction] - - adata.obs[obs_pred], adata.obs[obs_unc] = prediction, uncertainty - - # Write information about labels transfer to uns - output_uns_parameters[obs_tar] = { - "method": "KNN_pynndescent", - "n_neighbors": par["n_neighbors"], - "reference": par["reference"] - } - - adata.uns[par["output_uns_parameters"]] = output_uns_parameters - - 
mdata.mod[par['modality']] = adata - mdata.update() - mdata.write_h5mu(par['output'].strip()) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/labels_transfer_knn", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
- // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/labels_transfer/knn/nextflow.config b/target/nextflow/labels_transfer/knn/nextflow.config deleted file mode 100644 index 
bb783120861..00000000000 --- a/target/nextflow/labels_transfer/knn/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'knn' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs label transfer from reference to query using KNN classifier' - author = 'Vladimir Shitov' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory 
= 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/labels_transfer/knn/nextflow_params.yaml b/target/nextflow/labels_transfer/knn/nextflow_params.yaml deleted file mode 100644 index 672161899c3..00000000000 --- a/target/nextflow/labels_transfer/knn/nextflow_params.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# Learning parameters -n_neighbors: # please fill in - example: 123 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/labels_transfer/knn/nextflow_schema.json b/target/nextflow/labels_transfer/knn/nextflow_schema.json deleted file mode 100644 index 7af9ecce014..00000000000 --- a/target/nextflow/labels_transfer/knn/nextflow_schema.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "knn", - "description": "Performs label transfer from reference to query using KNN classifier", - "type": "object", - "definitions": { - "learning parameters" : { - "title": "Learning parameters", - "type": "object", - "description": "No description", - "properties": { - - "n_neighbors": { - "type": "integer", - "description": "Type: `integer`, required. 
Number of nearest neighbors to use for classification", - "help_text": "Type: `integer`, required. Number of nearest neighbors to use for classification" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/learning parameters" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/labels_transfer/knn/setup_logger.py b/target/nextflow/labels_transfer/knn/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/labels_transfer/knn/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/labels_transfer/xgboost/.config.vsh.yaml b/target/nextflow/labels_transfer/xgboost/.config.vsh.yaml deleted file mode 100644 index e2887b6c124..00000000000 --- a/target/nextflow/labels_transfer/xgboost/.config.vsh.yaml +++ /dev/null @@ -1,594 +0,0 @@ -functionality: - name: "xgboost" - namespace: "labels_transfer" - version: "0.12.3" - authors: - - name: "Vladimir Shitov" - roles: - - "author" - info: - role: "Contributor" - links: - email: "vladimir.shitov@helmholtz-muenchen.de" - github: "vladimirshitov" - orcid: "0000-0002-1960-8812" - linkedin: "vladimir-shitov-9a659513b" - organizations: - - name: "Helmholtz Munich" - href: "https://www.helmholtz-munich.de" - role: "PhD Candidate" - argument_groups: - - name: "Input dataset (query) 
arguments" - arguments: - - type: "file" - name: "--input" - description: "The query data to transfer the labels to. Should be a .h5mu file." - info: - label: "Query" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - slots: - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's inference,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - required: false - description: "The embedding to use for the classifier's inference.\ - \ Override using the `--input_obsm_features` argument. If not\ - \ provided, the `.X` slot will be used instead.\nMake sure that\ - \ embedding was obtained in the same way as the reference embedding\ - \ (e.g. by the same model or preprocessing).\n" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to use." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's inference.\ - \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ - \ was obtained in the same way as the reference embedding (e.g. by the same\ - \ model or preprocessing).\n" - info: null - example: - - "X_integrated_scanvi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Reference dataset arguments" - arguments: - - type: "file" - name: "--reference" - description: "The reference data to train classifiers on." 
- info: - label: "Reference" - file_format: - type: "h5ad" - X: - type: "double" - name: "features" - required: false - description: "The expression data to use for the classifier's training,\ - \ if `--input_obsm_features` argument is not provided.\n" - obsm: - - type: "double" - name: "features" - example: "X_integrated_scanvi" - description: "The embedding to use for the classifier's training. Override\ - \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ - \ was obtained in the same way as the query embedding (e.g. by the same\ - \ model or preprocessing).\n" - required: true - obs: - - type: "string" - name: "targets" - multiple: true - example: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - description: "The target labels to transfer. Override using the `--reference_obs_targets`\ - \ argument." - required: true - example: - - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obsm_features" - description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ - Make sure that embedding was obtained in the same way as the query embedding\ - \ (e.g. by the same model or preprocessing).\n" - info: null - default: - - "X_integrated_scanvi" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--reference_obs_targets" - description: "The `.obs` key of the target labels to tranfer." 
- info: null - default: - - "ann_level_1" - - "ann_level_2" - - "ann_level_3" - - "ann_level_4" - - "ann_level_5" - - "ann_finest_level" - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The query data in .h5mu format with predicted labels transfered\ - \ from the reference." - info: - label: "Output data" - file_format: - type: "h5mu" - mod: - rna: - description: "Modality in AnnData format containing RNA data." - required: true - obs: - - type: "string" - name: "predictions" - description: "The predicted labels. Override using the `--output_obs_predictions`\ - \ argument." - required: true - - type: "double" - name: "uncertainty" - description: "The uncertainty of the predicted labels. Override using\ - \ the `--output_obs_uncertainty` argument." - required: false - obsm: - - type: "double" - name: "X_integrated_scanvi" - description: "The embedding used for the classifier's inference. Could\ - \ have any name, specified by `input_obsm_features` argument.\"" - required: false - uns: - - type: "string" - name: "parameters" - example: "labels_tranfer" - description: "Additional information about the parameters used for\ - \ the label transfer." 
- required: true - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_predictions" - description: "In which `.obs` slots to store the predicted information.\nIf\ - \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_obs_uncertainty" - description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ - If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ - \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ - ` suffix.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_uns_parameters" - description: "The `.uns` key to store additional information about the parameters\ - \ used for the label transfer." - info: null - default: - - "labels_transfer" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Execution arguments" - arguments: - - type: "boolean_true" - name: "--force_retrain" - alternatives: - - "-f" - description: "Retrain models on the reference even if model_output directory\ - \ already has trained classifiers. WARNING! It will rewrite existing classifiers\ - \ for targets in the model_output directory!" - info: null - direction: "input" - dest: "par" - - type: "boolean" - name: "--use_gpu" - description: "Use GPU during models training and inference (recommended)." 
- info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--verbosity" - alternatives: - - "-v" - description: "The verbosity level for evaluation of the classifier from the\ - \ range [0,2]" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--model_output" - description: "Output directory for model" - info: null - default: - - "model" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Learning parameters" - arguments: - - type: "double" - name: "--learning_rate" - alternatives: - - "--eta" - description: "Step size shrinkage used in update to prevents overfitting. Range:\ - \ [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_split_loss" - alternatives: - - "--gamma" - description: "Minimum loss reduction required to make a further partition on\ - \ a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--max_depth" - alternatives: - - "-d" - description: "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 6 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_child_weight" - description: "Minimum sum of instance weight (hessian) needed in a child. 
See\ - \ https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_delta_step" - description: "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--subsample" - description: "Subsample ratio of the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sampling_method" - description: "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - "uniform" - required: false - choices: - - "uniform" - - "gradient_based" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bytree" - description: "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bylevel" - description: "Subsample ratio of columns for each level. Range (0, 1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--colsample_bynode" - description: "Subsample ratio of columns for each node (split). Range (0, 1].\ - \ See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--reg_lambda" - alternatives: - - "--lambda" - description: "L2 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--reg_alpha" - alternatives: - - "--alpha" - description: "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 0.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--scale_pos_weight" - description: "Control the balance of positive and negative weights, useful for\ - \ unbalanced classes. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ - \ for the reference" - info: null - default: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "../utils/helper.py" - - type: "file" - path: "src/utils/setup_logger.py" - description: "Performs label transfer from reference to query using XGBoost classifier" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/annotation_test_data/" - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/" - info: - method_id: "XGBClassifier" - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "git" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - - type: "apt" - packages: - - "libopenblas-dev" - - "liblapack-dev" - - "gfortran" - interactive: false - - type: "python" - user: false - packages: - - "scanpy~=1.9.5" - - "xgboost~=1.7.1" - - "scikit-learn~=1.1.1" - - "numpy~=1.23.5" - - "pandas~=1.4.4" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - 
labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/xgboost" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/xgboost/xgboost" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/labels_transfer/xgboost/helper.py b/target/nextflow/labels_transfer/xgboost/helper.py deleted file mode 100644 index a90bf59efdb..00000000000 --- a/target/nextflow/labels_transfer/xgboost/helper.py +++ /dev/null @@ -1,32 +0,0 @@ -def check_arguments(par): - # check output .obs predictions - if not par["output_obs_predictions"]: - par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] - assert len(par["output_obs_predictions"]) == len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" - - # check 
output .obs uncertainty - if not par["output_obs_uncertainty"]: - par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] - assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" - - return par - -def get_reference_features(adata_reference, par, logger): - if par["reference_obsm_features"] is None: - logger.info("Using .X of reference data") - train_data = adata_reference.X - else: - logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") - train_data = adata_reference.obsm[par["reference_obsm_features"]] - - return train_data - -def get_query_features(adata, par, logger): - if par["input_obsm_features"] is None: - logger.info("Using .X of query data") - query_data = adata.X - else: - logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") - query_data = adata.obsm[par["input_obsm_features"]] - - return query_data \ No newline at end of file diff --git a/target/nextflow/labels_transfer/xgboost/main.nf b/target/nextflow/labels_transfer/xgboost/main.nf deleted file mode 100644 index 161cfb54a5e..00000000000 --- a/target/nextflow/labels_transfer/xgboost/main.nf +++ /dev/null @@ -1,3422 +0,0 @@ -// xgboost 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Vladimir Shitov (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "xgboost", - "namespace" : "labels_transfer", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Vladimir Shitov", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "email" : "vladimir.shitov@helmholtz-muenchen.de", - "github" : "vladimirshitov", - "orcid" : "0000-0002-1960-8812", - "linkedin" : "vladimir-shitov-9a659513b" - }, - "organizations" : [ - { - "name" : "Helmholtz Munich", - "href" : "https://www.helmholtz-munich.de", - "role" : "PhD Candidate" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input dataset (query) arguments", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "The query data to transfer the labels to. Should be a .h5mu file.", - "info" : { - "label" : "Query", - "file_format" : { - "type" : "h5mu", - "mod" : { - "rna" : { - "description" : "Modality in AnnData format containing RNA data.", - "required" : true, - "slots" : { - "X" : { - "type" : "double", - "name" : "features", - "required" : false, - "description" : "The expression data to use for the classifier's inference, if `--input_obsm_features` argument is not provided.\n" - }, - "obsm" : [ - { - "type" : "double", - "name" : "features", - "example" : "X_integrated_scanvi", - "required" : false, - "description" : "The embedding to use for the classifier's inference. Override using the `--input_obsm_features` argument. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. 
by the same model or preprocessing).\n" - } - ] - } - } - } - } - }, - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "Which modality to use.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_obsm_features", - "description" : "The `.obsm` key of the embedding to use for the classifier's inference. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. by the same model or preprocessing).\n", - "example" : [ - "X_integrated_scanvi" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Reference dataset arguments", - "arguments" : [ - { - "type" : "file", - "name" : "--reference", - "description" : "The reference data to train classifiers on.", - "info" : { - "label" : "Reference", - "file_format" : { - "type" : "h5ad", - "X" : { - "type" : "double", - "name" : "features", - "required" : false, - "description" : "The expression data to use for the classifier's training, if `--input_obsm_features` argument is not provided.\n" - }, - "obsm" : [ - { - "type" : "double", - "name" : "features", - "example" : "X_integrated_scanvi", - "description" : "The embedding to use for the classifier's training. Override using the `--reference_obsm_features` argument.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. 
by the same model or preprocessing).\n", - "required" : true - } - ], - "obs" : [ - { - "type" : "string", - "name" : "targets", - "multiple" : true, - "example" : [ - "ann_level_1", - "ann_level_2", - "ann_level_3", - "ann_level_4", - "ann_level_5", - "ann_finest_level" - ], - "description" : "The target labels to transfer. Override using the `--reference_obs_targets` argument.", - "required" : true - } - ] - } - }, - "example" : [ - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_obsm_features", - "description" : "The `.obsm` key of the embedding to use for the classifier's training.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. by the same model or preprocessing).\n", - "default" : [ - "X_integrated_scanvi" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--reference_obs_targets", - "description" : "The `.obs` key of the target labels to tranfer.", - "default" : [ - "ann_level_1", - "ann_level_2", - "ann_level_3", - "ann_level_4", - "ann_level_5", - "ann_finest_level" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ",", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "The query data in .h5mu format with predicted labels transfered from the reference.", - "info" : { - "label" : "Output data", - "file_format" : { - "type" : "h5mu", - "mod" : { - "rna" : { - "description" : "Modality in AnnData format containing RNA data.", - "required" : true, - "obs" : [ - { - "type" : "string", - "name" : "predictions", - "description" : "The predicted labels. 
Override using the `--output_obs_predictions` argument.", - "required" : true - }, - { - "type" : "double", - "name" : "uncertainty", - "description" : "The uncertainty of the predicted labels. Override using the `--output_obs_uncertainty` argument.", - "required" : false - } - ], - "obsm" : [ - { - "type" : "double", - "name" : "X_integrated_scanvi", - "description" : "The embedding used for the classifier's inference. Could have any name, specified by `input_obsm_features` argument.\\"", - "required" : false - } - ], - "uns" : [ - { - "type" : "string", - "name" : "parameters", - "example" : "labels_tranfer", - "description" : "Additional information about the parameters used for the label transfer.", - "required" : true - } - ] - } - } - } - }, - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_obs_predictions", - "description" : "In which `.obs` slots to store the predicted information.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_pred\\"` suffix.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_obs_uncertainty", - "description" : "In which `.obs` slots to store the uncertainty of the predictions.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_uncertainty\\"` suffix.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_uns_parameters", - "description" : "The `.uns` key to store additional information about the parameters used for the label transfer.", - "default" : [ - 
"labels_transfer" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Execution arguments", - "arguments" : [ - { - "type" : "boolean_true", - "name" : "--force_retrain", - "alternatives" : [ - "-f" - ], - "description" : "Retrain models on the reference even if model_output directory already has trained classifiers. WARNING! It will rewrite existing classifiers for targets in the model_output directory!", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--use_gpu", - "description" : "Use GPU during models training and inference (recommended).", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--verbosity", - "alternatives" : [ - "-v" - ], - "description" : "The verbosity level for evaluation of the classifier from the range [0,2]", - "default" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--model_output", - "description" : "Output directory for model", - "default" : [ - "model" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Learning parameters", - "arguments" : [ - { - "type" : "double", - "name" : "--learning_rate", - "alternatives" : [ - "--eta" - ], - "description" : "Step size shrinkage used in update to prevents overfitting. Range: [0,1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 0.3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--min_split_loss", - "alternatives" : [ - "--gamma" - ], - "description" : "Minimum loss reduction required to make a further partition on a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 0.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--max_depth", - "alternatives" : [ - "-d" - ], - "description" : "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 6 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_child_weight", - "description" : "Minimum sum of instance weight (hessian) needed in a child. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--max_delta_step", - "description" : "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 0.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--subsample", - "description" : "Subsample ratio of the training instances. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sampling_method", - "description" : "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - "uniform" - ], - "required" : false, - "choices" : [ - "uniform", - "gradient_based" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--colsample_bytree", - "description" : "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--colsample_bylevel", - "description" : "Subsample ratio of columns for each level. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--colsample_bynode", - "description" : "Subsample ratio of columns for each node (split). Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--reg_lambda", - "alternatives" : [ - "--lambda" - ], - "description" : "L2 regularization term on weights. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--reg_alpha", - "alternatives" : [ - "--alpha" - ], - "description" : "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 0.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--scale_pos_weight", - "description" : "Control the balance of positive and negative weights, useful for unbalanced classes. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/" - }, - { - "type" : "file", - "path" : "../utils/helper.py", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Performs label transfer from reference to query using XGBoost classifier", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/" - }, - { - "type" : "file", - "path" : "resources_test/annotation_test_data/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, 
- { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "info" : { - "method_id" : "XGBClassifier" - }, - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "git" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - }, - { - "type" : "apt", - "packages" : [ - "libopenblas-dev", - "liblapack-dev", - "gfortran" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "scanpy~=1.9.5", - "xgboost~=1.7.1", - "scikit-learn~=1.1.1", - "numpy~=1.23.5", - "pandas~=1.4.4" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu", - "gpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - 
"mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - }, - { - "type" : "native", - "id" : "native" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/xgboost", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import json -import os -from typing import Optional -import yaml -from pathlib import Path - -import mudata -import numpy as np -import scanpy as sc -import pandas as pd -import xgboost as xgb -from sklearn.model_selection import train_test_split -from sklearn.metrics import classification_report -from sklearn.preprocessing import LabelEncoder - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_obsm_features': $( if [ ! 
-z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'use_gpu': $( if [ ! -z ${VIASH_PAR_USE_GPU+x} ]; then echo "r'${VIASH_PAR_USE_GPU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'verbosity': $( if [ ! -z ${VIASH_PAR_VERBOSITY+x} ]; then echo "int(r'${VIASH_PAR_VERBOSITY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'learning_rate': $( if [ ! 
-z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_split_loss': $( if [ ! -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then echo "float(r'${VIASH_PAR_MIN_SPLIT_LOSS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_depth': $( if [ ! -z ${VIASH_PAR_MAX_DEPTH+x} ]; then echo "int(r'${VIASH_PAR_MAX_DEPTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_child_weight': $( if [ ! -z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then echo "int(r'${VIASH_PAR_MIN_CHILD_WEIGHT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'max_delta_step': $( if [ ! -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DELTA_STEP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sampling_method': $( if [ ! -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then echo "r'${VIASH_PAR_SAMPLING_METHOD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'colsample_bytree': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYTREE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'colsample_bylevel': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYLEVEL//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'colsample_bynode': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYNODE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'reg_lambda': $( if [ ! -z ${VIASH_PAR_REG_LAMBDA+x} ]; then echo "float(r'${VIASH_PAR_REG_LAMBDA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'reg_alpha': $( if [ ! -z ${VIASH_PAR_REG_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_REG_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scale_pos_weight': $( if [ ! 
-z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then echo "float(r'${VIASH_PAR_SCALE_POS_WEIGHT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -from helper import check_arguments, get_reference_features, get_query_features -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -# read config arguments -config = yaml.safe_load(Path(meta["config"]).read_text()) - -# look for training params for method -argument_groups = { grp["name"]: grp["arguments"] for grp in config["functionality"]["argument_groups"] } -training_arg_names = [ arg["name"].replace("--", "") for arg in argument_groups["Learning parameters"] ] -training_params = { arg_name: par[arg_name] for arg_name in training_arg_names } - -def encode_labels(y): - labels_encoder = LabelEncoder() - labels_encoder.fit(y) - - return labels_encoder.transform(y), labels_encoder - - -def get_model_eval(xgb_model, X_test, y_test, labels_encoder): - preds = xgb_model.predict(X_test) - - cr = classification_report(labels_encoder.inverse_transform(y_test), - labels_encoder.inverse_transform(preds), - output_dict=True) - cr_df = pd.DataFrame(cr).transpose() - - return cr_df - - -def train_test_split_adata(adata, labels): - train_data = pd.DataFrame(data=adata.X, index=adata.obs_names) - - X_train, X_test, y_train, y_test = train_test_split( - train_data, labels, test_size=0.2, random_state=42, stratify=labels) - - return X_train, X_test, y_train, y_test - - -def 
train_xgb_model(X_train, y_train, gpu=True) -> xgb.XGBClassifier: - n_classes = len(np.unique(y_train)) - objective = "binary:logistic" if n_classes == 2 else "multi:softprob" - - tree_method = "gpu_hist" if gpu else "hist" - xgbc = xgb.XGBClassifier(tree_method=tree_method, objective=objective, **training_params) - xgbc.fit(X_train, y_train) - - return xgbc - - -def build_classifier(X, y, labels_encoder, label_key, eval_verbosity: Optional[int] = 1, gpu=True) -> xgb.XGBClassifier: - # Adata prep - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) - #Note: Do we need a new train-test split for each classifier? - - # Model training - xgb_model = train_xgb_model(X_train, y_train, gpu=gpu) - - # Model eval - if eval_verbosity != 0: - cr_df = get_model_eval(xgb_model, X_test, y_test, labels_encoder) - - if eval_verbosity == 2: - print(cr_df) - - else: - overall_accuracy = cr_df["support"]["accuracy"] - low_prec_key = cr_df.precision.idxmin() - low_prec_val = cr_df.precision.min() - low_rec_key = cr_df.recall.idxmin() - low_rec_val = cr_df.recall.min() - low_f1_key = cr_df["f1-score"].idxmin() - low_f1_val = cr_df["f1-score"].min() - - print("") - print(f"Summary stats for {label_key} model:") - print(f"Overall accuracy: {overall_accuracy}") - print(f"Min. precision: {low_prec_key}: {low_prec_val}") - print(f"Min. Recall: {low_rec_key}: {low_rec_val}") - print(f"Min. F1-score: {low_f1_key}: {low_f1_val}") - print("") - - return xgb_model - - -def build_ref_classifiers(adata_reference, targets, model_path, - eval_verbosity: Optional[int] = 1, gpu: Optional[bool] = True) -> None: - """ - This function builds xgboost classifiers on a reference embedding for a designated number of - adata_reference.obs columns. Classifier .xgb files and a model_info.json file is written to the \\`model_path\\` - directory. Model evaluation is printed to stdout. 
- - Inputs: - * \\`adata_reference\\`: The AnnData object that was used to train the reference model - * \\`model_path\\`: The reference model directory where the classifiers will also be stored - * \\`eval_verbosity\\`: The verbosity level for evaluation of the classifier from the range [0;2]. - * \\`gpu\\`: Boolean indicating whether a gpu is available for classifier training - - - Example: - \\`\\`\\` - >>> adata - AnnData object with n_obs x n_vars = 700 x 765 - obs: "ann_finest_level", "ann_level_1" - - >>> os.listdir("/path/to/model") - model_params.pt* - - >>> build_ref_classifiers(adata, "path/to/model", eval_verbosity=1, gpu=True) - >>> os.listdir("/path/to/model") - classifier_ann_finest_level.xgb* model_info.json* - classifier_ann_level_1.xgb* model_params.pt* - \\`\\`\\` - """ - - # Check inputs - if not isinstance(eval_verbosity, int): - raise TypeError("\\`eval_verbosity\\` should be an integer between 0 and 2.") - - if eval_verbosity < 0 or eval_verbosity > 2: - raise ValueError("\\`eval_verbosity\\` should be an integer between 0 and 2.") - - train_data = get_reference_features(adata_reference, par, logger) - - if not os.path.exists(model_path): - os.makedirs(model_path, exist_ok=True) - - # Map from name of classifier to file names - classifiers = dict() - - for label, obs_pred in zip(targets, par["output_obs_predictions"]): - if label not in adata_reference.obs: - raise ValueError(f"{label} is not in the \\`adata\\` object passed!") - - filename = "classifier_" + label + ".xgb" - - labels, labels_encoder = encode_labels(adata_reference.obs[label]) - logger.info(f"Classes: {labels_encoder.classes_}") - - logger.info(f"Building classifier for {label}...") - xgb_model = build_classifier( - X=train_data, - y=labels, - labels_encoder=labels_encoder, - label_key=label, - eval_verbosity=eval_verbosity, - gpu=gpu - ) - - # Save classifier - logger.info("Saving model") - xgb_model.save_model(os.path.join(model_path, filename)) - - # Store classifier info - 
classifiers[label] = { - "filename": filename, - "labels": labels_encoder.classes_.tolist(), - "obs_column": obs_pred, - "model_params": training_params, - } - - # Store model_info.json file - model_info = { - "classifier_info": classifiers - } - - logger.info("Writing model_info to the file") - # Read previous file if it exists - if os.path.exists(model_path + "/model_info.json"): - logger.info("Old model_info file found, updating") - with open(model_path + "/model_info.json", "r") as f: - old_model_info = json.loads(f.read()) - - for key in old_model_info: - if key in model_info: - old_model_info[key].update(model_info[key]) - json_string = json.dumps(old_model_info, indent=4) - - else: - logger.info("Creating a new file") - json_string = json.dumps(model_info, indent=4) - - with open(model_path + "/model_info.json", "w") as f: - f.write(json_string) - - -def project_labels( - query_dataset, - cell_type_classifier_model: xgb.XGBClassifier, - annotation_column_name='label_pred', - uncertainty_column_name='label_uncertainty', - uncertainty_thresh=None # Note: currently not passed to predict function -): - """ - A function that projects predicted labels onto the query dataset, along with uncertainty scores. - Performs in-place update of the adata object, adding columns to the \\`obs\\` DataFrame. 
- - Input: - * \\`query_dataset\\`: The query \\`AnnData\\` object - * \\`model_file\\`: Path to the classification model file - * \\`prediction_key\\`: Column name in \\`adata.obs\\` where to store the predicted labels - * \\`uncertainty_key\\`: Column name in \\`adata.obs\\` where to store the uncertainty scores - * \\`uncertainty_thresh\\`: The uncertainty threshold above which we call a cell 'Unknown' - - Output: - Nothing is output, the passed anndata is modified inplace - - """ - - if (uncertainty_thresh is not None) and (uncertainty_thresh < 0 or uncertainty_thresh > 1): - raise ValueError(f'\\`uncertainty_thresh\\` must be \\`None\\` or between 0 and 1.') - - query_data = get_query_features(query_dataset, par, logger) - - # Predict labels and probabilities - query_dataset.obs[annotation_column_name] = cell_type_classifier_model.predict(query_data) - - logger.info("Predicting probabilities") - probs = cell_type_classifier_model.predict_proba(query_data) - - # Format probabilities - df_probs = pd.DataFrame(probs, columns=cell_type_classifier_model.classes_, index=query_dataset.obs_names) - query_dataset.obs[uncertainty_column_name] = 1 - df_probs.max(1) - - # Note: this is here in case we want to propose a set of values for the user to accept to seed the - # manual curation of predicted labels - if uncertainty_thresh is not None: - logger.info("Marking uncertain predictions") - query_dataset.obs[annotation_column_name + "_filtered"] = [ - val if query_dataset.obs[uncertainty_column_name][i] < uncertainty_thresh - else "Unknown" for i, val in enumerate(query_dataset.obs[annotation_column_name])] - - return query_dataset - - -def predict( - query_dataset, - cell_type_classifier_model_path, - annotation_column_name: str, - prediction_column_name: str, - uncertainty_column_name: str, - models_info, - use_gpu: bool = False -) -> pd.DataFrame: - """ - Returns \\`obs\\` DataFrame with prediction columns appended - """ - - tree_method = "gpu_hist" if use_gpu else 
"hist" - - labels = models_info["classifier_info"][annotation_column_name]["labels"] - - objective = "binary:logistic" if len(labels) == 2 else "multi:softprob" - cell_type_classifier_model = xgb.XGBClassifier(tree_method=tree_method, objective=objective) - - logger.info("Loading model") - cell_type_classifier_model.load_model(fname=cell_type_classifier_model_path) - - logger.info("Predicting labels") - project_labels(query_dataset, - cell_type_classifier_model, - annotation_column_name=prediction_column_name, - uncertainty_column_name=uncertainty_column_name) - - logger.info("Converting labels from numbers to classes") - labels_encoder = LabelEncoder() - labels_encoder.classes_ = np.array(labels) - query_dataset.obs[prediction_column_name] = labels_encoder.inverse_transform(query_dataset.obs[prediction_column_name]) - - return query_dataset - - -def main(par): - logger.info("Checking arguments") - par = check_arguments(par) - - mdata = mudata.read(par["input"].strip()) - adata = mdata.mod[par["modality"]] - - adata_reference = sc.read(par["reference"], backup_url=par["reference"]) - - # If classifiers for targets are in the model_output directory, simply open them and run (unless \\`retrain\\` != True) - # If some classifiers are missing, train and save them first - # Predict and save the query data - - targets_to_train = [] - - for obs_target in par["reference_obs_targets"]: - if not os.path.exists(par["model_output"]) or f"classifier_{obs_target}.xgb" not in os.listdir(par["model_output"]) or par["force_retrain"]: - logger.info(f"Classifier for {obs_target} added to a training schedule") - targets_to_train.append(obs_target) - else: - logger.info(f"Found classifier for {obs_target}, no retraining required") - - build_ref_classifiers(adata_reference, targets_to_train, model_path=par["model_output"], - gpu=par["use_gpu"], eval_verbosity=par["verbosity"]) - - output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) - - with open(par["model_output"] + 
"/model_info.json", "r") as f: - models_info = json.loads(f.read()) - - for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): - logger.info(f"Predicting {obs_target}") - - adata = predict(query_dataset=adata, - cell_type_classifier_model_path=os.path.join(par["model_output"], "classifier_" + obs_target + ".xgb"), - annotation_column_name=obs_target, - prediction_column_name=obs_pred, - uncertainty_column_name=obs_unc, - models_info=models_info, - use_gpu=par["use_gpu"]) - - if obs_target in targets_to_train: - # Save information about the transfer to .uns - output_uns_parameters[obs_target] = { - "method": "XGBClassifier", - **training_params - } - - adata.uns[par["output_uns_parameters"]] = output_uns_parameters - - logger.info("Updating mdata") - mdata.mod[par['modality']] = adata - mdata.update() - - logger.info("Writing output") - mdata.write_h5mu(par['output'].strip()) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/labels_transfer_xgboost", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu", - "gpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. 
the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/labels_transfer/xgboost/nextflow.config b/target/nextflow/labels_transfer/xgboost/nextflow.config deleted file mode 100644 index 
2b25dee2d7a..00000000000 --- a/target/nextflow/labels_transfer/xgboost/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'xgboost' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Performs label transfer from reference to query using XGBoost classifier' - author = 'Vladimir Shitov' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: 
mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/labels_transfer/xgboost/nextflow_params.yaml b/target/nextflow/labels_transfer/xgboost/nextflow_params.yaml deleted file mode 100644 index b93b2716c7b..00000000000 --- a/target/nextflow/labels_transfer/xgboost/nextflow_params.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Execution arguments -force_retrain: false -use_gpu: false -verbosity: 1 -# model_output: "$id.$key.model_output.model_output" - -# Learning parameters -learning_rate: 0.3 -min_split_loss: 0 -max_depth: 6 -min_child_weight: 1 -max_delta_step: 0 -subsample: 1 -sampling_method: "uniform" -colsample_bytree: 1 -colsample_bylevel: 1 -colsample_bynode: 1 -reg_lambda: 1 -reg_alpha: 0 -scale_pos_weight: 1 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/labels_transfer/xgboost/nextflow_schema.json b/target/nextflow/labels_transfer/xgboost/nextflow_schema.json deleted file mode 100644 index 70f8bb777f0..00000000000 --- a/target/nextflow/labels_transfer/xgboost/nextflow_schema.json +++ /dev/null @@ -1,177 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "xgboost", - 
"description": "Performs label transfer from reference to query using XGBoost classifier", - "type": "object", - "definitions": { - "execution arguments" : { - "title": "Execution arguments", - "type": "object", - "description": "No description", - "properties": { - - "force_retrain": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Retrain models on the reference even if model_output directory already has trained classifiers", - "help_text": "Type: `boolean_true`, default: `false`. Retrain models on the reference even if model_output directory already has trained classifiers. WARNING! It will rewrite existing classifiers for targets in the model_output directory!", - "default": "False" - }, - - "use_gpu": { - "type": "boolean", - "description": "Type: `boolean`, default: `false`. Use GPU during models training and inference (recommended)", - "help_text": "Type: `boolean`, default: `false`. Use GPU during models training and inference (recommended).", - "default": "False" - }, - - "verbosity": { - "type": "integer", - "description": "Type: `integer`, default: `1`. The verbosity level for evaluation of the classifier from the range [0,2]", - "help_text": "Type: `integer`, default: `1`. The verbosity level for evaluation of the classifier from the range [0,2]", - "default": "1" - }, - - "model_output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model", - "help_text": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model", - "default": "$id.$key.model_output.model_output" - } - - } - }, - "learning parameters" : { - "title": "Learning parameters", - "type": "object", - "description": "No description", - "properties": { - - "learning_rate": { - "type": "number", - "description": "Type: `double`, default: `0.3`. Step size shrinkage used in update to prevents overfitting", - "help_text": "Type: `double`, default: `0.3`. 
Step size shrinkage used in update to prevents overfitting. Range: [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "0.3" - }, - - "min_split_loss": { - "type": "number", - "description": "Type: `double`, default: `0`. Minimum loss reduction required to make a further partition on a leaf node of the tree", - "help_text": "Type: `double`, default: `0`. Minimum loss reduction required to make a further partition on a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "0" - }, - - "max_depth": { - "type": "integer", - "description": "Type: `integer`, default: `6`. Maximum depth of a tree", - "help_text": "Type: `integer`, default: `6`. Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "6" - }, - - "min_child_weight": { - "type": "integer", - "description": "Type: `integer`, default: `1`. Minimum sum of instance weight (hessian) needed in a child", - "help_text": "Type: `integer`, default: `1`. Minimum sum of instance weight (hessian) needed in a child. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "1" - }, - - "max_delta_step": { - "type": "number", - "description": "Type: `double`, default: `0`. Maximum delta step we allow each leaf output to be", - "help_text": "Type: `double`, default: `0`. Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "0" - }, - - "subsample": { - "type": "number", - "description": "Type: `double`, default: `1`. Subsample ratio of the training instances", - "help_text": "Type: `double`, default: `1`. Subsample ratio of the training instances. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "1" - }, - - "sampling_method": { - "type": "string", - "description": "Type: `string`, default: `uniform`, choices: ``uniform`, `gradient_based``. The method to use to sample the training instances", - "help_text": "Type: `string`, default: `uniform`, choices: ``uniform`, `gradient_based``. The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "enum": ["uniform", "gradient_based"] - , - "default": "uniform" - }, - - "colsample_bytree": { - "type": "number", - "description": "Type: `double`, default: `1`. Fraction of columns to be subsampled", - "help_text": "Type: `double`, default: `1`. Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "1" - }, - - "colsample_bylevel": { - "type": "number", - "description": "Type: `double`, default: `1`. Subsample ratio of columns for each level", - "help_text": "Type: `double`, default: `1`. Subsample ratio of columns for each level. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "1" - }, - - "colsample_bynode": { - "type": "number", - "description": "Type: `double`, default: `1`. Subsample ratio of columns for each node (split)", - "help_text": "Type: `double`, default: `1`. Subsample ratio of columns for each node (split). Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "1" - }, - - "reg_lambda": { - "type": "number", - "description": "Type: `double`, default: `1`. L2 regularization term on weights", - "help_text": "Type: `double`, default: `1`. L2 regularization term on weights. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "1" - }, - - "reg_alpha": { - "type": "number", - "description": "Type: `double`, default: `0`. L1 regularization term on weights", - "help_text": "Type: `double`, default: `0`. L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "0" - }, - - "scale_pos_weight": { - "type": "number", - "description": "Type: `double`, default: `1`. Control the balance of positive and negative weights, useful for unbalanced classes", - "help_text": "Type: `double`, default: `1`. Control the balance of positive and negative weights, useful for unbalanced classes. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", - "default": "1" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/execution arguments" - }, - { - "$ref": "#/definitions/learning parameters" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/labels_transfer/xgboost/setup_logger.py b/target/nextflow/labels_transfer/xgboost/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/labels_transfer/xgboost/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml b/target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml deleted file mode 100644 index 72f3da797a9..00000000000 --- a/target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml +++ /dev/null @@ -1,417 +0,0 @@ -functionality: - name: "bd_rhapsody" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "string" - name: "--mode" - description: "Whether to run a whole transcriptome analysis (WTA) or a targeted\ - \ analysis." 
- info: null - example: - - "wta" - required: true - choices: - - "wta" - - "targeted" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to your read files in the FASTQ.GZ format. You may specify\ - \ as many R1/R2 read pairs as you want." - info: null - example: - - "input.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "-r" - - "--reference_genome" - description: "Refence to map to. For `--mode wta`, this is the path to STAR\ - \ index as a tar.gz file. For `--mode targeted`, this is the path to mRNA\ - \ reference file for pre-designed, supplemental, or custom panel, in FASTA\ - \ format" - info: null - example: - - "reference_genome.tar.gz|reference.fasta" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--transcriptome_annotation" - alternatives: - - "-t" - description: "Path to GTF annotation file (only for `--mode wta`)." - info: null - example: - - "transcriptome.gtf" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--abseq_reference" - alternatives: - - "-a" - description: "Path to the AbSeq reference file in FASTA format. Only needed\ - \ if BD AbSeq Ab-Oligos are used." - info: null - example: - - "abseq_reference.fasta" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--supplemental_reference" - alternatives: - - "-s" - description: "Path to the supplemental reference file in FASTA format. 
Only\ - \ needed if there are additional transgene sequences used in the experiment\ - \ (only for `--mode wta`)." - info: null - example: - - "supplemental_reference.fasta" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sample_prefix" - description: "Specify a run name to use as the output file base name. Use only\ - \ letters, numbers, or hyphens. Do not use special characters or spaces." - info: null - default: - - "sample" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output folder. Output still needs to be processed further." - info: null - example: - - "output_dir" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Putative cell calling settings" - arguments: - - type: "string" - name: "--putative_cell_call" - description: "Specify the dataset to be used for putative cell calling. For\ - \ putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference\ - \ fasta file above." 
- info: null - example: - - "mRNA" - required: false - choices: - - "mRNA" - - "AbSeq_Experimental" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--exact_cell_count" - description: "Exact cell count - Set a specific number (>=1) of cells as putative,\ - \ based on those with the highest error-corrected read count" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--disable_putative_calling" - description: "Disable Refined Putative Cell Calling - Determine putative cells\ - \ using only the basic algorithm (minimum second derivative along the cumulative\ - \ reads curve). The refined algorithm attempts to remove false positives and\ - \ recover false negatives, but may not be ideal for certain complex mixtures\ - \ of cell types. Does not apply if Exact Cell Count is set." - info: null - direction: "input" - dest: "par" - - name: "Subsample arguments" - arguments: - - type: "double" - name: "--subsample" - description: "A number >1 or fraction (0 < n < 1) to indicate the number or\ - \ percentage of reads to subsample." - info: null - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--subsample_seed" - description: "A seed for replicating a previous subsampled run." - info: null - example: - - 3445 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Multiplex arguments" - arguments: - - type: "string" - name: "--sample_tags_version" - description: "Specify if multiplexed run." 
- info: null - example: - - "human" - required: false - choices: - - "human" - - "hs" - - "mouse" - - "mm" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--tag_names" - description: "Tag_Names (optional) - Specify the tag number followed by '-'\ - \ and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not\ - \ use the special characters: &, (), [], {}, <>, ?, |\n" - info: null - example: - - "4-mySample" - - "9-myOtherSample" - - "6-alsoThisSample" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - name: "VDJ arguments" - arguments: - - type: "string" - name: "--vdj_version" - description: "Specify if VDJ run." - info: null - example: - - "human" - required: false - choices: - - "human" - - "mouse" - - "humanBCR" - - "humanBCR" - - "humanTCR" - - "mouseBCR" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "CWL-runner arguments" - arguments: - - type: "boolean" - name: "--parallel" - description: "Run jobs in parallel." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--timestamps" - description: "Add timestamps to the errors, warnings, and notifications." - info: null - direction: "input" - dest: "par" - - type: "boolean_true" - name: "--dryrun" - description: "If true, the output directory will only contain the CWL input\ - \ files, but the pipeline itself will not be executed." 
- info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "rhapsody_wta_1.10.1_nodocker.cwl" - - type: "file" - path: "rhapsody_targeted_1.10.1_nodocker.cwl" - - type: "file" - path: "src/utils/setup_logger.py" - description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe\ - \ CWL pipeline file is obtained by cloning 'https://bitbucket.org/CRSwDev/cwl/src/master/'\ - \ and removing all objects with class 'DockerRequirement' from the YML.\n\nThis\ - \ pipeline can be used for a targeted analysis (with `--mode targeted`) or for\ - \ a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\"targeted\"\ - `, then either the `--reference` or `--abseq_reference` parameters must be defined.\n\ - * If mode is `\"wta\"`, then `--reference` and `--transcriptome_annotation` must\ - \ be defined, `--abseq_reference` and `--supplemental_reference` is optional.\n\ - \nThe reference_genome and transcriptome_annotation files can be generated with\ - \ the make_reference pipeline.\nAlternatively, BD also provides standard references\ - \ which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n\ - \ - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n" - test_resources: - - type: "bash_script" - path: "test_memory.sh" - is_executable: true - - type: "bash_script" - path: "test_wta.sh" - is_executable: true - - type: "bash_script" - path: "test_targeted.sh" - is_executable: true - - type: "file" - path: "resources_test/bdrhap_vdj" - - type: "file" - path: "resources_test/bdrhap_5kjrt" - - type: "file" - path: "resources_test/reference_gencodev41_chr1/" - info: - name: "BD Rhapsody" - short_description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline" - status: "enabled" - requirements: 
- commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/bd_rhapsody:1.10.1" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "pandas<2" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/bd_rhapsody" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/bd_rhapsody/bd_rhapsody" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: 
"https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/bd_rhapsody/main.nf b/target/nextflow/mapping/bd_rhapsody/main.nf deleted file mode 100644 index 4698ab4afbd..00000000000 --- a/target/nextflow/mapping/bd_rhapsody/main.nf +++ /dev/null @@ -1,3249 +0,0 @@ -// bd_rhapsody 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "bd_rhapsody", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "string", - "name" : "--mode", - "description" : "Whether to run a whole transcriptome analysis (WTA) or a targeted analysis.", - "example" : [ - "wta" - ], - "required" : 
true, - "choices" : [ - "wta", - "targeted" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want.", - "example" : [ - "input.fastq.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "alternatives" : [ - "-r", - "--reference_genome" - ], - "description" : "Refence to map to. For `--mode wta`, this is the path to STAR index as a tar.gz file. For `--mode targeted`, this is the path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format", - "example" : [ - "reference_genome.tar.gz|reference.fasta" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--transcriptome_annotation", - "alternatives" : [ - "-t" - ], - "description" : "Path to GTF annotation file (only for `--mode wta`).", - "example" : [ - "transcriptome.gtf" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--abseq_reference", - "alternatives" : [ - "-a" - ], - "description" : "Path to the AbSeq reference file in FASTA format. 
Only needed if BD AbSeq Ab-Oligos are used.", - "example" : [ - "abseq_reference.fasta" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--supplemental_reference", - "alternatives" : [ - "-s" - ], - "description" : "Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment (only for `--mode wta`).", - "example" : [ - "supplemental_reference.fasta" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sample_prefix", - "description" : "Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces.", - "default" : [ - "sample" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output folder. Output still needs to be processed further.", - "example" : [ - "output_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Putative cell calling settings", - "arguments" : [ - { - "type" : "string", - "name" : "--putative_cell_call", - "description" : "Specify the dataset to be used for putative cell calling. 
For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above.", - "example" : [ - "mRNA" - ], - "required" : false, - "choices" : [ - "mRNA", - "AbSeq_Experimental" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--exact_cell_count", - "description" : "Exact cell count - Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--disable_putative_calling", - "description" : "Disable Refined Putative Cell Calling - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set.", - "direction" : "input", - "dest" : "par" - } - ] - }, - { - "name" : "Subsample arguments", - "arguments" : [ - { - "type" : "double", - "name" : "--subsample", - "description" : "A number >1 or fraction (0 < n < 1) to indicate the number or percentage of reads to subsample.", - "example" : [ - 0.01 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--subsample_seed", - "description" : "A seed for replicating a previous subsampled run.", - "example" : [ - 3445 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Multiplex arguments", - "arguments" : [ - { - "type" : "string", - "name" : "--sample_tags_version", - "description" : "Specify if multiplexed run.", - "example" : [ - "human" - ], - "required" : false, - "choices" : [ - "human", - "hs", - "mouse", - "mm" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--tag_names", - "description" : "Tag_Names (optional) - Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not use the special characters: &, (), [], {}, <>, ?, |\n", - "example" : [ - "4-mySample", - "9-myOtherSample", - "6-alsoThisSample" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "VDJ arguments", - "arguments" : [ - { - "type" : "string", - "name" : "--vdj_version", - "description" : "Specify if VDJ run.", - "example" : [ - "human" - ], - "required" : false, - "choices" : [ - "human", - "mouse", - "humanBCR", - "humanBCR", - "humanTCR", - "mouseBCR" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : 
"CWL-runner arguments", - "arguments" : [ - { - "type" : "boolean", - "name" : "--parallel", - "description" : "Run jobs in parallel.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--timestamps", - "description" : "Add timestamps to the errors, warnings, and notifications.", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--dryrun", - "description" : "If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed.", - "direction" : "input", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" - }, - { - "type" : "file", - "path" : "rhapsody_wta_1.10.1_nodocker.cwl", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" - }, - { - "type" : "file", - "path" : "rhapsody_targeted_1.10.1_nodocker.cwl", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe CWL pipeline file is obtained by cloning 'https://bitbucket.org/CRSwDev/cwl/src/master/' and removing all objects with class 'DockerRequirement' from the YML.\n\nThis pipeline can be used for a targeted analysis (with `--mode targeted`) or for a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\\"targeted\\"`, then either the `--reference` or `--abseq_reference` parameters must be defined.\n* If mode is `\\"wta\\"`, then `--reference` and `--transcriptome_annotation` must be defined, `--abseq_reference` and 
`--supplemental_reference` is optional.\n\nThe reference_genome and transcriptome_annotation files can be generated with the make_reference pipeline.\nAlternatively, BD also provides standard references which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test_memory.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" - }, - { - "type" : "bash_script", - "path" : "test_wta.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" - }, - { - "type" : "bash_script", - "path" : "test_targeted.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" - }, - { - "type" : "file", - "path" : "resources_test/bdrhap_vdj", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/bdrhap_5kjrt", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/reference_gencodev41_chr1/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "info" : { - "name" : "BD Rhapsody", - "short_description" : "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline" - }, - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/bd_rhapsody:1.10.1", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", 
- "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "pandas<2" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/bd_rhapsody", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e 
-tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import os -import re -import subprocess -import tempfile -import sys -from typing import Any -import pandas as pd -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'transcriptome_annotation': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_ANNOTATION+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_ANNOTATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'abseq_reference': $( if [ ! -z ${VIASH_PAR_ABSEQ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_ABSEQ_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'supplemental_reference': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTAL_REFERENCE+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTAL_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sample_prefix': $( if [ ! -z ${VIASH_PAR_SAMPLE_PREFIX+x} ]; then echo "r'${VIASH_PAR_SAMPLE_PREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'putative_cell_call': $( if [ ! -z ${VIASH_PAR_PUTATIVE_CELL_CALL+x} ]; then echo "r'${VIASH_PAR_PUTATIVE_CELL_CALL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'exact_cell_count': $( if [ ! -z ${VIASH_PAR_EXACT_CELL_COUNT+x} ]; then echo "int(r'${VIASH_PAR_EXACT_CELL_COUNT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'disable_putative_calling': $( if [ ! 
-z ${VIASH_PAR_DISABLE_PUTATIVE_CALLING+x} ]; then echo "r'${VIASH_PAR_DISABLE_PUTATIVE_CALLING//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'subsample_seed': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE_SEED+x} ]; then echo "int(r'${VIASH_PAR_SUBSAMPLE_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sample_tags_version': $( if [ ! -z ${VIASH_PAR_SAMPLE_TAGS_VERSION+x} ]; then echo "r'${VIASH_PAR_SAMPLE_TAGS_VERSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'tag_names': $( if [ ! -z ${VIASH_PAR_TAG_NAMES+x} ]; then echo "r'${VIASH_PAR_TAG_NAMES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'vdj_version': $( if [ ! -z ${VIASH_PAR_VDJ_VERSION+x} ]; then echo "r'${VIASH_PAR_VDJ_VERSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'parallel': $( if [ ! -z ${VIASH_PAR_PARALLEL+x} ]; then echo "r'${VIASH_PAR_PARALLEL//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'timestamps': $( if [ ! -z ${VIASH_PAR_TIMESTAMPS+x} ]; then echo "r'${VIASH_PAR_TIMESTAMPS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def is_gz_file(filepath): - with open(filepath, 'rb') as test_f: - return test_f.read(2) == b'\\\\x1f\\\\x8b' - -def strip_margin(text: str) -> str: - return re.sub('(\\\\n?)[ \\\\t]*\\\\|', '\\\\\\\\1', text) - -def process_params(par: dict[str, Any]) -> str: - # check input parameters - assert par["input"] is not None, "Pass at least one set of inputs to --input." - if par["mode"] == "wta": - assert len(par["reference"]) == 1, "When mode is \\\\"wta\\\\", --reference should be length 1" - assert par["transcriptome_annotation"] is not None, "When mode is \\\\"wta\\\\", --transcriptome_annotation should be defined" - elif par["mode"] == "targeted": - assert par["transcriptome_annotation"] is None, "When mode is \\\\"targeted\\\\", --transcriptome_annotation should be undefined" - assert par["supplemental_reference"] is None, "When mode is \\\\"targeted\\\\", --supplemental_reference should be undefined" - - # checking sample prefix - if re.match("[^A-Za-z0-9]", par["sample_prefix"]): - logger.warning("--sample_prefix should only consist of letters, numbers or hyphens. 
Replacing all '[^A-Za-z0-9]' with '-'.") - par["sample_prefix"] = re.sub("[^A-Za-z0-9\\\\\\\\-]", "-", par["sample_prefix"]) - - # if par_input is a directory, look for fastq files - if len(par["input"]) == 1 and os.path.isdir(par["input"][0]): - par["input"] = [ os.path.join(dp, f) for dp, dn, filenames in os.walk(par["input"]) for f in filenames if re.match(r'.*\\\\.fastq.gz', f) ] - - # use absolute paths - par["input"] = [ os.path.abspath(f) for f in par["input"] ] - if par["reference"]: - par["reference"] = [ os.path.abspath(f) for f in par["reference"] ] - if par["transcriptome_annotation"]: - par["transcriptome_annotation"] = os.path.abspath(par["transcriptome_annotation"]) - if par["abseq_reference"]: - par["abseq_reference"] = [ os.path.abspath(f) for f in par["abseq_reference"] ] - if par["supplemental_reference"]: - par["supplemental_reference"] = [ os.path.abspath(f) for f in par["supplemental_reference"] ] - par["output"] = os.path.abspath(par["output"]) - - return par - -def generate_config(par: dict[str, Any]) -> str: - content_list = [strip_margin(f"""\\\\ -#!/usr/bin/env cwl-runner - -cwl:tool: rhapsody - -# This is a YML file used to specify the inputs for a BD Genomics {"WTA" if par["mode"] == "wta" else "Targeted" } Rhapsody Analysis pipeline run. See the -# BD Genomics Analysis Setup User Guide (Doc ID: 47383) for more details. - -## Reads (required) - Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want. -Reads: -""")] - - for file in par["input"]: - content_list.append(strip_margin(f"""\\\\ - - class: File - location: "{file}" -""")) - - if par["reference"] and par["mode"] == "wta": - content_list.append(strip_margin(f"""\\\\ - -## Reference_Genome (required) - Path to STAR index for tar.gz format. See Doc ID: 47383 for instructions to obtain pre-built STAR index file. 
-Reference_Genome: - class: File - location: "{par["reference"][0]}" -""")) - - if par["reference"] and par["mode"] == "targeted": - content_list.append(strip_margin(f"""\\\\ - -## Reference (optional) - Path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format. -Reference: -""")) - for file in par["reference"]: - content_list.append(strip_margin(f"""\\\\ - - class: File - location: {file} -""")) - - if par["transcriptome_annotation"]: - content_list.append(strip_margin(f"""\\\\ - -## Transcriptome_Annotation (required) - Path to GTF annotation file -Transcriptome_Annotation: - class: File - location: "{par["transcriptome_annotation"]}" -""")) - - if par["abseq_reference"]: - content_list.append(strip_margin(f"""\\\\ - -## AbSeq_Reference (optional) - Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used. -AbSeq_Reference: -""")) - for file in par["abseq_reference"]: - content_list.append(strip_margin(f"""\\\\ - - class: File - location: {file} -""")) - - if par["supplemental_reference"]: - content_list.append(strip_margin(f"""\\\\ - -## Supplemental_Reference (optional) - Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment. -Supplemental_Reference: -""")) - for file in par["supplemental_reference"]: - content_list.append(strip_margin(f"""\\\\ - - class: File - location: {file} -""")) - - ## Putative Cell Calling Settings - content_list.append(strip_margin(f"""\\\\ - -#################################### -## Putative Cell Calling Settings ## -#################################### -""")) - - if par["putative_cell_call"]: - content_list.append(strip_margin(f"""\\\\ -## Putative cell calling dataset (optional) - Specify the dataset to be used for putative cell calling: mRNA or AbSeq_Experimental. -## For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above. 
-## By default, the mRNA data will be used for putative cell calling. -Putative_Cell_Call: {par["putative_cell_call"]} -""")) - - if par["exact_cell_count"]: - content_list.append(strip_margin(f"""\\\\ -## Exact cell count (optional) - Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count -Exact_Cell_Count: {par["exact_cell_count"]} -""")) - - if par["disable_putative_calling"]: - content_list.append(strip_margin(f"""\\\\ -## Disable Refined Putative Cell Calling (optional) - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set. -## The values can be true or false. By default, the refined algorithm is used. -Basic_Algo_Only: {str(par["disable_putative_calling"]).lower()} -""")) - - ## Subsample Settings - content_list.append(strip_margin(f"""\\\\ - -######################## -## Subsample Settings ## -######################## -""" - )) - - if par["subsample"]: - content_list.append(strip_margin(f"""\\\\ -## Subsample (optional) - A number >1 or fraction (0 < n < 1) to indicate the number or percentage of reads to subsample. -Subsample: {par["subsample"]} -""")) - - if par["subsample_seed"]: - content_list.append(strip_margin(f"""\\\\ -## Subsample seed (optional) - A seed for replicating a previous subsampled run. 
-Subsample_seed: {par["subsample_seed"]} -""")) - - - ## Multiplex options - content_list.append(strip_margin(f"""\\\\ - -####################### -## Multiplex options ## -####################### -""" - )) - - if par["sample_tags_version"]: - content_list.append(strip_margin(f"""\\\\ -## Sample Tags Version (optional) - Specify if multiplexed run: human, hs, mouse or mm -Sample_Tags_Version: {par["sample_tags_version"]} -""")) - - if par["tag_names"]: - content_list.append(strip_margin(f"""\\\\ -## Tag_Names (optional) - Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv -# Do not use the special characters: &, (), [], {{}}, <>, ?, | -Tag_Names: [{', '.join(par["tag_names"])}] -""")) - - ## VDJ options - content_list.append(strip_margin(f"""\\\\ - -################# -## VDJ options ## -################# -""" - )) - - if par["vdj_version"]: - content_list.append(strip_margin(f"""\\\\ -## VDJ Version (optional) - Specify if VDJ run: human, mouse, humanBCR, humanTCR, mouseBCR, mouseTCR -VDJ_Version: {par["vdj_version"]} -""")) - - ## VDJ options - content_list.append(strip_margin(f"""\\\\ - -######################## -## Additional Options ## -######################## -""" - )) - - if par["sample_prefix"]: - content_list.append(strip_margin(f"""\\\\ -## Run Name (optional) - Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces. 
-Run_Name: {par["sample_prefix"]} -""")) - - ## Write config to file - return ''.join(content_list) - -def generate_cwl_file(par: dict[str, Any], meta: dict[str, Any]) -> str: - # create cwl file (if need be) - if par["mode"] == "wta": - orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_wta_1.10.1_nodocker.cwl") - elif par["mode"] == "targeted": - orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_targeted_1.10.1_nodocker.cwl") - - # Inject computational requirements into pipeline - if meta["memory_mb"] or meta["cpus"]: - cwl_file = os.path.join(par["output"], "pipeline.cwl") - - # Read in the file - with open(orig_cwl_file, 'r') as file : - cwl_data = file.read() - - # Inject computational requirements into pipeline - if meta["memory_mb"]: - memory = int(meta["memory_mb"]) - 2000 # keep 2gb for OS - cwl_data = re.sub('"ramMin": [^\\\\n]*,\\\\n', f'"ramMin": {memory},\\\\n', cwl_data) - if meta["cpus"]: - cwl_data = re.sub('"coresMin": [^\\\\n]*,\\\\n', f'"coresMin": {meta["cpus"]},\\\\n', cwl_data) - - # Write the file out again - with open(cwl_file, 'w') as file: - file.write(cwl_data) - else: - cwl_file = orig_cwl_file - - return cwl_file - -def process_fasta(feature_type: str, path: str) -> pd.DataFrame: - with open(path) as f: - df = pd.DataFrame(data={ - 'feature_type': feature_type, - 'feature_id': [line[1:].strip() for line in f if line[0] == ">"], - 'reference_file': os.path.basename(path), - }) - return df - -def process_gtf(feature_type: str, path: str) -> pd.DataFrame: - with open(path) as f: - data = [] - for line in f: - if not line.startswith("#"): - attr = dict(item.strip().split(' ') for item in line.split('\\\\t')[8].strip('\\\\n').split(';') if item) - row = { - 'feature_types': feature_type, - 'feature_ids': attr["gene_name"].strip("\\\\""), - 'reference_file': os.path.basename(path), - } - data.append(row) - df = pd.DataFrame(data) - df = df.drop_duplicates() - return df - -def extract_feature_types(par: dict[str, Any]): - 
feature_types = [] - - if par["mode"] == "targeted": - for file in par["reference"]: - logger.info(f"Processing reference fasta {file}") - feature_types.append(process_fasta("Gene Expression", file)) - - if par["mode"] == "wta": - file = par["transcriptome_annotation"] - logger.info(f"Processing reference gtf {file}") - feature_types.append(process_gtf("Gene Expression", file)) - - if par["abseq_reference"]: - for file in par["abseq_reference"]: - logger.info(f"Processing abseq fasta {file}") - feature_types.append(process_fasta("Antibody Capture", file)) - - if par["supplemental_reference"]: - for file in par["supplemental_reference"]: - logger.info(f"Processing supp fasta {file}") - feature_types.append(process_fasta("Other", file)) - - return pd.concat(feature_types) - -def main(par: dict[str, Any], meta: dict[str, Any]): - # Preprocess params - par = process_params(par) - - # Create output dir if not exists - if not os.path.exists(par["output"]): - os.makedirs(par["output"]) - - ## Process parameters - proc_pars = ["--no-container", "--outdir", par["output"]] - - if par["parallel"]: - proc_pars.append("--parallel") - - if par["timestamps"]: - proc_pars.append("--timestamps") - - with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_wta-", dir=meta["temp_dir"]) as temp_dir: - # extract transcriptome gtf if need be - if par["transcriptome_annotation"] and is_gz_file(par["transcriptome_annotation"]): - with open(os.path.join(temp_dir, "transcriptome.gtf"), 'wb') as genes_uncompressed: - with gzip.open(par["transcriptome_annotation"], 'rb') as genes_compressed: - shutil.copyfileobj(genes_compressed, genes_uncompressed) - par["transcriptome_annotation"] = genes_uncompressed.name - - # Create params file - config_file = os.path.join(par["output"], "config.yml") - config_content = generate_config(par) - with open(config_file, "w") as f: - f.write(config_content) - - # Create cwl file (if need be) - cwl_file = generate_cwl_file(par, meta) - - ## Run pipeline - if 
not par["dryrun"]: - cmd = ["cwl-runner"] + proc_pars + [cwl_file, os.path.basename(config_file)] - - env = dict(os.environ) - env["TMPDIR"] = temp_dir - - logger.info("> " + ' '.join(cmd)) - _ = subprocess.check_call( - cmd, - cwd=os.path.dirname(config_file), - env=env - ) - - # extracting feature ids from references - # extract info from reference files (while they still exist) - feature_df = extract_feature_types(par) - feature_types_file = os.path.join(par["output"], "feature_types.tsv") - feature_df.to_csv(feature_types_file, sep="\\\\t", index=False) - - - if not par["dryrun"]: - # look for counts file - if not par["sample_prefix"]: - par["sample_prefix"] = "sample" - counts_filename = par["sample_prefix"] + "_RSEC_MolsPerCell.csv" - - if par["sample_tags_version"]: - counts_filename = "Combined_" + counts_filename - counts_file = os.path.join(par["output"], counts_filename) - - if not os.path.exists(counts_file): - raise ValueError(f"Could not find output counts file '{counts_filename}'") - - # look for metrics file - metrics_filename = par["sample_prefix"] + "_Metrics_Summary.csv" - metrics_file = os.path.join(par["output"], metrics_filename) - if not os.path.exists(metrics_file): - raise ValueError(f"Could not find output metrics file '{metrics_filename}'") - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_bd_rhapsody", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a 
map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
- // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/nextflow.config b/target/nextflow/mapping/bd_rhapsody/nextflow.config deleted file mode 100644 index 
9e8ade0d47a..00000000000 --- a/target/nextflow/mapping/bd_rhapsody/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'bd_rhapsody' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe CWL pipeline file is obtained by cloning \'https://bitbucket.org/CRSwDev/cwl/src/master/\' and removing all objects with class \'DockerRequirement\' from the YML.\n\nThis pipeline can be used for a targeted analysis (with `--mode targeted`) or for a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `"targeted"`, then either the `--reference` or `--abseq_reference` parameters must be defined.\n* If mode is `"wta"`, then `--reference` and `--transcriptome_annotation` must be defined, `--abseq_reference` and `--supplemental_reference` is optional.\n\nThe reference_genome and transcriptome_annotation files can be generated with the make_reference pipeline.\nAlternatively, BD also provides standard references which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } 
- singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml b/target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml deleted file mode 100644 index 81ed1d3a705..00000000000 --- 
a/target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Inputs -mode: # please fill in - example: "wta" -input: # please fill in - example: ["input.fastq.gz"] -reference: # please fill in - example: ["reference_genome.tar.gz|reference.fasta"] -# transcriptome_annotation: "transcriptome.gtf" -# abseq_reference: ["abseq_reference.fasta"] -# supplemental_reference: ["supplemental_reference.fasta"] -sample_prefix: "sample" - -# Outputs -# output: "$id.$key.output.output" - -# Putative cell calling settings -# putative_cell_call: "mRNA" -# exact_cell_count: 10000 -disable_putative_calling: false - -# Subsample arguments -# subsample: 0.01 -# subsample_seed: 3445 - -# Multiplex arguments -# sample_tags_version: "human" -# tag_names: ["4-mySample", "9-myOtherSample", "6-alsoThisSample"] - -# VDJ arguments -# vdj_version: "human" - -# CWL-runner arguments -parallel: true -timestamps: false -dryrun: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/bd_rhapsody/nextflow_schema.json b/target/nextflow/mapping/bd_rhapsody/nextflow_schema.json deleted file mode 100644 index 2f080179cc6..00000000000 --- a/target/nextflow/mapping/bd_rhapsody/nextflow_schema.json +++ /dev/null @@ -1,239 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "bd_rhapsody", - "description": "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe CWL pipeline file is obtained by cloning \u0027https://bitbucket.org/CRSwDev/cwl/src/master/\u0027 and removing all objects with class \u0027DockerRequirement\u0027 from the YML.\n\nThis pipeline can be used for a targeted analysis (with `--mode targeted`) or for a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\"targeted\"`, then either the `--reference` or `--abseq_reference` parameters must be defined.\n* If mode is `\"wta\"`, then `--reference` and 
`--transcriptome_annotation` must be defined, `--abseq_reference` and `--supplemental_reference` is optional.\n\nThe reference_genome and transcriptome_annotation files can be generated with the make_reference pipeline.\nAlternatively, BD also provides standard references which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "mode": { - "type": "string", - "description": "Type: `string`, required, example: `wta`, choices: ``wta`, `targeted``. Whether to run a whole transcriptome analysis (WTA) or a targeted analysis", - "help_text": "Type: `string`, required, example: `wta`, choices: ``wta`, `targeted``. Whether to run a whole transcriptome analysis (WTA) or a targeted analysis.", - "enum": ["wta", "targeted"] - - }, - - "input": { - "type": "string", - "description": "Type: List of `file`, required, example: `input.fastq.gz`, multiple_sep: `\";\"`. Path to your read files in the FASTQ", - "help_text": "Type: List of `file`, required, example: `input.fastq.gz`, multiple_sep: `\";\"`. Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want." - }, - - "reference": { - "type": "string", - "description": "Type: List of `file`, required, example: `reference_genome.tar.gz|reference.fasta`, multiple_sep: `\";\"`. Refence to map to", - "help_text": "Type: List of `file`, required, example: `reference_genome.tar.gz|reference.fasta`, multiple_sep: `\";\"`. Refence to map to. For `--mode wta`, this is the path to STAR index as a tar.gz file. 
For `--mode targeted`, this is the path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format" - }, - - "transcriptome_annotation": { - "type": "string", - "description": "Type: `file`, example: `transcriptome.gtf`. Path to GTF annotation file (only for `--mode wta`)", - "help_text": "Type: `file`, example: `transcriptome.gtf`. Path to GTF annotation file (only for `--mode wta`)." - }, - - "abseq_reference": { - "type": "string", - "description": "Type: List of `file`, example: `abseq_reference.fasta`, multiple_sep: `\";\"`. Path to the AbSeq reference file in FASTA format", - "help_text": "Type: List of `file`, example: `abseq_reference.fasta`, multiple_sep: `\";\"`. Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used." - }, - - "supplemental_reference": { - "type": "string", - "description": "Type: List of `file`, example: `supplemental_reference.fasta`, multiple_sep: `\";\"`. Path to the supplemental reference file in FASTA format", - "help_text": "Type: List of `file`, example: `supplemental_reference.fasta`, multiple_sep: `\";\"`. Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment (only for `--mode wta`)." - }, - - "sample_prefix": { - "type": "string", - "description": "Type: `string`, default: `sample`. Specify a run name to use as the output file base name", - "help_text": "Type: `string`, default: `sample`. Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces.", - "default": "sample" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `output_dir/`. 
Output folder", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `output_dir/`. Output folder. Output still needs to be processed further.", - "default": "$id.$key.output.output" - } - - } - }, - "putative cell calling settings" : { - "title": "Putative cell calling settings", - "type": "object", - "description": "No description", - "properties": { - - "putative_cell_call": { - "type": "string", - "description": "Type: `string`, example: `mRNA`, choices: ``mRNA`, `AbSeq_Experimental``. Specify the dataset to be used for putative cell calling", - "help_text": "Type: `string`, example: `mRNA`, choices: ``mRNA`, `AbSeq_Experimental``. Specify the dataset to be used for putative cell calling. For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above.", - "enum": ["mRNA", "AbSeq_Experimental"] - - }, - - "exact_cell_count": { - "type": "integer", - "description": "Type: `integer`, example: `10000`. Exact cell count - Set a specific number (\u003e=1) of cells as putative, based on those with the highest error-corrected read count", - "help_text": "Type: `integer`, example: `10000`. Exact cell count - Set a specific number (\u003e=1) of cells as putative, based on those with the highest error-corrected read count" - }, - - "disable_putative_calling": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Disable Refined Putative Cell Calling - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve)", - "help_text": "Type: `boolean_true`, default: `false`. Disable Refined Putative Cell Calling - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set.", - "default": "False" - } - - } - }, - "subsample arguments" : { - "title": "Subsample arguments", - "type": "object", - "description": "No description", - "properties": { - - "subsample": { - "type": "number", - "description": "Type: `double`, example: `0.01`. A number \u003e1 or fraction (0 \u003c n \u003c 1) to indicate the number or percentage of reads to subsample", - "help_text": "Type: `double`, example: `0.01`. A number \u003e1 or fraction (0 \u003c n \u003c 1) to indicate the number or percentage of reads to subsample." - }, - - "subsample_seed": { - "type": "integer", - "description": "Type: `integer`, example: `3445`. A seed for replicating a previous subsampled run", - "help_text": "Type: `integer`, example: `3445`. A seed for replicating a previous subsampled run." - } - - } - }, - "multiplex arguments" : { - "title": "Multiplex arguments", - "type": "object", - "description": "No description", - "properties": { - - "sample_tags_version": { - "type": "string", - "description": "Type: `string`, example: `human`, choices: ``human`, `hs`, `mouse`, `mm``. Specify if multiplexed run", - "help_text": "Type: `string`, example: `human`, choices: ``human`, `hs`, `mouse`, `mm``. Specify if multiplexed run.", - "enum": ["human", "hs", "mouse", "mm"] - - }, - - "tag_names": { - "type": "string", - "description": "Type: List of `string`, example: `4-mySample:9-myOtherSample:6-alsoThisSample`, multiple_sep: `\":\"`. Tag_Names (optional) - Specify the tag number followed by \u0027-\u0027 and the desired sample name to appear in Sample_Tag_Metrics", - "help_text": "Type: List of `string`, example: `4-mySample:9-myOtherSample:6-alsoThisSample`, multiple_sep: `\":\"`. 
Tag_Names (optional) - Specify the tag number followed by \u0027-\u0027 and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not use the special characters: \u0026, (), [], {}, \u003c\u003e, ?, |\n" - } - - } - }, - "vdj arguments" : { - "title": "VDJ arguments", - "type": "object", - "description": "No description", - "properties": { - - "vdj_version": { - "type": "string", - "description": "Type: `string`, example: `human`, choices: ``human`, `mouse`, `humanBCR`, `humanBCR`, `humanTCR`, `mouseBCR``. Specify if VDJ run", - "help_text": "Type: `string`, example: `human`, choices: ``human`, `mouse`, `humanBCR`, `humanBCR`, `humanTCR`, `mouseBCR``. Specify if VDJ run.", - "enum": ["human", "mouse", "humanBCR", "humanBCR", "humanTCR", "mouseBCR"] - - } - - } - }, - "cwl-runner arguments" : { - "title": "CWL-runner arguments", - "type": "object", - "description": "No description", - "properties": { - - "parallel": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Run jobs in parallel", - "help_text": "Type: `boolean`, default: `true`. Run jobs in parallel.", - "default": "True" - }, - - "timestamps": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Add timestamps to the errors, warnings, and notifications", - "help_text": "Type: `boolean_true`, default: `false`. Add timestamps to the errors, warnings, and notifications.", - "default": "False" - }, - - "dryrun": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed", - "help_text": "Type: `boolean_true`, default: `false`. 
If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed.", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/putative cell calling settings" - }, - { - "$ref": "#/definitions/subsample arguments" - }, - { - "$ref": "#/definitions/multiplex arguments" - }, - { - "$ref": "#/definitions/vdj arguments" - }, - { - "$ref": "#/definitions/cwl-runner arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl b/target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl deleted file mode 100755 index 56a6310bc07..00000000000 --- a/target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl +++ /dev/null @@ -1,5159 +0,0 @@ -#!/usr/bin/env cwl-runner -{ - "cwlVersion": "v1.0", - "$graph": [ - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-r1", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AddtoBam.cwl/Annotation_R1" - }, - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#AddtoBam.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--annot-mol-file" - }, - "type": "File", - "id": "#AddtoBam.cwl/Molecular_Annotation" - }, - { - "inputBinding": { - "prefix": "--r2-bam" - }, - "type": "File", - "id": "#AddtoBam.cwl/R2_Bam" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AddtoBam.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--tag-calls" - }, - 
"type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Tag_Calls" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "Annotated_mapping_R2.BAM" - }, - "type": "File", - "id": "#AddtoBam.cwl/Annotated_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AddtoBam.cwl/output" - } - ], - "baseCommand": [ - "mist_add_to_bam.py" - ], - "class": "CommandLineTool", - "id": "#AddtoBam.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - ], - "id": "#AlignR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--index" - }, - "type": "File", - "id": "#AlignR2.cwl/Index" - }, - { - "inputBinding": { - "prefix": "--r2-fastqs", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/R2" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AlignR2.cwl/Run_Metadata" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "$(String(runtime.cores))" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*zip" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/Alignments" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AlignR2.cwl/output" - } - ], - "baseCommand": [ - "mist_align_R2.py" - ], - "class": "CommandLineTool", - "id": "#AlignR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateMolecules.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": 
"#AnnotateMolecules.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--use-dbec" - }, - "type": [ - "null", - "boolean" - ], - "id": "#AnnotateMolecules.cwl/Use_DBEC" - }, - { - "inputBinding": { - "prefix": "--valid-annot" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Valids" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_GeneStatus.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Gene_Status_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).max_count)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Max_Count" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Mol_Annot_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Total_Molecules" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_molecules.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateMolecules.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--filter-metrics", - "itemSeparator": "," - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#AnnotateR1.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--R1" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateR1.cwl/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 2000, - "class": "ResourceRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_R1.csv.gz" - }, - "type": "File", - "id": "#AnnotateR1.cwl/Annotation_R1" - }, - { - "outputBinding": { - "glob": 
"*_R1_error_count_table.npy" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_error_count_table" - }, - { - "outputBinding": { - "glob": "*_R1_read_count_breakdown.json" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_read_count_breakdown" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR1.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R1.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR1.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF_Annotation" - }, - { - "inputBinding": { - "prefix": "--R2-zip" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_zip" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--transcript-length" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/Transcript_Length" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*Annotation_R2.csv.gz" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Annot_R2" - }, - { - "outputBinding": { - "glob": "*-annot.gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*mapping_R2.BAM" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_Bam" - }, - { - "outputBinding": { - "glob": "*_picard_quality_metrics.csv.gz" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_Quality_Metrics" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR2.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R2.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": 
"--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--extra-seqs", - "itemSeparator": "," - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Extra_Seqs" - }, - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#AnnotateReads.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/Putative_Cell_Call" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_error_count_table" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_read_count_breakdown" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Quality_Metrics" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - { - "class": "InitialWorkDirRequirement", - "listing": [ - { - "writable": false, - "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = 
getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", - "entryname": "manifest.json" - } - ] - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "4" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_Read.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Annotation_Read" - }, - { - "outputBinding": { - "glob": "*read1_error_rate_archive*" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Read1_error_rate" - }, - { - "outputBinding": { - "glob": "*_SeqMetrics.csv.gz" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Seq_Metrics" - }, - { - "outputBinding": { - "glob": "*Sorted_Valid_Reads.csv.*" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/Valid_Reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_ig_reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_tcr_reads" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateReads.cwl/output" - }, - { - "outputBinding": { - "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validIgReads" - }, - { - "outputBinding": { - "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validTcrReads" - } - ], - 
"baseCommand": [ - "mist_annotate_reads.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateReads.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#BundleLogs.cwl/log_files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - } - ], - "outputs": [ - { - "type": "Directory", - "id": "#BundleLogs.cwl/logs_dir" - } - ], - "class": "ExpressionTool", - "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? (random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", - "id": "#BundleLogs.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 0 - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/molsPerCellMatrix" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*cell_type_experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/cellTypePredictions" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Cell_Classifier.cwl/log" - } - ], - "baseCommand": [ - "mist_cell_classifier.py" - ], - "class": "CommandLineTool", - "id": "#Cell_Classifier.cwl" - }, - { - "inputs": [ - { - "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", - "inputBinding": { - "prefix": "--min-split-size" - }, - "type": [ - 
"null", - "int" - ], - "id": "#CheckFastqs.cwl/MinChunkSize" - }, - { - "inputBinding": { - "prefix": "--reads", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#CheckFastqs.cwl/Reads" - }, - { - "inputBinding": { - "prefix": "--subsample" - }, - "type": [ - "null", - "float" - ], - "id": "#CheckFastqs.cwl/Subsample" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/Subsample_Seed" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "CheckFastqs does several quality control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", - "baseCommand": [ - "mist_check_fastqs.py" - ], - "id": "#CheckFastqs.cwl", - "outputs": [ - { - "outputBinding": { - "glob": "bead_version.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/Bead_Version" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": 
"#CheckFastqs.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/FastqReadPairs" - }, - { - "outputBinding": { - "glob": "files_to_skip_split_and_subsample.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" - }, - "type": { - "items": "string", - "type": "array" - }, - "id": "#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" - }, - "type": [ - "null", - "string" - ], - "id": "#CheckFastqs.cwl/Libraries" - }, - { - "outputBinding": { - "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#CheckFastqs.cwl/ReadsList" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" - }, - "type": "int", - "id": "#CheckFastqs.cwl/SubsampleSeed" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" - }, - "type": "float", - "id": "#CheckFastqs.cwl/SubsamplingRatio" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": 
"#CheckFastqs.cwl/log" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--abseq-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/AbSeq_Reference" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckReference.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/Reference" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#CheckReference.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--supplemental-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/Supplemental_Reference" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "combined_extra_seq.fasta" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Extra_Seqs" - }, - { - "outputBinding": { - "glob": "full-gene-list.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Full_Genes" - }, - { - "outputBinding": { - "glob": "*gtf", - "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. 
AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*-annot.*", - "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" - }, - "type": "File", - "id": "#CheckReference.cwl/Index" - }, - { - "outputBinding": { - "glob": "target-gene.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Target_Gene_Mapping" - }, - { - "outputBinding": { - "glob": "transcript_length.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Transcript_Length" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#CheckReference.cwl/output" - } - ], - "baseCommand": [ - "mist_check_references.py" - ], - "class": "CommandLineTool", - "id": "#CheckReference.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--dense-data-table" - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparse.cwl/Dense_Data_Table" - }, - { - "inputBinding": { - "prefix": "--gene-list" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Gene_List" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Run_Metadata" - } - ], - "requirements": [ - ], - 
"outputs": [ - { - "outputBinding": { - "glob": "*.csv.gz" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Data_Tables" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#DensetoSparse.cwl/output" - } - ], - "baseCommand": [ - "mist_dense_to_sparse.py" - ], - "class": "CommandLineTool", - "id": "#DensetoSparse.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparseFile.cwl/GDT_cell_order" - } - ], - "requirements": [ - ], - "stdout": "cell_order.json", - "outputs": [ - { - "type": "stdout", - "id": "#DensetoSparseFile.cwl/Cell_Order" - } - ], - "baseCommand": "cat", - "id": "#DensetoSparseFile.cwl", - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--full-gene-list" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Full_Genes" - }, - { - "inputBinding": { - "prefix": "--gene-status", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Gene_Status_List" - }, - { - "inputBinding": { - "prefix": "--max-count", - "itemSeparator": "," - }, - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Max_Count" - }, - { - "inputBinding": { - "prefix": "--mol-annot", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Molecule_Annotation_List" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#GetDataTable.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#GetDataTable.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#GetDataTable.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "prefix": "--tag-names", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - 
} - ], - "id": "#GetDataTable.cwl/Tag_Names" - }, - { - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Total_Molecules" - } - ], - "requirements": [ - { - "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", - "class": "ResourceRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "metrics-files.tar.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Annot_Files" - }, - { - "outputBinding": { - "glob": "Annotations/*_Bioproduct_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Bioproduct_Stats" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*.png" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Cell_Label_Filter" - }, - { - "outputBinding": { - "glob": "cell_order.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Cell_Order" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule_corrected.csv.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Corrected_Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "*PerCell_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables" - }, - { - "outputBinding": { - "glob": "*PerCell_Unfiltered_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" - }, - { - "outputBinding": { - "glob": "*_Expression_Data.st.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Expression_Data" - }, - { - "outputBinding": { - "glob": "*_Expression_Data_Unfiltered.st.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" - }, - { - "outputBinding": { - "glob": "gene_list.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Gene_List" - }, - { - 
"outputBinding": { - "glob": "Annotations/*_Annotation_Molecule.csv.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Putative_Cells_Origin" - }, - { - "outputBinding": { - "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Annotation" - }, - { - "outputBinding": { - "glob": "Trueno/*_Calls.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Calls" - }, - { - "outputBinding": { - "glob": "Trueno/*csv" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Trueno_out" - }, - { - "outputBinding": { - "glob": "Trueno/*zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Trueno_zip" - }, - { - "outputBinding": { - "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#GetDataTable.cwl/output" - } - ], - "baseCommand": [ - "mist_get_datatables.py" - ], - "class": "CommandLineTool", - "id": "#GetDataTable.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#IndexBAM.cwl/BamFile" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_index.log", - "outputs": [ - { - "outputBinding": { - "glob": "*.bai" - }, - "type": "File", - "id": "#IndexBAM.cwl/Index" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": 
"File", - "id": "#IndexBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "index" - ], - "id": "#IndexBAM.cwl", - "arguments": [ - { - "position": 2, - "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/AbSeq_UMI" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Barcode_Num" - }, - { - "type": [ - "null", - "File" - ], - "id": "#InternalSettings.cwl/Extra_Seqs" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Label_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/MinChunkSize" - }, - { - "type": [ - "null", - "long" - ], - "id": "#InternalSettings.cwl/NumRecordsPerSplit" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - "string" - ], - "id": "#InternalSettings.cwl/Seq_Run" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Target_analysis" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Use_DBEC" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" - } - ], - "class": "ExpressionTool", - "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput 
= internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", - "id": "#InternalSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/AbSeq_Reference", - "label": "AbSeq Reference" - }, - { - "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set.", - "type": [ - "null", - "boolean" - ], - "id": "#main/Basic_Algo_Only", - "label": "Disable Refined Putative Cell Calling" - }, - { - "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", - "type": [ - "null", - "int" - ], - "id": "#main/Exact_Cell_Count", - "label": "Exact Cell Count" - }, - { - "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. 
AbSeq (Experimental) is for troubleshooting only.", - "type": [ - "null", - { - "symbols": [ - "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", - "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" - ], - "type": "enum", - "name": "#main/Putative_Cell_Call/Putative_Cell_Call" - } - ], - "id": "#main/Putative_Cell_Call", - "label": "Putative Cell Calling" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/Reads", - "label": "Reads" - }, - { - "doc": "A fasta file containing the mRNA panel amplicon targets used in the experiment", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Reference", - "label": "Reference" - }, - { - "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", - "type": [ - "null", - "string" - ], - "id": "#main/Run_Name", - "label": "Run Name" - }, - { - "doc": "The sample multiplexing kit version. This option should only be set for a multiplexed experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/Sample_Tags_Version/Sample_Tags_Version/human", - "#main/Sample_Tags_Version/Sample_Tags_Version/hs", - "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", - "#main/Sample_Tags_Version/Sample_Tags_Version/mm", - "#main/Sample_Tags_Version/Sample_Tags_Version/custom" - ], - "type": "enum", - "name": "#main/Sample_Tags_Version/Sample_Tags_Version" - } - ], - "id": "#main/Sample_Tags_Version", - "label": "Sample Tags Version" - }, - { - "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", - "type": [ - "null", - "float" - ], - "id": "#main/Subsample", - "label": "Subsample Reads" - }, - { - "doc": "For use when replicating a previous subsampling run only. 
Obtain the seed generated from the log file for the SplitFastQ node.\n", - "type": [ - "null", - "int" - ], - "id": "#main/Subsample_seed", - "label": "Subsample Seed" - }, - { - "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alpha numeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#main/Tag_Names", - "label": "Tag Names" - }, - { - "doc": "The VDJ species and chain types. This option should only be set for VDJ experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/VDJ_Version/VDJ_Version/human", - "#main/VDJ_Version/VDJ_Version/hs", - "#main/VDJ_Version/VDJ_Version/mouse", - "#main/VDJ_Version/VDJ_Version/mm", - "#main/VDJ_Version/VDJ_Version/humanBCR", - "#main/VDJ_Version/VDJ_Version/humanTCR", - "#main/VDJ_Version/VDJ_Version/mouseBCR", - "#main/VDJ_Version/VDJ_Version/mouseTCR" - ], - "type": "enum", - "name": "#main/VDJ_Version/VDJ_Version" - } - ], - "id": "#main/VDJ_Version", - "label": "VDJ Species Version" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "The BD Rhapsody\u2122 assays are used to create sequencing libraries from single cell transcriptomes.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files and a reference file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", - "label": "BD Rhapsody\u2122 Targeted Analysis Pipeline", - "steps": [ - { - "run": "#AddtoBam.cwl", - "scatter": [ - "#main/AddtoBam/R2_Bam" - ], - "in": [ - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AddtoBam/Annotation_R1" - }, - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/AddtoBam/Cell_Order" - }, - { - "source": "#main/GetDataTable/Corrected_Molecular_Annotation", - "id": "#main/AddtoBam/Molecular_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Bam", - "id": "#main/AddtoBam/R2_Bam" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AddtoBam/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Tag_Calls", - "id": "#main/AddtoBam/Tag_Calls" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AddtoBam/Target_Gene_Mapping" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AddtoBam", - "out": [ - "#main/AddtoBam/Annotated_Bam", - "#main/AddtoBam/output" - ] - }, - { - "run": "#AlignR2.cwl", - "out": [ - "#main/AlignR2/Alignments", - "#main/AlignR2/output" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AlignR2", - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AlignR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/Index", - "id": "#main/AlignR2/Index" - }, - { - "source": "#main/QualityFilterOuter/R2", - "id": "#main/AlignR2/R2" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AlignR2/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateMolecules.cwl", - "scatter": [ - "#main/AnnotateMolecules/Valids" - ], - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateMolecules/AbSeq_UMI" - }, - { - "source": 
"#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateMolecules/Run_Metadata" - }, - { - "source": "#main/Internal_Settings/Use_DBEC", - "id": "#main/AnnotateMolecules/Use_DBEC" - }, - { - "source": "#main/AnnotateReads/Valid_Reads", - "id": "#main/AnnotateMolecules/Valids" - } - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateMolecules", - "out": [ - "#main/AnnotateMolecules/Mol_Annot_List", - "#main/AnnotateMolecules/Gene_Status_List", - "#main/AnnotateMolecules/Max_Count", - "#main/AnnotateMolecules/Total_Molecules", - "#main/AnnotateMolecules/output" - ] - }, - { - "id": "#main/AnnotateR1", - "out": [ - "#main/AnnotateR1/Annotation_R1", - "#main/AnnotateR1/R1_error_count_table", - "#main/AnnotateR1/R1_read_count_breakdown", - "#main/AnnotateR1/output" - ], - "run": "#AnnotateR1.cwl", - "scatter": [ - "#main/AnnotateR1/R1" - ], - "in": [ - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateR1/Filter_Metrics" - }, - { - "source": "#main/QualityFilterOuter/R1", - "id": "#main/AnnotateR1/R1" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR1/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateR2.cwl", - "scatter": [ - "#main/AnnotateR2/R2_zip" - ], - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/GTF", - "id": "#main/AnnotateR2/GTF_Annotation" - }, - { - "source": "#main/AlignR2/Alignments", - "id": "#main/AnnotateR2/R2_zip" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR2/Run_Metadata" - }, - { - "source": "#main/CheckReference/Transcript_Length", - "id": "#main/AnnotateR2/Transcript_Length" - } - ], - "requirements": [ - { - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateR2", - "out": [ - "#main/AnnotateR2/Annot_R2", - "#main/AnnotateR2/R2_Bam", - 
"#main/AnnotateR2/GTF", - "#main/AnnotateR2/output", - "#main/AnnotateR2/R2_Quality_Metrics" - ] - }, - { - "run": "#AnnotateReads.cwl", - "out": [ - "#main/AnnotateReads/Seq_Metrics", - "#main/AnnotateReads/Valid_Reads", - "#main/AnnotateReads/Read1_error_rate", - "#main/AnnotateReads/Annotation_Read", - "#main/AnnotateReads/output", - "#main/AnnotateReads/validTcrReads", - "#main/AnnotateReads/validIgReads", - "#main/AnnotateReads/num_valid_tcr_reads", - "#main/AnnotateReads/num_valid_ig_reads" - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateReads", - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateReads/AbSeq_UMI" - }, - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateReads/Extra_Seqs" - }, - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateReads/Filter_Metrics" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/AnnotateReads/Putative_Cell_Call" - }, - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AnnotateReads/R1_Annotation" - }, - { - "source": "#main/AnnotateR1/R1_error_count_table", - "id": "#main/AnnotateReads/R1_error_count_table" - }, - { - "source": "#main/AnnotateR1/R1_read_count_breakdown", - "id": "#main/AnnotateReads/R1_read_count_breakdown" - }, - { - "source": "#main/AnnotateR2/Annot_R2", - "id": "#main/AnnotateReads/R2_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Quality_Metrics", - "id": "#main/AnnotateReads/R2_Quality_Metrics" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateReads/Run_Metadata" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AnnotateReads/Target_Gene_Mapping" - } - ] - }, - { - "out": [ - "#main/BundleLogs/logs_dir" - ], - "run": "#BundleLogs.cwl", - "id": "#main/BundleLogs", - "in": [ - { - "source": [ - "#main/AnnotateReads/output", - 
"#main/AnnotateR1/output", - "#main/AnnotateR2/output", - "#main/CheckReference/output", - "#main/GetDataTable/output", - "#main/Metrics/output", - "#main/AddtoBam/output", - "#main/AnnotateMolecules/output", - "#main/QualityFilterOuter/output", - "#main/CheckFastqs/log", - "#main/SplitAndSubsample/log", - "#main/MergeBAM/log", - "#main/Dense_to_Sparse_Datatable/output", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output", - "#main/IndexBAM/log", - "#main/CellClassifier/log" - ], - "linkMerge": "merge_flattened", - "id": "#main/BundleLogs/log_files" - } - ] - }, - { - "run": "#Cell_Classifier.cwl", - "out": [ - "#main/CellClassifier/cellTypePredictions", - "#main/CellClassifier/log" - ], - "requirements": [ - { - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CellClassifier", - "in": [ - { - "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", - "id": "#main/CellClassifier/molsPerCellMatrix" - } - ] - }, - { - "out": [ - "#main/CheckFastqs/SubsampleSeed", - "#main/CheckFastqs/SubsamplingRatio", - "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "#main/CheckFastqs/FastqReadPairs", - "#main/CheckFastqs/Bead_Version", - "#main/CheckFastqs/Libraries", - "#main/CheckFastqs/ReadsList", - "#main/CheckFastqs/log" - ], - "run": "#CheckFastqs.cwl", - "id": "#main/CheckFastqs", - "in": [ - { - "source": "#main/Internal_Settings/MinChunkSize", - "id": "#main/CheckFastqs/MinChunkSize" - }, - { - "source": "#main/Reads", - "id": "#main/CheckFastqs/Reads" - }, - { - "source": "#main/Subsample_Settings/Subsample_Reads", - "id": "#main/CheckFastqs/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": "#main/CheckFastqs/Subsample_Seed" - } - ] - }, - { - "run": "#CheckReference.cwl", - "out": [ - "#main/CheckReference/Index", - "#main/CheckReference/Extra_Seqs", - "#main/CheckReference/Full_Genes", - "#main/CheckReference/output", - "#main/CheckReference/Transcript_Length", - 
"#main/CheckReference/GTF", - "#main/CheckReference/Target_Gene_Mapping" - ], - "requirements": [ - { - "ramMin": 1000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CheckReference", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/CheckReference/AbSeq_Reference" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/CheckReference/Putative_Cell_Call" - }, - { - "source": "#main/Reference", - "id": "#main/CheckReference/Reference" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/CheckReference/Run_Metadata" - } - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables", - "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": "#main/Dense_to_Sparse_Datatable/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable", - "out": [ - "#main/Dense_to_Sparse_Datatable/Data_Tables", - "#main/Dense_to_Sparse_Datatable/output" - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", 
- "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", - "out": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output" - ] - }, - { - "out": [ - "#main/Dense_to_Sparse_File/Cell_Order" - ], - "run": "#DensetoSparseFile.cwl", - "id": "#main/Dense_to_Sparse_File", - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_File/GDT_cell_order" - } - ] - }, - { - "out": [ - "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/dataTables" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "File", - "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/molsPerCellMatrixForCellClassifier" - } - ], - "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7", - "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", - "class": "ExpressionTool" - }, - "id": "#main/FindDataTableForCellClassifier", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/FindDataTableForCellClassifier/dataTables" - } - ] - }, - { - "out": [ - "#main/GetDataTable/Tag_Calls", - "#main/GetDataTable/Molecular_Annotation", - "#main/GetDataTable/Corrected_Molecular_Annotation", - "#main/GetDataTable/Tag_Annotation", - "#main/GetDataTable/Annot_Files", - 
"#main/GetDataTable/Cell_Label_Filter", - "#main/GetDataTable/Dense_Data_Tables", - "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "#main/GetDataTable/Expression_Data", - "#main/GetDataTable/Expression_Data_Unfiltered", - "#main/GetDataTable/Bioproduct_Stats", - "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "#main/GetDataTable/Putative_Cells_Origin", - "#main/GetDataTable/Protein_Aggregates_Experimental", - "#main/GetDataTable/Trueno_out", - "#main/GetDataTable/Trueno_zip", - "#main/GetDataTable/output", - "#main/GetDataTable/Cell_Order", - "#main/GetDataTable/Gene_List" - ], - "run": "#GetDataTable.cwl", - "id": "#main/GetDataTable", - "in": [ - { - "source": "#main/CheckReference/Full_Genes", - "id": "#main/GetDataTable/Full_Genes" - }, - { - "source": "#main/AnnotateMolecules/Gene_Status_List", - "id": "#main/GetDataTable/Gene_Status_List" - }, - { - "source": "#main/AnnotateMolecules/Max_Count", - "id": "#main/GetDataTable/Max_Count" - }, - { - "source": "#main/AnnotateMolecules/Mol_Annot_List", - "id": "#main/GetDataTable/Molecule_Annotation_List" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/GetDataTable/Putative_Cell_Call" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/GetDataTable/Run_Metadata" - }, - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/GetDataTable/Seq_Metrics" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/GetDataTable/Tag_Names" - }, - { - "source": "#main/AnnotateMolecules/Total_Molecules", - "id": "#main/GetDataTable/Total_Molecules" - } - ] - }, - { - "out": [ - "#main/IndexBAM/Index", - "#main/IndexBAM/log" - ], - "run": "#IndexBAM.cwl", - "id": "#main/IndexBAM", - "in": [ - { - "source": "#main/MergeBAM/Final_Bam", - "id": "#main/IndexBAM/BamFile" - } - ] - }, - { - "out": [ - "#main/Internal_Settings/Read_Filter_Off", - "#main/Internal_Settings/Barcode_Num", - "#main/Internal_Settings/Seq_Run", 
- "#main/Internal_Settings/AbSeq_UMI", - "#main/Internal_Settings/Use_DBEC", - "#main/Internal_Settings/Extra_Seqs", - "#main/Internal_Settings/MinChunkSize", - "#main/Internal_Settings/NumRecordsPerSplit", - "#main/Internal_Settings/Target_analysis", - "#main/Internal_Settings/Subsample_Tags", - "#main/Internal_Settings/VDJ_VGene_Evalue", - "#main/Internal_Settings/VDJ_JGene_Evalue" - ], - "in": [], - "run": "#InternalSettings.cwl", - "id": "#main/Internal_Settings", - "label": "Internal Settings" - }, - { - "out": [ - "#main/MergeBAM/Final_Bam", - "#main/MergeBAM/log" - ], - "run": "#MergeBAM.cwl", - "id": "#main/MergeBAM", - "in": [ - { - "source": "#main/AddtoBam/Annotated_Bam", - "id": "#main/MergeBAM/BamFiles" - }, - { - "source": "#main/Metadata_Settings/Run_Base_Name", - "id": "#main/MergeBAM/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/MergeBAM/Sample_Tags_Version" - } - ] - }, - { - "out": [ - "#main/MergeMultiplex/Multiplex_out" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/SampleTag_Files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/Multiplex_out" - } - ], - "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679", - "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", - "class": "ExpressionTool" - }, - "id": "#main/MergeMultiplex", - "in": [ - { - "source": [ - "#main/GetDataTable/Trueno_out", - "#main/Metrics/Sample_Tag_Out" - ], - "linkMerge": "merge_flattened", - "id": 
"#main/MergeMultiplex/SampleTag_Files" - } - ] - }, - { - "out": [ - "#main/Metadata_Settings/Run_Metadata", - "#main/Metadata_Settings/Run_Base_Name" - ], - "run": "#Metadata.cwl", - "id": "#main/Metadata_Settings", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/Metadata_Settings/AbSeq_Reference" - }, - { - "valueFrom": "Targeted", - "id": "#main/Metadata_Settings/Assay" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", - "id": "#main/Metadata_Settings/Basic_Algo_Only" - }, - { - "source": "#main/CheckFastqs/Bead_Version", - "id": "#main/Metadata_Settings/Bead_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "id": "#main/Metadata_Settings/Exact_Cell_Count" - }, - { - "source": "#main/CheckFastqs/Libraries", - "id": "#main/Metadata_Settings/Libraries" - }, - { - "valueFrom": "BD Rhapsody Targeted Analysis Pipeline", - "id": "#main/Metadata_Settings/Pipeline_Name" - }, - { - "source": "#main/Version/version", - "id": "#main/Metadata_Settings/Pipeline_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/Metadata_Settings/Putative_Cell_Call" - }, - { - "source": "#main/CheckFastqs/ReadsList", - "id": "#main/Metadata_Settings/Reads" - }, - { - "source": "#main/Reference", - "id": "#main/Metadata_Settings/Reference" - }, - { - "source": "#main/Name_Settings/Run_Name", - "id": "#main/Metadata_Settings/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/Metadata_Settings/Sample_Tag_Names" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/Metadata_Settings/Sample_Tags_Version" - }, - { - "source": "#main/Start_Time/Start_Time", - "id": "#main/Metadata_Settings/Start_Time" - }, - { - "source": "#main/Subsample_Settings/Subsample_Reads", - "id": "#main/Metadata_Settings/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": 
"#main/Metadata_Settings/Subsample_Seed" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/Metadata_Settings/VDJ_Version" - } - ] - }, - { - "out": [ - "#main/Metrics/Metrics_Summary", - "#main/Metrics/Metrics_Archive", - "#main/Metrics/output", - "#main/Metrics/Sample_Tag_Out" - ], - "run": "#Metrics.cwl", - "id": "#main/Metrics", - "in": [ - { - "source": "#main/GetDataTable/Annot_Files", - "id": "#main/Metrics/Annot_Files" - }, - { - "source": "#main/AnnotateReads/Read1_error_rate", - "id": "#main/Metrics/Read1_error_rate" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Metrics/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Trueno_zip", - "id": "#main/Metrics/Sample_Tag_Archives" - }, - { - "source": "#main/Internal_Settings/Seq_Run", - "id": "#main/Metrics/Seq_Run" - }, - { - "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "id": "#main/Metrics/UMI_Adjusted_Stats" - }, - { - "source": "#main/VDJ_Compile_Results/vdjMetricsJson", - "id": "#main/Metrics/vdjMetricsJson" - } - ] - }, - { - "out": [ - "#main/Multiplexing_Settings/Tag_Sample_Names", - "#main/Multiplexing_Settings/Sample_Tags_Version" - ], - "in": [ - { - "source": "#main/Sample_Tags_Version", - "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" - }, - { - "source": "#main/Tag_Names", - "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" - } - ], - "run": "#MultiplexingSettings.cwl", - "id": "#main/Multiplexing_Settings", - "label": "Multiplexing Settings" - }, - { - "out": [ - "#main/Name_Settings/Run_Name" - ], - "in": [ - { - "source": "#main/Run_Name", - "id": "#main/Name_Settings/_Run_Name" - } - ], - "run": "#NameSettings.cwl", - "id": "#main/Name_Settings", - "label": "Name Settings" - }, - { - "out": [ - "#main/PairReadFiles/ReadPairs" - ], - "run": "#PairReadFiles.cwl", - "id": "#main/PairReadFiles", - "in": [ - { - "source": "#main/CheckFastqs/FastqReadPairs", - "id": "#main/PairReadFiles/FastqReadPairs" - }, - { - 
"source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#main/PairReadFiles/Reads" - } - ] - }, - { - "out": [ - "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" - ], - "in": [ - { - "source": "#main/Basic_Algo_Only", - "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" - }, - { - "source": "#main/Exact_Cell_Count", - "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" - }, - { - "source": "#main/Putative_Cell_Call", - "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" - } - ], - "run": "#PutativeCellSettings.cwl", - "id": "#main/Putative_Cell_Calling_Settings", - "label": "Putative Cell Calling Settings" - }, - { - "out": [ - "#main/QualityFilterOuter/Filter_Metrics", - "#main/QualityFilterOuter/R1", - "#main/QualityFilterOuter/R2", - "#main/QualityFilterOuter/output" - ], - "run": "#QualityFilterOuter.cwl", - "id": "#main/QualityFilterOuter", - "in": [ - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/QualityFilterOuter/Run_Metadata" - }, - { - "source": "#main/PairReadFiles/ReadPairs", - "id": "#main/QualityFilterOuter/Split_Read_Pairs" - } - ] - }, - { - "out": [ - "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "#main/SplitAndSubsample/log" - ], - "run": "#SplitAndSubsample.cwl", - "id": "#main/SplitAndSubsample", - "in": [ - { - "source": "#main/Reads", - "id": "#main/SplitAndSubsample/Fastqs" - }, - { - "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": "#main/Internal_Settings/NumRecordsPerSplit", - "id": "#main/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#main/CheckFastqs/SubsamplingRatio", - "id": "#main/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#main/CheckFastqs/SubsampleSeed", - "id": "#main/SplitAndSubsample/SubsampleSeed" - } - ] 
- }, - { - "out": [ - "#main/Start_Time/Start_Time" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "string", - "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad/Start_Time" - } - ], - "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad", - "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", - "class": "ExpressionTool" - }, - "id": "#main/Start_Time", - "in": [] - }, - { - "out": [ - "#main/Subsample_Settings/Subsample_Reads", - "#main/Subsample_Settings/Subsample_Seed" - ], - "in": [ - { - "source": "#main/Subsample", - "id": "#main/Subsample_Settings/_Subsample_Reads" - }, - { - "source": "#main/Subsample_seed", - "id": "#main/Subsample_Settings/_Subsample_Seed" - } - ], - "run": "#SubsampleSettings.cwl", - "id": "#main/Subsample_Settings", - "label": "Subsample Settings" - }, - { - "out": [ - "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" - ], - "run": "#UncompressDatatables.cwl", - "id": "#main/Uncompress_Datatables", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/Uncompress_Datatables/Compressed_Data_Table" - }, - { - "source": "#main/GetDataTable/Expression_Data", - "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", - "id": 
"#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Compile_Results/vdjCellsDatatable", - "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "#main/VDJ_Compile_Results/vdjDominantContigs", - "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "#main/VDJ_Compile_Results/vdjMetricsJson", - "#main/VDJ_Compile_Results/vdjMetricsCsv", - "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" - ], - "run": "#VDJ_Compile_Results.cwl", - "id": "#main/VDJ_Compile_Results", - "in": [ - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/VDJ_Compile_Results/Seq_Metrics" - }, - { - "source": "#main/CellClassifier/cellTypePredictions", - "id": "#main/VDJ_Compile_Results/cellTypeMapping" - }, - { - "valueFrom": "$([])", - "id": "#main/VDJ_Compile_Results/chainsToIgnore" - }, - { - "source": "#main/Internal_Settings/VDJ_JGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueJgene" - }, - { - "source": "#main/Internal_Settings/VDJ_VGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueVgene" - }, - { - "source": "#main/VDJ_GatherIGCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/igCalls" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/VDJ_Compile_Results/metadata" - }, - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/VDJ_Compile_Results/putativeCells" - }, - { - "source": 
"#main/VDJ_GatherTCRCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/tcrCalls" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Compile_Results/vdjVersion" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherIGCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherIGCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", - "id": "#main/VDJ_GatherIGCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherTCRCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherTCRCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", - "id": "#main/VDJ_GatherTCRCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_IG/num_splits", - "#main/VDJ_Preprocess_Reads_IG/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_IG", - "in": [ - { - "source": "#main/AnnotateReads/validIgReads", - "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" - }, - { - "source": "#main/AnnotateReads/num_valid_ig_reads", - "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" - }, - { - "valueFrom": "BCR", - "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_TCR/num_splits", - "#main/VDJ_Preprocess_Reads_TCR/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_TCR", - "in": [ - { - "source": "#main/AnnotateReads/validTcrReads", - "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" - }, - { - "source": "#main/AnnotateReads/num_valid_tcr_reads", - "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" - }, - { - "valueFrom": "TCR", - "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Settings/VDJ_Version" - ], - "in": [ - { - "source": "#main/VDJ_Version", - 
"id": "#main/VDJ_Settings/_VDJ_Version" - } - ], - "run": "#VDJ_Settings.cwl", - "id": "#main/VDJ_Settings", - "label": "VDJ Settings" - }, - { - "out": [ - "#main/Version/version" - ], - "run": "#Version.cwl", - "id": "#main/Version", - "in": [] - } - ], - "outputs": [ - { - "outputSource": "#main/GetDataTable/Bioproduct_Stats", - "type": [ - "null", - "File" - ], - "id": "#main/Bioproduct_Stats", - "label": "Bioproduct Statistics" - }, - { - "outputSource": "#main/GetDataTable/Cell_Label_Filter", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Cell_Label_Filter", - "label": "Cell Label Filter" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables", - "label": "Data Tables" - }, - { - "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables_Unfiltered", - "label": "Unfiltered Data Tables" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data", - "label": "Expression Matrix" - }, - { - "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data_Unfiltered", - "label": "Unfiltered Expression Matrix" - }, - { - "outputSource": "#main/MergeBAM/Final_Bam", - "type": "File", - "id": "#main/Final_Bam", - "label": "Final BAM File" - }, - { - "outputSource": "#main/IndexBAM/Index", - "type": "File", - "id": "#main/Final_Bam_Index", - "label": "Final BAM Index" - }, - { - "outputSource": "#main/CellClassifier/cellTypePredictions", - "type": [ - "null", - "File" - ], - "id": "#main/ImmuneCellClassification(Experimental)", - "label": "Immune Cell Classification (Experimental)" - }, - { - "outputSource": 
"#main/BundleLogs/logs_dir", - "type": "Directory", - "id": "#main/Logs", - "label": "Pipeline Logs" - }, - { - "outputSource": "#main/Metrics/Metrics_Summary", - "type": "File", - "id": "#main/Metrics_Summary", - "label": "Metrics Summary" - }, - { - "outputSource": "#main/MergeMultiplex/Multiplex_out", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Multiplex" - }, - { - "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", - "type": [ - "null", - "File" - ], - "id": "#main/Protein_Aggregates_Experimental", - "label": "Protein Aggregates (Experimental)" - }, - { - "outputSource": "#main/GetDataTable/Putative_Cells_Origin", - "type": [ - "null", - "File" - ], - "id": "#main/Putative_Cells_Origin", - "label": "Putative Cells Origin" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatable", - "label": "vdjCellsDatatable" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatableUncorrected", - "label": "vdjCellsDatatableUncorrected" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjDominantContigs", - "label": "vdjDominantContigs" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", - "type": [ - "null", - "File" - ], - "id": "#main/vdjMetricsCsv", - "label": "vdjMetricsCsv" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjUnfilteredContigs", - "label": "vdjUnfilteredContigs" - } - ], - "id": "#main", - "class": "Workflow" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#MergeBAM.cwl/BamFiles" - }, - { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Run_Name" - }, 
- { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Sample_Tags_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_merge.log", - "outputs": [ - { - "outputBinding": { - "glob": "*_final.BAM" - }, - "type": "File", - "id": "#MergeBAM.cwl/Final_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#MergeBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "merge" - ], - "id": "#MergeBAM.cwl", - "arguments": [ - { - "prefix": "-@", - "valueFrom": "$(runtime.cores)" - }, - { - "position": 0, - "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" - } - ], - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 4, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/AbSeq_Reference" - }, - { - "type": "string", - "id": "#Metadata.cwl/Assay" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#Metadata.cwl/Basic_Algo_Only" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#Metadata.cwl/Bead_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Label_Version" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Libraries" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Name" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Putative_Cell_Call" - }, - { - "type": [ - "null", - "boolean" - ], - "id": 
"#Metadata.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reads" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Name" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Sample_Tag_Names" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Start_Time" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Subsample_Seed" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Supplemental_Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "run_metadata.json", - "outputs": [ - { - "outputBinding": { - "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" - }, - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Base_Name" - }, - { - "type": "stdout", - "id": "#Metadata.cwl/Run_Metadata" - } - ], - "baseCommand": "echo", - "id": "#Metadata.cwl", - "arguments": [ - { - "prefix": "" - }, - { - "shellQuote": true, - "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = 
bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = \"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell 
Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-files" - }, - "type": "File", - "id": "#Metrics.cwl/Annot_Files" - }, - { - "inputBinding": { - "prefix": "--read1-error-rate" - }, - "type": "File", - "id": "#Metrics.cwl/Read1_error_rate" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#Metrics.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--sample-tag-archives", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Archives" - }, - { - "inputBinding": { - "prefix": "--seq-run" - }, - "type": [ - "null", - "string" - ], - "id": "#Metrics.cwl/Seq_Run" - }, - { - "inputBinding": { - "prefix": "--umi-adjusted-stats" - }, - "type": [ - "null", - "File" - ], - "id": "#Metrics.cwl/UMI_Adjusted_Stats" - }, - { - "inputBinding": { - "prefix": "--vdj-metrics-fp" - }, - "type": [ 
- "null", - "File" - ], - "id": "#Metrics.cwl/vdjMetricsJson" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "internal-metrics-archive.tar.gz" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Archive" - }, - { - "outputBinding": { - "glob": "*_Metrics_Summary.csv" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Summary" - }, - { - "outputBinding": { - "glob": "*.zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Out" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Metrics.cwl/output" - } - ], - "baseCommand": [ - "mist_metrics.py" - ], - "class": "CommandLineTool", - "id": "#Metrics.cwl" - }, - { - "inputs": [ - { - "default": "Targeted", - "type": "string", - "id": "#MultiplexingSettings.cwl/Assay" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" - } - ], - "class": "ExpressionTool", - "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || _Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n 
{\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", - "id": "#MultiplexingSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/_Run_Name" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/Run_Name" - } - ], - "class": "ExpressionTool", - "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", - "id": "#NameSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/FastqReadPairs" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#PairReadFiles.cwl/Reads" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "PairReadFiles takes an array of split files and pairs them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. 
The pairing information is taken from CheckFastqs.\n", - "id": "#PairReadFiles.cwl", - "outputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R1" - }, - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R2" - }, - { - "type": "int", - "name": "#PairReadFiles.cwl/ReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/ReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/ReadPairs" - } - ], - "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, \"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if 
(fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the 
fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", - "class": "ExpressionTool" - 
}, - { - "inputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" - } - ], - "class": "ExpressionTool", - "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", - "id": "#PutativeCellSettings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--run-metadata" - }, - 
"type": "File", - "id": "#QualityFilter.cwl/Run_Metadata" - }, - { - "type": { - "fields": [ - { - "inputBinding": { - "prefix": "--r1" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" - }, - { - "inputBinding": { - "prefix": "--r2" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" - }, - { - "inputBinding": { - "prefix": "--read-pair-id" - }, - "type": "int", - "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" - }, - { - "inputBinding": { - "prefix": "--library" - }, - "type": "string", - "name": "#QualityFilter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "id": "#QualityFilter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*read_quality.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#QualityFilter.cwl/Filter_Metrics" - }, - { - "outputBinding": { - "glob": "*_R1*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R1" - }, - { - "outputBinding": { - "glob": "*_R2*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R2" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#QualityFilter.cwl/output" - } - ], - "baseCommand": [ - "mist_quality_filter.py" - ], - "class": "CommandLineTool", - "id": "#QualityFilter.cwl" - }, - { - "inputs": [ - { - "type": "File", - "id": "#QualityFilterOuter.cwl/Run_Metadata" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" - }, - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" - }, - { - "type": "int", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" - }, - { - "type": "string", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - 
"class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/Filter_Metrics" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R1" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R2" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/output" - } - ], - "class": "Workflow", - "steps": [ - { - "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", - "out": [ - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" - ], - "run": "#QualityFilter.cwl", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", - "in": [ - { - "source": "#QualityFilterOuter.cwl/Run_Metadata", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" - }, - { - "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" - } - ] - } - ], - "id": "#QualityFilterOuter.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/Fastqs" - }, - { - "type": { - "items": "string", - "type": "array" - }, - "id": 
"#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" - }, - { - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" - }, - { - "type": "float", - "id": "#SplitAndSubsample.cwl/SubsampleRatio" - }, - { - "type": "int", - "id": "#SplitAndSubsample.cwl/SubsampleSeed" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", - "id": "#SplitAndSubsample.cwl", - "steps": [ - { - "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", - "out": [ - "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": { - "items": "File", - "type": "array" - }, - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" - } - ], - "outputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" - } - ], - "class": "ExpressionTool", - "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput", - "in": [ - { - "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" - } - ] - }, - { - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "prefix": "--fastq-file-path" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" - }, - { - "inputBinding": { - "prefix": "--files-to-skip-split-and-subsample", 
- "itemSeparator": "," - }, - "type": { - "items": "string", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" - }, - { - "inputBinding": { - "prefix": "--num-records" - }, - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" - }, - { - "inputBinding": { - "prefix": "--subsample-ratio" - }, - "type": "float", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": "int", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*.fastq.gz", - "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" - } - ], - "baseCommand": [ - "mist_split_fastq.py" - ], - "class": "CommandLineTool", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" - }, - "doc": "Allocate one docker/python process per file to do the actual file splitting.", - "scatter": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - ], - "in": [ - { - "source": "#SplitAndSubsample.cwl/Fastqs", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - }, - { - "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": "#SplitAndSubsample.cwl/NumRecordsPerSplit", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleRatio", - "id": 
"#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleSeed", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" - } - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample", - "out": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "#SplitAndSubsample.cwl/SplitAndSubsample/log" - ] - } - ], - "outputs": [ - { - "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" - }, - { - "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/log" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Seed" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/Subsample_Seed" - } - ], - "class": "ExpressionTool", - "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", - "id": "#SubsampleSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#UncompressDatatables.cwl/Compressed_Data_Table" - }, - { - "type": "File", - "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", - "type": { - "items": "File", - "type": "array" - }, - "id": 
"#UncompressDatatables.cwl/Uncompressed_Data_Tables" - }, - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" - } - ], - "class": "Workflow", - "steps": [ - { - "id": "#UncompressDatatables.cwl/Uncompress_Datatable", - "out": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": "$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - ] - }, - "scatter": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - ], - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Data_Table", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - } - ] - }, - { - "out": [ - "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": 
"$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - ] - }, - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" - } - ] - } - ], - "id": "#UncompressDatatables.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" - }, - { - "inputBinding": { - "position": 2 - }, - "type": "string", - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" - }, - { - "inputBinding": { - "position": 3 - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_pruned.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" - } - ], - "baseCommand": [ - "AssembleAndAnnotate.sh" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 1, - "ramMin": 3200, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" - }, - { 
- "type": [ - "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" - }, - { - "type": [ 
- "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 10, - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "position": 0, - "prefix": "--cell-type-mapping-fp" - }, - "type": [ - "null", - "File" - ], - "id": 
"#VDJ_Compile_Results.cwl/cellTypeMapping" - }, - { - "inputBinding": { - "position": 4, - "prefix": "--ignore", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" - }, - { - "inputBinding": { - "position": 8, - "prefix": "--e-value-for-j" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueJgene" - }, - { - "inputBinding": { - "position": 7, - "prefix": "--e-value-for-v" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueVgene" - }, - { - "inputBinding": { - "position": 5 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/igCalls" - }, - { - "inputBinding": { - "position": 9, - "prefix": "--metadata-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/metadata" - }, - { - "inputBinding": { - "position": 3, - "prefix": "--putative-cells-json-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/putativeCells" - }, - { - "inputBinding": { - "position": 6 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/tcrCalls" - }, - { - "inputBinding": { - "position": 2, - "prefix": "--vdj-version" - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Compile_Results.cwl/vdjVersion" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "doc": "VDJ data per cell, with distribution based error correction", - "outputBinding": { - "glob": "*_VDJ_perCell.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" - }, - { - "doc": "VDJ data per cell, including non-putative cells, no error correction applied", - "outputBinding": { - "glob": "*_VDJ_perCell_uncorrected.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" - }, - { - "outputBinding": { - "glob": "*_VDJ_Dominant_Contigs.csv.gz" - }, - "type": [ - 
"null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.json" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" - }, - { - "outputBinding": { - "glob": "*_DBEC_cutoff.png" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" - }, - { - "outputBinding": { - "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" - } - ], - "baseCommand": [ - "mist_vdj_compile_results.py" - ], - "id": "#VDJ_Compile_Results.cwl", - "class": "CommandLineTool", - "hints": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", - "id": "#VDJ_GatherCalls.cwl", - "steps": [ - { - "out": [ - "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_constant_region_called_pruned.csv.gz", - "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" - }, - "type": [ - "null", - "File" - ], 
- "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" - } - ], - "class": "CommandLineTool", - "arguments": [ - { - "shellQuote": false, - "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" - }, - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", - "in": [ - { - "source": "#VDJ_GatherCalls.cwl/theCalls", - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" - } - ] - } - ], - "outputs": [ - { - "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", - "type": [ - "null", - "File" - ], - "id": "#VDJ_GatherCalls.cwl/gatheredCalls" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "File" - ], - "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" - } - ], - "requirements": [ - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": 
"CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "8" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", - "id": "#VDJ_Preprocess_Reads.cwl/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/num_splits" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_RSEC_Reads.cwl", - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", - "class": "ResourceRequirement" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" - ], - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" - } - ], - "run": "#VDJ_Trim_Reads.cwl", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", - "hints": [ - { - 
"coresMin": 8, - "class": "ResourceRequirement" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" - } - ], - "class": "ExpressionTool", - "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" - }, - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/vdj_type", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" - } - ] - } - ], - "id": "#VDJ_Preprocess_Reads.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--vdj-valid-reads", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": 
"#VDJ_RSEC_Reads.cwl/Valid_Reads" - }, - { - "inputBinding": { - "prefix": "--num-splits" - }, - "type": [ - "null", - "int" - ], - "id": "#VDJ_RSEC_Reads.cwl/num_splits" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*RSEC_Reads_Fastq_*.tar.gz" - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" - } - ], - "baseCommand": "mist_vdj_rsec_reads.py", - "class": "CommandLineTool", - "id": "#VDJ_RSEC_Reads.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "Any" - ], - "id": "#VDJ_Settings.cwl/_VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Settings.cwl/VDJ_Version" - } - ], - "class": "ExpressionTool", - "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === \"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = 
inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", - "id": "#VDJ_Settings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "cutadapt.log" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Trim_Report" - }, - { - "outputBinding": { - "glob": "*vdjtxt.gz" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" - } - ], - "baseCommand": "VDJ_Trim_Reads.sh", - "class": "CommandLineTool", - "id": "#VDJ_Trim_Reads.cwl" - }, - { - "inputs": [], - "requirements": [ - ], - "stdout": "output.txt", - "outputs": [ - { - "outputBinding": { - "glob": "output.txt", - "loadContents": true, - "outputEval": "$(self[0].contents)" - }, - "type": "string", - "id": "#Version.cwl/version" - } - ], - "baseCommand": [ - "mist_version.py" - ], - "id": "#Version.cwl", - "class": "CommandLineTool" - } - ], - "$namespaces": { - "sbg": "https://sevenbridges.com#", - "arv": "http://arvados.org/cwl#" - } -} \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl b/target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl deleted file mode 100755 index 5fa9ea85e48..00000000000 --- a/target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl +++ /dev/null @@ -1,5204 +0,0 @@ -#!/usr/bin/env cwl-runner -{ - "cwlVersion": "v1.0", - "$graph": [ - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-r1", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AddtoBam.cwl/Annotation_R1" - }, - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#AddtoBam.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--annot-mol-file" - }, - "type": "File", - "id": 
"#AddtoBam.cwl/Molecular_Annotation" - }, - { - "inputBinding": { - "prefix": "--r2-bam" - }, - "type": "File", - "id": "#AddtoBam.cwl/R2_Bam" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AddtoBam.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--tag-calls" - }, - "type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Tag_Calls" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AddtoBam.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "Annotated_mapping_R2.BAM" - }, - "type": "File", - "id": "#AddtoBam.cwl/Annotated_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AddtoBam.cwl/output" - } - ], - "baseCommand": [ - "mist_add_to_bam.py" - ], - "class": "CommandLineTool", - "id": "#AddtoBam.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - ], - "id": "#AlignR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--index" - }, - "type": "File", - "id": "#AlignR2.cwl/Index" - }, - { - "inputBinding": { - "prefix": "--r2-fastqs", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/R2" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AlignR2.cwl/Run_Metadata" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "$(String(runtime.cores))" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*zip" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AlignR2.cwl/Alignments" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AlignR2.cwl/output" - } - ], - "baseCommand": [ - 
"mist_align_R2.py" - ], - "class": "CommandLineTool", - "id": "#AlignR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateMolecules.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--use-dbec" - }, - "type": [ - "null", - "boolean" - ], - "id": "#AnnotateMolecules.cwl/Use_DBEC" - }, - { - "inputBinding": { - "prefix": "--valid-annot" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Valids" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_GeneStatus.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Gene_Status_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).max_count)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Max_Count" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule.csv.*" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/Mol_Annot_List" - }, - { - "outputBinding": { - "glob": "stats.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" - }, - "type": "int", - "id": "#AnnotateMolecules.cwl/Total_Molecules" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateMolecules.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_molecules.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateMolecules.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--filter-metrics", - "itemSeparator": "," - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#AnnotateR1.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--R1" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - 
"type": "File", - "id": "#AnnotateR1.cwl/Run_Metadata" - } - ], - "requirements": [ - - { - "ramMin": 2000, - "class": "ResourceRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_R1.csv.gz" - }, - "type": "File", - "id": "#AnnotateR1.cwl/Annotation_R1" - }, - { - "outputBinding": { - "glob": "*_R1_error_count_table.npy" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_error_count_table" - }, - { - "outputBinding": { - "glob": "*_R1_read_count_breakdown.json" - }, - "type": "File", - "id": "#AnnotateR1.cwl/R1_read_count_breakdown" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR1.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R1.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR1.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--extra-seqs" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/Extra_Seqs" - }, - { - "inputBinding": { - "prefix": "--gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF_Annotation" - }, - { - "inputBinding": { - "prefix": "--R2-zip" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_zip" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--transcript-length" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/Transcript_Length" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*Annotation_R2.csv.gz" - }, - "type": "File", - "id": "#AnnotateR2.cwl/Annot_R2" - }, - { - "outputBinding": { - "glob": "*-annot.gtf" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateR2.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*mapping_R2.BAM" - }, - "type": "File", - "id": "#AnnotateR2.cwl/R2_Bam" - }, - { - "outputBinding": { - "glob": "*_picard_quality_metrics.csv.gz" - }, - "type": 
"File", - "id": "#AnnotateR2.cwl/R2_Quality_Metrics" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#AnnotateR2.cwl/output" - } - ], - "baseCommand": [ - "mist_annotate_R2.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateR2.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--umi-option" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/AbSeq_UMI" - }, - { - "inputBinding": { - "prefix": "--extra-seqs", - "itemSeparator": "," - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Extra_Seqs" - }, - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#AnnotateReads.cwl/Filter_Metrics" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#AnnotateReads.cwl/Putative_Cell_Call" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_error_count_table" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R1_read_count_breakdown" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Annotation" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/R2_Quality_Metrics" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--target-gene-mapping" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Target_Gene_Mapping" - } - ], - "requirements": [ - - { - "class": "InitialWorkDirRequirement", - "listing": [ - { - "writable": false, - "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return 
fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", - "entryname": "manifest.json" - } - ] - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "4" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_Annotation_Read.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/Annotation_Read" - }, - { - "outputBinding": { - "glob": "*read1_error_rate_archive*" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Read1_error_rate" - }, - { - "outputBinding": { - "glob": "*_SeqMetrics.csv.gz" - }, - "type": "File", - "id": "#AnnotateReads.cwl/Seq_Metrics" - }, - { - "outputBinding": { - "glob": "*Sorted_Valid_Reads.csv.*" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#AnnotateReads.cwl/Valid_Reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_ig_reads" - }, - { - "outputBinding": { - "glob": "num_vdj_reads.json", - "loadContents": true, - "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" - }, - "type": "int", - "id": "#AnnotateReads.cwl/num_valid_tcr_reads" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": 
"#AnnotateReads.cwl/output" - }, - { - "outputBinding": { - "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validIgReads" - }, - { - "outputBinding": { - "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#AnnotateReads.cwl/validTcrReads" - } - ], - "baseCommand": [ - "mist_annotate_reads.py" - ], - "class": "CommandLineTool", - "id": "#AnnotateReads.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#BundleLogs.cwl/log_files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - } - ], - "outputs": [ - { - "type": "Directory", - "id": "#BundleLogs.cwl/logs_dir" - } - ], - "class": "ExpressionTool", - "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? 
(random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", - "id": "#BundleLogs.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 0 - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/molsPerCellMatrix" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*cell_type_experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#Cell_Classifier.cwl/cellTypePredictions" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Cell_Classifier.cwl/log" - } - ], - "baseCommand": [ - "mist_cell_classifier.py" - ], - "class": "CommandLineTool", - "id": "#Cell_Classifier.cwl" - }, - { - "inputs": [ - { - "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", - "inputBinding": { - "prefix": "--min-split-size" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/MinChunkSize" - }, - { - "inputBinding": { - "prefix": "--reads", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#CheckFastqs.cwl/Reads" - }, - { - "inputBinding": { - "prefix": "--subsample" - }, - "type": [ - "null", - "float" - ], - "id": "#CheckFastqs.cwl/Subsample" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/Subsample_Seed" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "CheckFastqs does several quality 
control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", - "baseCommand": [ - "mist_check_fastqs.py" - ], - "id": "#CheckFastqs.cwl", - "outputs": [ - { - "outputBinding": { - "glob": "bead_version.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/Bead_Version" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" - }, - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/library" - }, - { - "type": "string", - "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#CheckFastqs.cwl/FastqReadPairs" - }, - { - "outputBinding": { - "glob": "files_to_skip_split_and_subsample.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" - }, - "type": { - "items": "string", - "type": "array" - }, - "id": "#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" - }, - { - "outputBinding": { - "glob": "fastq_read_pairs.json", - "loadContents": true, - "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var 
pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" - }, - "type": [ - "null", - "string" - ], - "id": "#CheckFastqs.cwl/Libraries" - }, - { - "outputBinding": { - "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#CheckFastqs.cwl/ReadsList" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" - }, - "type": "int", - "id": "#CheckFastqs.cwl/SubsampleSeed" - }, - { - "outputBinding": { - "glob": "subsampling_info.json", - "loadContents": true, - "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" - }, - "type": "float", - "id": "#CheckFastqs.cwl/SubsamplingRatio" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#CheckFastqs.cwl/log" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--abseq-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/AbSeq_Reference" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#CheckReference.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/Reference" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#CheckReference.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": 
"--supplemental-reference", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#CheckReference.cwl/Supplemental_Reference" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "combined_extra_seq.fasta" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Extra_Seqs" - }, - { - "outputBinding": { - "glob": "full-gene-list.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Full_Genes" - }, - { - "outputBinding": { - "glob": "*gtf", - "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/GTF" - }, - { - "outputBinding": { - "glob": "*-annot.*", - "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" - }, - "type": "File", - "id": "#CheckReference.cwl/Index" - }, - { - "outputBinding": { - "glob": "target-gene.json" - }, - "type": [ - "null", - "File" - ], - "id": "#CheckReference.cwl/Target_Gene_Mapping" - }, - { - "outputBinding": { - "glob": "transcript_length.json" - }, - "type": [ - "null", - "File" - ], - "id": 
"#CheckReference.cwl/Transcript_Length" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#CheckReference.cwl/output" - } - ], - "baseCommand": [ - "mist_check_references.py" - ], - "class": "CommandLineTool", - "id": "#CheckReference.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--cell-order" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Cell_Order" - }, - { - "inputBinding": { - "prefix": "--dense-data-table" - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparse.cwl/Dense_Data_Table" - }, - { - "inputBinding": { - "prefix": "--gene-list" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Gene_List" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Run_Metadata" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*.csv.gz" - }, - "type": "File", - "id": "#DensetoSparse.cwl/Data_Tables" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#DensetoSparse.cwl/output" - } - ], - "baseCommand": [ - "mist_dense_to_sparse.py" - ], - "class": "CommandLineTool", - "id": "#DensetoSparse.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#DensetoSparseFile.cwl/GDT_cell_order" - } - ], - "requirements": [ - - ], - "stdout": "cell_order.json", - "outputs": [ - { - "type": "stdout", - "id": "#DensetoSparseFile.cwl/Cell_Order" - } - ], - "baseCommand": "cat", - "id": "#DensetoSparseFile.cwl", - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--full-gene-list" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Full_Genes" - }, - { - "inputBinding": { - "prefix": "--gene-status", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Gene_Status_List" - }, - { - "inputBinding": { - "prefix": "--max-count", - "itemSeparator": 
"," - }, - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Max_Count" - }, - { - "inputBinding": { - "prefix": "--mol-annot", - "itemSeparator": "," - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Molecule_Annotation_List" - }, - { - "inputBinding": { - "prefix": "--putative-cell-call" - }, - "type": [ - "null", - "int" - ], - "id": "#GetDataTable.cwl/Putative_Cell_Call" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#GetDataTable.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#GetDataTable.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "prefix": "--tag-names", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Tag_Names" - }, - { - "type": { - "items": "int", - "type": "array" - }, - "id": "#GetDataTable.cwl/Total_Molecules" - } - ], - "requirements": [ - { - "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", - "class": "ResourceRequirement" - }, - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "metrics-files.tar.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Annot_Files" - }, - { - "outputBinding": { - "glob": "Annotations/*_Bioproduct_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Bioproduct_Stats" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*.png" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Cell_Label_Filter" - }, - { - "outputBinding": { - "glob": "cell_order.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Cell_Order" - }, - { - "outputBinding": { - "glob": "*_Annotation_Molecule_corrected.csv.gz" - }, - "type": "File", - "id": 
"#GetDataTable.cwl/Corrected_Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "*PerCell_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables" - }, - { - "outputBinding": { - "glob": "*PerCell_Unfiltered_Dense.csv.gz" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" - }, - { - "outputBinding": { - "glob": "*_Expression_Data.st.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Expression_Data" - }, - { - "outputBinding": { - "glob": "*_Expression_Data_Unfiltered.st.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" - }, - { - "outputBinding": { - "glob": "gene_list.json" - }, - "type": "File", - "id": "#GetDataTable.cwl/Gene_List" - }, - { - "outputBinding": { - "glob": "Annotations/*_Annotation_Molecule.csv.gz" - }, - "type": "File", - "id": "#GetDataTable.cwl/Molecular_Annotation" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" - }, - { - "outputBinding": { - "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Putative_Cells_Origin" - }, - { - "outputBinding": { - "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Annotation" - }, - { - "outputBinding": { - "glob": "Trueno/*_Calls.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/Tag_Calls" - }, - { - "outputBinding": { - "glob": "Trueno/*csv" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#GetDataTable.cwl/Trueno_out" - }, - { - "outputBinding": { - "glob": "Trueno/*zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": 
"#GetDataTable.cwl/Trueno_zip" - }, - { - "outputBinding": { - "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#GetDataTable.cwl/output" - } - ], - "baseCommand": [ - "mist_get_datatables.py" - ], - "class": "CommandLineTool", - "id": "#GetDataTable.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#IndexBAM.cwl/BamFile" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_index.log", - "outputs": [ - { - "outputBinding": { - "glob": "*.bai" - }, - "type": "File", - "id": "#IndexBAM.cwl/Index" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#IndexBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "index" - ], - "id": "#IndexBAM.cwl", - "arguments": [ - { - "position": 2, - "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/AbSeq_UMI" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Barcode_Num" - }, - { - "type": [ - "null", - "File" - ], - "id": "#InternalSettings.cwl/Extra_Seqs" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/Label_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#InternalSettings.cwl/MinChunkSize" - }, - { - "type": [ - "null", - "long" - ], - "id": "#InternalSettings.cwl/NumRecordsPerSplit" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - "string" - ], - "id": "#InternalSettings.cwl/Seq_Run" - }, - { - "type": [ - "null", - "float" - ], - "id": 
"#InternalSettings.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Target_analysis" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#InternalSettings.cwl/Use_DBEC" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" - } - ], - "class": "ExpressionTool", - "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput = internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", - "id": "#InternalSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/AbSeq_Reference", - "label": "AbSeq Reference" - }, - { - "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set.", - "type": [ - "null", - "boolean" - ], - "id": "#main/Basic_Algo_Only", - "label": "Disable Refined Putative Cell Calling" - }, - { - "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", - "type": [ - "null", - "int" - ], - "id": "#main/Exact_Cell_Count", - "label": "Exact Cell Count" - }, - { - "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. AbSeq (Experimental) is for troubleshooting only.", - "type": [ - "null", - { - "symbols": [ - "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", - "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" - ], - "type": "enum", - "name": "#main/Putative_Cell_Call/Putative_Cell_Call" - } - ], - "id": "#main/Putative_Cell_Call", - "label": "Putative Cell Calling" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/Reads", - "label": "Reads" - }, - { - "type": "File", - "id": "#main/Reference_Genome", - "label": "Reference Genome" - }, - { - "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", - "type": [ - "null", - "string" - ], - "id": "#main/Run_Name", - "label": "Run Name" - }, - { - "doc": "The sample multiplexing kit version. 
This option should only be set for a multiplexed experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/Sample_Tags_Version/Sample_Tags_Version/human", - "#main/Sample_Tags_Version/Sample_Tags_Version/hs", - "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", - "#main/Sample_Tags_Version/Sample_Tags_Version/mm", - "#main/Sample_Tags_Version/Sample_Tags_Version/custom" - ], - "type": "enum", - "name": "#main/Sample_Tags_Version/Sample_Tags_Version" - } - ], - "id": "#main/Sample_Tags_Version", - "label": "Sample Tags Version" - }, - { - "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", - "type": [ - "null", - "float" - ], - "id": "#main/Subsample", - "label": "Subsample Reads" - }, - { - "doc": "For use when replicating a previous subsampling run only. Obtain the seed generated from the log file for the SplitFastQ node.\n", - "type": [ - "null", - "int" - ], - "id": "#main/Subsample_seed", - "label": "Subsample Seed" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Supplemental_Reference", - "label": "Supplemental Reference" - }, - { - "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alpha numeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#main/Tag_Names", - "label": "Tag Names" - }, - { - "type": "File", - "id": "#main/Transcriptome_Annotation", - "label": "Transcriptome Annotation" - }, - { - "doc": "The VDJ species and chain types. 
This option should only be set for VDJ experiment.", - "type": [ - "null", - { - "symbols": [ - "#main/VDJ_Version/VDJ_Version/human", - "#main/VDJ_Version/VDJ_Version/hs", - "#main/VDJ_Version/VDJ_Version/mouse", - "#main/VDJ_Version/VDJ_Version/mm", - "#main/VDJ_Version/VDJ_Version/humanBCR", - "#main/VDJ_Version/VDJ_Version/humanTCR", - "#main/VDJ_Version/VDJ_Version/mouseBCR", - "#main/VDJ_Version/VDJ_Version/mouseTCR" - ], - "type": "enum", - "name": "#main/VDJ_Version/VDJ_Version" - } - ], - "id": "#main/VDJ_Version", - "label": "VDJ Species Version" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "MultipleInputFeatureRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "The BD Rhapsody\u2122 WTA Analysis Pipeline is used to create sequencing libraries from single cell transcriptomes without having to specify a targeted panel.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files, a reference genome file and a transcriptome annotation file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", - "label": "BD Rhapsody\u2122 WTA Analysis Pipeline", - "steps": [ - { - "run": "#AddtoBam.cwl", - "scatter": [ - "#main/AddtoBam/R2_Bam" - ], - "in": [ - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AddtoBam/Annotation_R1" - }, - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/AddtoBam/Cell_Order" - }, - { - "source": "#main/GetDataTable/Corrected_Molecular_Annotation", - "id": "#main/AddtoBam/Molecular_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Bam", - "id": "#main/AddtoBam/R2_Bam" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AddtoBam/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Tag_Calls", - "id": "#main/AddtoBam/Tag_Calls" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AddtoBam/Target_Gene_Mapping" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AddtoBam", - "out": [ - "#main/AddtoBam/Annotated_Bam", - "#main/AddtoBam/output" - ] - }, - { - "run": "#AlignR2.cwl", - "out": [ - "#main/AlignR2/Alignments", - "#main/AlignR2/output" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": 48000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AlignR2", - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AlignR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/Index", - "id": "#main/AlignR2/Index" - }, - { - "source": "#main/QualityFilterOuter/R2", - "id": "#main/AlignR2/R2" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AlignR2/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateMolecules.cwl", - "scatter": [ - "#main/AnnotateMolecules/Valids" - ], - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateMolecules/AbSeq_UMI" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", 
- "id": "#main/AnnotateMolecules/Run_Metadata" - }, - { - "source": "#main/Internal_Settings/Use_DBEC", - "id": "#main/AnnotateMolecules/Use_DBEC" - }, - { - "source": "#main/AnnotateReads/Valid_Reads", - "id": "#main/AnnotateMolecules/Valids" - } - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateMolecules", - "out": [ - "#main/AnnotateMolecules/Mol_Annot_List", - "#main/AnnotateMolecules/Gene_Status_List", - "#main/AnnotateMolecules/Max_Count", - "#main/AnnotateMolecules/Total_Molecules", - "#main/AnnotateMolecules/output" - ] - }, - { - "id": "#main/AnnotateR1", - "out": [ - "#main/AnnotateR1/Annotation_R1", - "#main/AnnotateR1/R1_error_count_table", - "#main/AnnotateR1/R1_read_count_breakdown", - "#main/AnnotateR1/output" - ], - "run": "#AnnotateR1.cwl", - "scatter": [ - "#main/AnnotateR1/R1" - ], - "in": [ - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateR1/Filter_Metrics" - }, - { - "source": "#main/QualityFilterOuter/R1", - "id": "#main/AnnotateR1/R1" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR1/Run_Metadata" - } - ] - }, - { - "run": "#AnnotateR2.cwl", - "scatter": [ - "#main/AnnotateR2/R2_zip" - ], - "in": [ - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateR2/Extra_Seqs" - }, - { - "source": "#main/CheckReference/GTF", - "id": "#main/AnnotateR2/GTF_Annotation" - }, - { - "source": "#main/AlignR2/Alignments", - "id": "#main/AnnotateR2/R2_zip" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateR2/Run_Metadata" - }, - { - "source": "#main/CheckReference/Transcript_Length", - "id": "#main/AnnotateR2/Transcript_Length" - } - ], - "requirements": [ - { - "ramMin": 10000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateR2", - "out": [ - "#main/AnnotateR2/Annot_R2", - "#main/AnnotateR2/R2_Bam", - "#main/AnnotateR2/GTF", - "#main/AnnotateR2/output", - 
"#main/AnnotateR2/R2_Quality_Metrics" - ] - }, - { - "run": "#AnnotateReads.cwl", - "out": [ - "#main/AnnotateReads/Seq_Metrics", - "#main/AnnotateReads/Valid_Reads", - "#main/AnnotateReads/Read1_error_rate", - "#main/AnnotateReads/Annotation_Read", - "#main/AnnotateReads/output", - "#main/AnnotateReads/validTcrReads", - "#main/AnnotateReads/validIgReads", - "#main/AnnotateReads/num_valid_tcr_reads", - "#main/AnnotateReads/num_valid_ig_reads" - ], - "requirements": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ], - "id": "#main/AnnotateReads", - "in": [ - { - "source": "#main/Internal_Settings/AbSeq_UMI", - "id": "#main/AnnotateReads/AbSeq_UMI" - }, - { - "source": "#main/CheckReference/Extra_Seqs", - "id": "#main/AnnotateReads/Extra_Seqs" - }, - { - "source": "#main/QualityFilterOuter/Filter_Metrics", - "id": "#main/AnnotateReads/Filter_Metrics" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/AnnotateReads/Putative_Cell_Call" - }, - { - "source": "#main/AnnotateR1/Annotation_R1", - "id": "#main/AnnotateReads/R1_Annotation" - }, - { - "source": "#main/AnnotateR1/R1_error_count_table", - "id": "#main/AnnotateReads/R1_error_count_table" - }, - { - "source": "#main/AnnotateR1/R1_read_count_breakdown", - "id": "#main/AnnotateReads/R1_read_count_breakdown" - }, - { - "source": "#main/AnnotateR2/Annot_R2", - "id": "#main/AnnotateReads/R2_Annotation" - }, - { - "source": "#main/AnnotateR2/R2_Quality_Metrics", - "id": "#main/AnnotateReads/R2_Quality_Metrics" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/AnnotateReads/Run_Metadata" - }, - { - "source": "#main/CheckReference/Target_Gene_Mapping", - "id": "#main/AnnotateReads/Target_Gene_Mapping" - } - ] - }, - { - "out": [ - "#main/BundleLogs/logs_dir" - ], - "run": "#BundleLogs.cwl", - "id": "#main/BundleLogs", - "in": [ - { - "source": [ - "#main/AnnotateReads/output", - "#main/AnnotateR1/output", - "#main/AnnotateR2/output", - 
"#main/CheckReference/output", - "#main/GetDataTable/output", - "#main/Metrics/output", - "#main/AddtoBam/output", - "#main/AnnotateMolecules/output", - "#main/QualityFilterOuter/output", - "#main/CheckFastqs/log", - "#main/SplitAndSubsample/log", - "#main/MergeBAM/log", - "#main/Dense_to_Sparse_Datatable/output", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output", - "#main/IndexBAM/log", - "#main/CellClassifier/log" - ], - "linkMerge": "merge_flattened", - "id": "#main/BundleLogs/log_files" - } - ] - }, - { - "run": "#Cell_Classifier.cwl", - "out": [ - "#main/CellClassifier/cellTypePredictions", - "#main/CellClassifier/log" - ], - "requirements": [ - { - "ramMin": 4000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CellClassifier", - "in": [ - { - "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", - "id": "#main/CellClassifier/molsPerCellMatrix" - } - ] - }, - { - "out": [ - "#main/CheckFastqs/SubsampleSeed", - "#main/CheckFastqs/SubsamplingRatio", - "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "#main/CheckFastqs/FastqReadPairs", - "#main/CheckFastqs/Bead_Version", - "#main/CheckFastqs/Libraries", - "#main/CheckFastqs/ReadsList", - "#main/CheckFastqs/log" - ], - "run": "#CheckFastqs.cwl", - "id": "#main/CheckFastqs", - "in": [ - { - "source": "#main/Internal_Settings/MinChunkSize", - "id": "#main/CheckFastqs/MinChunkSize" - }, - { - "source": "#main/Reads", - "id": "#main/CheckFastqs/Reads" - }, - { - "source": "#main/Subsample_Settings/Subsample_Reads", - "id": "#main/CheckFastqs/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": "#main/CheckFastqs/Subsample_Seed" - } - ] - }, - { - "run": "#CheckReference.cwl", - "out": [ - "#main/CheckReference/Index", - "#main/CheckReference/Extra_Seqs", - "#main/CheckReference/Full_Genes", - "#main/CheckReference/output", - "#main/CheckReference/Transcript_Length", - "#main/CheckReference/GTF", - "#main/CheckReference/Target_Gene_Mapping" - 
], - "requirements": [ - { - "ramMin": 10000, - "class": "ResourceRequirement" - } - ], - "id": "#main/CheckReference", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/CheckReference/AbSeq_Reference" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/CheckReference/Putative_Cell_Call" - }, - { - "source": [ - "#main/Transcriptome_Annotation", - "#main/Reference_Genome" - ], - "id": "#main/CheckReference/Reference" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/CheckReference/Run_Metadata" - }, - { - "source": "#main/Supplemental_Reference", - "id": "#main/CheckReference/Supplemental_Reference" - } - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/Dense_to_Sparse_File/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables", - "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": "#main/Dense_to_Sparse_Datatable/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable", - "out": [ - "#main/Dense_to_Sparse_Datatable/Data_Tables", - "#main/Dense_to_Sparse_Datatable/output" - ] - }, - { - "run": "#DensetoSparse.cwl", - "scatter": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - ], - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" - }, - { - "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" - }, - { - "source": "#main/GetDataTable/Gene_List", - "id": 
"#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" - } - ], - "requirements": [ - { - "ramMin": 16000, - "class": "ResourceRequirement" - } - ], - "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", - "out": [ - "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "#main/Dense_to_Sparse_Datatable_Unfiltered/output" - ] - }, - { - "out": [ - "#main/Dense_to_Sparse_File/Cell_Order" - ], - "run": "#DensetoSparseFile.cwl", - "id": "#main/Dense_to_Sparse_File", - "in": [ - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/Dense_to_Sparse_File/GDT_cell_order" - } - ] - }, - { - "out": [ - "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/dataTables" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "File", - "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/molsPerCellMatrixForCellClassifier" - } - ], - "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0", - "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", - "class": "ExpressionTool" - }, - "id": "#main/FindDataTableForCellClassifier", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/FindDataTableForCellClassifier/dataTables" - } - ] - }, - { - "out": [ - "#main/GetDataTable/Tag_Calls", - "#main/GetDataTable/Molecular_Annotation", - 
"#main/GetDataTable/Corrected_Molecular_Annotation", - "#main/GetDataTable/Tag_Annotation", - "#main/GetDataTable/Annot_Files", - "#main/GetDataTable/Cell_Label_Filter", - "#main/GetDataTable/Dense_Data_Tables", - "#main/GetDataTable/Dense_Data_Tables_Unfiltered", - "#main/GetDataTable/Expression_Data", - "#main/GetDataTable/Expression_Data_Unfiltered", - "#main/GetDataTable/Bioproduct_Stats", - "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "#main/GetDataTable/Putative_Cells_Origin", - "#main/GetDataTable/Protein_Aggregates_Experimental", - "#main/GetDataTable/Trueno_out", - "#main/GetDataTable/Trueno_zip", - "#main/GetDataTable/output", - "#main/GetDataTable/Cell_Order", - "#main/GetDataTable/Gene_List" - ], - "run": "#GetDataTable.cwl", - "id": "#main/GetDataTable", - "in": [ - { - "source": "#main/CheckReference/Full_Genes", - "id": "#main/GetDataTable/Full_Genes" - }, - { - "source": "#main/AnnotateMolecules/Gene_Status_List", - "id": "#main/GetDataTable/Gene_Status_List" - }, - { - "source": "#main/AnnotateMolecules/Max_Count", - "id": "#main/GetDataTable/Max_Count" - }, - { - "source": "#main/AnnotateMolecules/Mol_Annot_List", - "id": "#main/GetDataTable/Molecule_Annotation_List" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/GetDataTable/Putative_Cell_Call" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/GetDataTable/Run_Metadata" - }, - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/GetDataTable/Seq_Metrics" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/GetDataTable/Tag_Names" - }, - { - "source": "#main/AnnotateMolecules/Total_Molecules", - "id": "#main/GetDataTable/Total_Molecules" - } - ] - }, - { - "out": [ - "#main/IndexBAM/Index", - "#main/IndexBAM/log" - ], - "run": "#IndexBAM.cwl", - "id": "#main/IndexBAM", - "in": [ - { - "source": "#main/MergeBAM/Final_Bam", - "id": "#main/IndexBAM/BamFile" - } - ] - }, - { - 
"out": [ - "#main/Internal_Settings/Read_Filter_Off", - "#main/Internal_Settings/Barcode_Num", - "#main/Internal_Settings/Seq_Run", - "#main/Internal_Settings/AbSeq_UMI", - "#main/Internal_Settings/Use_DBEC", - "#main/Internal_Settings/Extra_Seqs", - "#main/Internal_Settings/MinChunkSize", - "#main/Internal_Settings/NumRecordsPerSplit", - "#main/Internal_Settings/Target_analysis", - "#main/Internal_Settings/Subsample_Tags", - "#main/Internal_Settings/VDJ_VGene_Evalue", - "#main/Internal_Settings/VDJ_JGene_Evalue" - ], - "in": [], - "run": "#InternalSettings.cwl", - "id": "#main/Internal_Settings", - "label": "Internal Settings" - }, - { - "out": [ - "#main/MergeBAM/Final_Bam", - "#main/MergeBAM/log" - ], - "run": "#MergeBAM.cwl", - "id": "#main/MergeBAM", - "in": [ - { - "source": "#main/AddtoBam/Annotated_Bam", - "id": "#main/MergeBAM/BamFiles" - }, - { - "source": "#main/Metadata_Settings/Run_Base_Name", - "id": "#main/MergeBAM/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/MergeBAM/Sample_Tags_Version" - } - ] - }, - { - "out": [ - "#main/MergeMultiplex/Multiplex_out" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": [ - "null", - "File" - ], - "type": "array" - }, - "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/SampleTag_Files" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/Multiplex_out" - } - ], - "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7", - "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", - "class": "ExpressionTool" - }, - "id": "#main/MergeMultiplex", - "in": [ - { 
- "source": [ - "#main/GetDataTable/Trueno_out", - "#main/Metrics/Sample_Tag_Out" - ], - "linkMerge": "merge_flattened", - "id": "#main/MergeMultiplex/SampleTag_Files" - } - ] - }, - { - "out": [ - "#main/Metadata_Settings/Run_Metadata", - "#main/Metadata_Settings/Run_Base_Name" - ], - "run": "#Metadata.cwl", - "id": "#main/Metadata_Settings", - "in": [ - { - "source": "#main/AbSeq_Reference", - "id": "#main/Metadata_Settings/AbSeq_Reference" - }, - { - "valueFrom": "WTA", - "id": "#main/Metadata_Settings/Assay" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", - "id": "#main/Metadata_Settings/Basic_Algo_Only" - }, - { - "source": "#main/CheckFastqs/Bead_Version", - "id": "#main/Metadata_Settings/Bead_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "id": "#main/Metadata_Settings/Exact_Cell_Count" - }, - { - "source": "#main/CheckFastqs/Libraries", - "id": "#main/Metadata_Settings/Libraries" - }, - { - "valueFrom": "BD Rhapsody WTA Analysis Pipeline", - "id": "#main/Metadata_Settings/Pipeline_Name" - }, - { - "source": "#main/Version/version", - "id": "#main/Metadata_Settings/Pipeline_Version" - }, - { - "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "id": "#main/Metadata_Settings/Putative_Cell_Call" - }, - { - "source": "#main/CheckFastqs/ReadsList", - "id": "#main/Metadata_Settings/Reads" - }, - { - "source": [ - "#main/Transcriptome_Annotation", - "#main/Reference_Genome" - ], - "id": "#main/Metadata_Settings/Reference" - }, - { - "source": "#main/Name_Settings/Run_Name", - "id": "#main/Metadata_Settings/Run_Name" - }, - { - "source": "#main/Multiplexing_Settings/Tag_Sample_Names", - "id": "#main/Metadata_Settings/Sample_Tag_Names" - }, - { - "source": "#main/Multiplexing_Settings/Sample_Tags_Version", - "id": "#main/Metadata_Settings/Sample_Tags_Version" - }, - { - "source": "#main/Start_Time/Start_Time", - "id": "#main/Metadata_Settings/Start_Time" - }, - { - "source": 
"#main/Subsample_Settings/Subsample_Reads", - "id": "#main/Metadata_Settings/Subsample" - }, - { - "source": "#main/Subsample_Settings/Subsample_Seed", - "id": "#main/Metadata_Settings/Subsample_Seed" - }, - { - "source": "#main/Supplemental_Reference", - "id": "#main/Metadata_Settings/Supplemental_Reference" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/Metadata_Settings/VDJ_Version" - } - ] - }, - { - "out": [ - "#main/Metrics/Metrics_Summary", - "#main/Metrics/Metrics_Archive", - "#main/Metrics/output", - "#main/Metrics/Sample_Tag_Out" - ], - "run": "#Metrics.cwl", - "id": "#main/Metrics", - "in": [ - { - "source": "#main/GetDataTable/Annot_Files", - "id": "#main/Metrics/Annot_Files" - }, - { - "source": "#main/AnnotateReads/Read1_error_rate", - "id": "#main/Metrics/Read1_error_rate" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/Metrics/Run_Metadata" - }, - { - "source": "#main/GetDataTable/Trueno_zip", - "id": "#main/Metrics/Sample_Tag_Archives" - }, - { - "source": "#main/Internal_Settings/Seq_Run", - "id": "#main/Metrics/Seq_Run" - }, - { - "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", - "id": "#main/Metrics/UMI_Adjusted_Stats" - }, - { - "source": "#main/VDJ_Compile_Results/vdjMetricsJson", - "id": "#main/Metrics/vdjMetricsJson" - } - ] - }, - { - "out": [ - "#main/Multiplexing_Settings/Tag_Sample_Names", - "#main/Multiplexing_Settings/Sample_Tags_Version" - ], - "in": [ - { - "source": "#main/Sample_Tags_Version", - "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" - }, - { - "source": "#main/Tag_Names", - "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" - } - ], - "run": "#MultiplexingSettings.cwl", - "id": "#main/Multiplexing_Settings", - "label": "Multiplexing Settings" - }, - { - "out": [ - "#main/Name_Settings/Run_Name" - ], - "in": [ - { - "source": "#main/Run_Name", - "id": "#main/Name_Settings/_Run_Name" - } - ], - "run": "#NameSettings.cwl", - "id": 
"#main/Name_Settings", - "label": "Name Settings" - }, - { - "out": [ - "#main/PairReadFiles/ReadPairs" - ], - "run": "#PairReadFiles.cwl", - "id": "#main/PairReadFiles", - "in": [ - { - "source": "#main/CheckFastqs/FastqReadPairs", - "id": "#main/PairReadFiles/FastqReadPairs" - }, - { - "source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#main/PairReadFiles/Reads" - } - ] - }, - { - "out": [ - "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", - "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", - "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" - ], - "in": [ - { - "source": "#main/Basic_Algo_Only", - "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" - }, - { - "source": "#main/Exact_Cell_Count", - "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" - }, - { - "source": "#main/Putative_Cell_Call", - "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" - } - ], - "run": "#PutativeCellSettings.cwl", - "id": "#main/Putative_Cell_Calling_Settings", - "label": "Putative Cell Calling Settings" - }, - { - "out": [ - "#main/QualityFilterOuter/Filter_Metrics", - "#main/QualityFilterOuter/R1", - "#main/QualityFilterOuter/R2", - "#main/QualityFilterOuter/output" - ], - "run": "#QualityFilterOuter.cwl", - "id": "#main/QualityFilterOuter", - "in": [ - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/QualityFilterOuter/Run_Metadata" - }, - { - "source": "#main/PairReadFiles/ReadPairs", - "id": "#main/QualityFilterOuter/Split_Read_Pairs" - } - ] - }, - { - "out": [ - "#main/SplitAndSubsample/SplitAndSubsampledFastqs", - "#main/SplitAndSubsample/log" - ], - "run": "#SplitAndSubsample.cwl", - "id": "#main/SplitAndSubsample", - "in": [ - { - "source": "#main/Reads", - "id": "#main/SplitAndSubsample/Fastqs" - }, - { - "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", - "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": 
"#main/Internal_Settings/NumRecordsPerSplit", - "id": "#main/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#main/CheckFastqs/SubsamplingRatio", - "id": "#main/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#main/CheckFastqs/SubsampleSeed", - "id": "#main/SplitAndSubsample/SubsampleSeed" - } - ] - }, - { - "out": [ - "#main/Start_Time/Start_Time" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": "string", - "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a/Start_Time" - } - ], - "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a", - "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", - "class": "ExpressionTool" - }, - "id": "#main/Start_Time", - "in": [] - }, - { - "out": [ - "#main/Subsample_Settings/Subsample_Reads", - "#main/Subsample_Settings/Subsample_Seed" - ], - "in": [ - { - "source": "#main/Subsample", - "id": "#main/Subsample_Settings/_Subsample_Reads" - }, - { - "source": "#main/Subsample_seed", - "id": "#main/Subsample_Settings/_Subsample_Seed" - } - ], - "run": "#SubsampleSettings.cwl", - "id": "#main/Subsample_Settings", - "label": "Subsample Settings" - }, - { - "out": [ - "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" - ], - "run": "#UncompressDatatables.cwl", - "id": "#main/Uncompress_Datatables", - "in": [ - { - "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", - "id": "#main/Uncompress_Datatables/Compressed_Data_Table" - }, - { - "source": "#main/GetDataTable/Expression_Data", - "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": 
"#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" - ], - "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - }, - { - "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", - "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" - } - ] - }, - { - "out": [ - "#main/VDJ_Compile_Results/vdjCellsDatatable", - "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "#main/VDJ_Compile_Results/vdjDominantContigs", - "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "#main/VDJ_Compile_Results/vdjMetricsJson", - "#main/VDJ_Compile_Results/vdjMetricsCsv", - "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" - ], - "run": "#VDJ_Compile_Results.cwl", - "id": "#main/VDJ_Compile_Results", - "in": [ - { - "source": "#main/AnnotateReads/Seq_Metrics", - "id": "#main/VDJ_Compile_Results/Seq_Metrics" - }, - { - "source": "#main/CellClassifier/cellTypePredictions", - "id": "#main/VDJ_Compile_Results/cellTypeMapping" - }, - { - "valueFrom": "$([])", - "id": "#main/VDJ_Compile_Results/chainsToIgnore" - }, - { - "source": "#main/Internal_Settings/VDJ_JGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueJgene" - }, - { - "source": "#main/Internal_Settings/VDJ_VGene_Evalue", - "id": "#main/VDJ_Compile_Results/evalueVgene" - }, - { - "source": 
"#main/VDJ_GatherIGCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/igCalls" - }, - { - "source": "#main/Metadata_Settings/Run_Metadata", - "id": "#main/VDJ_Compile_Results/metadata" - }, - { - "source": "#main/GetDataTable/Cell_Order", - "id": "#main/VDJ_Compile_Results/putativeCells" - }, - { - "source": "#main/VDJ_GatherTCRCalls/gatheredCalls", - "id": "#main/VDJ_Compile_Results/tcrCalls" - }, - { - "source": "#main/VDJ_Settings/VDJ_Version", - "id": "#main/VDJ_Compile_Results/vdjVersion" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherIGCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherIGCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", - "id": "#main/VDJ_GatherIGCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_GatherTCRCalls/gatheredCalls" - ], - "run": "#VDJ_GatherCalls.cwl", - "id": "#main/VDJ_GatherTCRCalls", - "in": [ - { - "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", - "id": "#main/VDJ_GatherTCRCalls/theCalls" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_IG/num_splits", - "#main/VDJ_Preprocess_Reads_IG/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_IG", - "in": [ - { - "source": "#main/AnnotateReads/validIgReads", - "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" - }, - { - "source": "#main/AnnotateReads/num_valid_ig_reads", - "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" - }, - { - "valueFrom": "BCR", - "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", - "#main/VDJ_Preprocess_Reads_TCR/num_splits", - "#main/VDJ_Preprocess_Reads_TCR/num_cores" - ], - "run": "#VDJ_Preprocess_Reads.cwl", - "id": "#main/VDJ_Preprocess_Reads_TCR", - "in": [ - { - "source": "#main/AnnotateReads/validTcrReads", - "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" - }, 
- { - "source": "#main/AnnotateReads/num_valid_tcr_reads", - "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" - }, - { - "valueFrom": "TCR", - "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" - } - ] - }, - { - "out": [ - "#main/VDJ_Settings/VDJ_Version" - ], - "in": [ - { - "source": "#main/VDJ_Version", - "id": "#main/VDJ_Settings/_VDJ_Version" - } - ], - "run": "#VDJ_Settings.cwl", - "id": "#main/VDJ_Settings", - "label": "VDJ Settings" - }, - { - "out": [ - "#main/Version/version" - ], - "run": "#Version.cwl", - "id": "#main/Version", - "in": [] - } - ], - "outputs": [ - { - "outputSource": "#main/GetDataTable/Bioproduct_Stats", - "type": [ - "null", - "File" - ], - "id": "#main/Bioproduct_Stats", - "label": "Bioproduct Statistics" - }, - { - "outputSource": "#main/GetDataTable/Cell_Label_Filter", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Cell_Label_Filter", - "label": "Cell Label Filter" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables", - "label": "Data Tables" - }, - { - "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Data_Tables_Unfiltered", - "label": "Unfiltered Data Tables" - }, - { - "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data", - "label": "Expression Matrix" - }, - { - "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", - "type": [ - "null", - "File" - ], - "id": "#main/Expression_Data_Unfiltered", - "label": "Unfiltered Expression Matrix" - }, - { - "outputSource": "#main/MergeBAM/Final_Bam", - "type": "File", - "id": "#main/Final_Bam", - "label": "Final BAM File" - }, - { - "outputSource": "#main/IndexBAM/Index", - "type": "File", - "id": 
"#main/Final_Bam_Index", - "label": "Final BAM Index" - }, - { - "outputSource": "#main/CellClassifier/cellTypePredictions", - "type": [ - "null", - "File" - ], - "id": "#main/ImmuneCellClassification(Experimental)", - "label": "Immune Cell Classification (Experimental)" - }, - { - "outputSource": "#main/BundleLogs/logs_dir", - "type": "Directory", - "id": "#main/Logs", - "label": "Pipeline Logs" - }, - { - "outputSource": "#main/Metrics/Metrics_Summary", - "type": "File", - "id": "#main/Metrics_Summary", - "label": "Metrics Summary" - }, - { - "outputSource": "#main/MergeMultiplex/Multiplex_out", - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#main/Multiplex" - }, - { - "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", - "type": [ - "null", - "File" - ], - "id": "#main/Protein_Aggregates_Experimental", - "label": "Protein Aggregates (Experimental)" - }, - { - "outputSource": "#main/GetDataTable/Putative_Cells_Origin", - "type": [ - "null", - "File" - ], - "id": "#main/Putative_Cells_Origin", - "label": "Putative Cells Origin" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatable", - "label": "vdjCellsDatatable" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", - "type": [ - "null", - "File" - ], - "id": "#main/vdjCellsDatatableUncorrected", - "label": "vdjCellsDatatableUncorrected" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjDominantContigs", - "label": "vdjDominantContigs" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", - "type": [ - "null", - "File" - ], - "id": "#main/vdjMetricsCsv", - "label": "vdjMetricsCsv" - }, - { - "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", - "type": [ - "null", - "File" - ], - "id": "#main/vdjUnfilteredContigs", - "label": 
"vdjUnfilteredContigs" - } - ], - "id": "#main", - "class": "Workflow" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#MergeBAM.cwl/BamFiles" - }, - { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Run_Name" - }, - { - "type": [ - "null", - "string" - ], - "id": "#MergeBAM.cwl/Sample_Tags_Version" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "samtools_merge.log", - "outputs": [ - { - "outputBinding": { - "glob": "*_final.BAM" - }, - "type": "File", - "id": "#MergeBAM.cwl/Final_Bam" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#MergeBAM.cwl/log" - } - ], - "baseCommand": [ - "samtools", - "merge" - ], - "id": "#MergeBAM.cwl", - "arguments": [ - { - "prefix": "-@", - "valueFrom": "$(runtime.cores)" - }, - { - "position": 0, - "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" - } - ], - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 4, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/AbSeq_Reference" - }, - { - "type": "string", - "id": "#Metadata.cwl/Assay" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#Metadata.cwl/Basic_Algo_Only" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/Library" - }, - { - "type": "string", - "name": "#Metadata.cwl/Bead_Version/bead_version" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#Metadata.cwl/Bead_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Label_Version" - }, - { - "type": [ - "null", - "string" - ], - 
"id": "#Metadata.cwl/Libraries" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Name" - }, - { - "type": "string", - "id": "#Metadata.cwl/Pipeline_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Putative_Cell_Call" - }, - { - "type": [ - "null", - "boolean" - ], - "id": "#Metadata.cwl/Read_Filter_Off" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reads" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Name" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#Metadata.cwl/Sample_Tag_Names" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Start_Time" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample" - }, - { - "type": [ - "null", - "int" - ], - "id": "#Metadata.cwl/Subsample_Seed" - }, - { - "type": [ - "null", - "float" - ], - "id": "#Metadata.cwl/Subsample_Tags" - }, - { - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metadata.cwl/Supplemental_Reference" - }, - { - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "run_metadata.json", - "outputs": [ - { - "outputBinding": { - "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" - }, - "type": [ - "null", - "string" - ], - "id": "#Metadata.cwl/Run_Base_Name" - }, - { - "type": "stdout", - "id": "#Metadata.cwl/Run_Metadata" - } - ], - "baseCommand": "echo", - "id": "#Metadata.cwl", - "arguments": [ - { - "prefix": "" - }, - { - "shellQuote": 
true, - "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = 
\"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" - } - ], - "class": "CommandLineTool" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--annot-files" - }, - "type": "File", - "id": "#Metrics.cwl/Annot_Files" - }, - { - "inputBinding": { - "prefix": "--read1-error-rate" - }, - "type": "File", - "id": "#Metrics.cwl/Read1_error_rate" - }, - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#Metrics.cwl/Run_Metadata" - }, - { - "inputBinding": { - "prefix": "--sample-tag-archives", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Archives" - }, - { - "inputBinding": { - "prefix": 
"--seq-run" - }, - "type": [ - "null", - "string" - ], - "id": "#Metrics.cwl/Seq_Run" - }, - { - "inputBinding": { - "prefix": "--umi-adjusted-stats" - }, - "type": [ - "null", - "File" - ], - "id": "#Metrics.cwl/UMI_Adjusted_Stats" - }, - { - "inputBinding": { - "prefix": "--vdj-metrics-fp" - }, - "type": [ - "null", - "File" - ], - "id": "#Metrics.cwl/vdjMetricsJson" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "internal-metrics-archive.tar.gz" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Archive" - }, - { - "outputBinding": { - "glob": "*_Metrics_Summary.csv" - }, - "type": "File", - "id": "#Metrics.cwl/Metrics_Summary" - }, - { - "outputBinding": { - "glob": "*.zip" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#Metrics.cwl/Sample_Tag_Out" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#Metrics.cwl/output" - } - ], - "baseCommand": [ - "mist_metrics.py" - ], - "class": "CommandLineTool", - "id": "#Metrics.cwl" - }, - { - "inputs": [ - { - "default": "Targeted", - "type": "string", - "id": "#MultiplexingSettings.cwl/Assay" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" - }, - { - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" - } - ], - "class": "ExpressionTool", - "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || 
_Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n {\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", - "id": "#MultiplexingSettings.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/_Run_Name" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "string" - ], - "id": "#NameSettings.cwl/Run_Name" - } - ], - "class": "ExpressionTool", - "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", - "id": "#NameSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/filename" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/FastqReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/FastqReadPairs" - }, - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#PairReadFiles.cwl/Reads" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "PairReadFiles takes an array of split files and pairs 
them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. The pairing information is taken from CheckFastqs.\n", - "id": "#PairReadFiles.cwl", - "outputs": [ - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R1" - }, - { - "type": "File", - "name": "#PairReadFiles.cwl/ReadPairs/R2" - }, - { - "type": "int", - "name": "#PairReadFiles.cwl/ReadPairs/readPairId" - }, - { - "type": "string", - "name": "#PairReadFiles.cwl/ReadPairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#PairReadFiles.cwl/ReadPairs" - } - ], - "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, 
\"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if (fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from 
CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n 
readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", - "class": "ExpressionTool" - }, - { - "inputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" - }, - { - "type": [ - "null", - "Any" - ], - "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "boolean" - ], - "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" - }, - { - "type": [ - "null", - "int" - ], - "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" - } - ], - "class": "ExpressionTool", - "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: 
inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", - "id": "#PutativeCellSettings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--run-metadata" - }, - "type": "File", - "id": "#QualityFilter.cwl/Run_Metadata" - }, - { - "type": { - "fields": [ - { - "inputBinding": { - "prefix": "--r1" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" - }, - { - "inputBinding": { - "prefix": "--r2" - }, - "type": "File", - "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" - }, - { - "inputBinding": { - "prefix": "--read-pair-id" - }, - "type": "int", - "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" - }, - { - "inputBinding": { - "prefix": "--library" - }, - "type": "string", - "name": "#QualityFilter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "id": "#QualityFilter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*read_quality.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#QualityFilter.cwl/Filter_Metrics" - }, - { - "outputBinding": { - "glob": "*_R1*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R1" - }, - { - "outputBinding": { - "glob": "*_R2*.fastq.gz" - }, - "type": "File", - "id": "#QualityFilter.cwl/R2" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#QualityFilter.cwl/output" - } - ], - "baseCommand": [ - "mist_quality_filter.py" - ], - "class": "CommandLineTool", - "id": "#QualityFilter.cwl" - }, - { - "inputs": [ - { - "type": "File", - "id": "#QualityFilterOuter.cwl/Run_Metadata" - }, - { - "type": { - "items": { - "fields": [ - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" - }, - { - "type": "File", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" - }, - { - "type": "int", - "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" - }, - { - "type": "string", - "name": 
"#QualityFilterOuter.cwl/Split_Read_Pairs/library" - } - ], - "type": "record" - }, - "type": "array" - }, - "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/Filter_Metrics" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R1" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/R2" - }, - { - "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#QualityFilterOuter.cwl/output" - } - ], - "class": "Workflow", - "steps": [ - { - "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", - "out": [ - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", - "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" - ], - "run": "#QualityFilter.cwl", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", - "in": [ - { - "source": "#QualityFilterOuter.cwl/Run_Metadata", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" - }, - { - "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", - "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" - } - ] - } - ], - "id": 
"#QualityFilterOuter.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/Fastqs" - }, - { - "type": { - "items": "string", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" - }, - { - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" - }, - { - "type": "float", - "id": "#SplitAndSubsample.cwl/SubsampleRatio" - }, - { - "type": "int", - "id": "#SplitAndSubsample.cwl/SubsampleSeed" - } - ], - "requirements": [ - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", - "id": "#SplitAndSubsample.cwl", - "steps": [ - { - "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", - "out": [ - "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": { - "items": { - "items": "File", - "type": "array" - }, - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" - } - ], - "outputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" - } - ], - "class": "ExpressionTool", - "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", - "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" - }, - "id": "#SplitAndSubsample.cwl/FlattenOutput", - "in": [ - { - "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" - } - ] - }, - { - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - 
"inputBinding": { - "prefix": "--fastq-file-path" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" - }, - { - "inputBinding": { - "prefix": "--files-to-skip-split-and-subsample", - "itemSeparator": "," - }, - "type": { - "items": "string", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" - }, - { - "inputBinding": { - "prefix": "--num-records" - }, - "type": [ - "null", - "long" - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" - }, - { - "inputBinding": { - "prefix": "--subsample-ratio" - }, - "type": "float", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" - }, - { - "inputBinding": { - "prefix": "--subsample-seed" - }, - "type": "int", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" - } - ], - "requirements": [ - ], - "outputs": [ - { - "outputBinding": { - "glob": "*.fastq.gz", - "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" - }, - { - "outputBinding": { - "glob": "*.log" - }, - "type": "File", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" - } - ], - "baseCommand": [ - "mist_split_fastq.py" - ], - "class": "CommandLineTool", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" - }, - "doc": "Allocate one docker/python process per file to do the actual file splitting.", - "scatter": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - ], - "in": [ - { - "source": "#SplitAndSubsample.cwl/Fastqs", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" - }, - { - "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" - }, - { - "source": 
"#SplitAndSubsample.cwl/NumRecordsPerSplit", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleRatio", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" - }, - { - "source": "#SplitAndSubsample.cwl/SubsampleSeed", - "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" - } - ], - "id": "#SplitAndSubsample.cwl/SplitAndSubsample", - "out": [ - "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", - "#SplitAndSubsample.cwl/SplitAndSubsample/log" - ] - } - ], - "outputs": [ - { - "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" - }, - { - "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", - "type": { - "items": "File", - "type": "array" - }, - "id": "#SplitAndSubsample.cwl/log" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/_Subsample_Seed" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#SubsampleSettings.cwl/Subsample_Reads" - }, - { - "type": [ - "null", - "int" - ], - "id": "#SubsampleSettings.cwl/Subsample_Seed" - } - ], - "class": "ExpressionTool", - "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", - "id": "#SubsampleSettings.cwl" - }, - { - "inputs": [ - { - "type": { - "items": "File", - "type": "array" - }, - "id": "#UncompressDatatables.cwl/Compressed_Data_Table" - }, - { - "type": "File", - "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" - } - ], - "requirements": [ - { - "class": 
"ScatterFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", - "type": { - "items": "File", - "type": "array" - }, - "id": "#UncompressDatatables.cwl/Uncompressed_Data_Tables" - }, - { - "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" - } - ], - "class": "Workflow", - "steps": [ - { - "id": "#UncompressDatatables.cwl/Uncompress_Datatable", - "out": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": "$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - ] - }, - "scatter": [ - "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - ], - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Data_Table", - "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" - } - ] - }, - { - "out": [ - "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": "File", - "id": 
"#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "stdout": "$(inputs.Compressed_File.nameroot)", - "outputs": [ - { - "outputBinding": { - "glob": "$(inputs.Compressed_File.nameroot)" - }, - "type": "File", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" - } - ], - "baseCommand": [ - "gunzip" - ], - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", - "arguments": [ - { - "position": 0, - "valueFrom": "-c" - } - ], - "class": "CommandLineTool", - "hints": [ - - ] - }, - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", - "in": [ - { - "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", - "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" - } - ] - } - ], - "id": "#UncompressDatatables.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" - }, - { - "inputBinding": { - "position": 2 - }, - "type": "string", - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" - }, - { - "inputBinding": { - "position": 3 - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_pruned.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" - } - ], - "baseCommand": [ - "AssembleAndAnnotate.sh" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "class": "CommandLineTool", - "hints": [ - { - "coresMin": 1, - "ramMin": 3200, - "class": "ResourceRequirement" - } - ] 
- }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" - }, - { - 
"inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": "ScatterFeatureRequirement" - }, - { - "class": "StepInputExpressionRequirement" - }, - { - "class": "SubworkflowFeatureRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", - "in": [ - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - }, - { - "valueFrom": "75000", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" - }, - { - "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" - } - ], - "hints": [ - { - "coresMin": "$(inputs.num_cores)", - "class": "ResourceRequirement" - } - ], - "scatter": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" - ], - "out": [ - "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" - ] - } - ], - "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" - 
}, - { - "inputs": [ - { - "inputBinding": { - "position": 10, - "prefix": "--seq-metrics" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" - }, - { - "inputBinding": { - "position": 0, - "prefix": "--cell-type-mapping-fp" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/cellTypeMapping" - }, - { - "inputBinding": { - "position": 4, - "prefix": "--ignore", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "string", - "type": "array" - } - ], - "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" - }, - { - "inputBinding": { - "position": 8, - "prefix": "--e-value-for-j" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueJgene" - }, - { - "inputBinding": { - "position": 7, - "prefix": "--e-value-for-v" - }, - "type": [ - "null", - "float" - ], - "id": "#VDJ_Compile_Results.cwl/evalueVgene" - }, - { - "inputBinding": { - "position": 5 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/igCalls" - }, - { - "inputBinding": { - "position": 9, - "prefix": "--metadata-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/metadata" - }, - { - "inputBinding": { - "position": 3, - "prefix": "--putative-cells-json-fp" - }, - "type": "File", - "id": "#VDJ_Compile_Results.cwl/putativeCells" - }, - { - "inputBinding": { - "position": 6 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/tcrCalls" - }, - { - "inputBinding": { - "position": 2, - "prefix": "--vdj-version" - }, - "type": [ - "null", - "string" - ], - "id": "#VDJ_Compile_Results.cwl/vdjVersion" - } - ], - "requirements": [ - - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "doc": "VDJ data per cell, with distribution based error correction", - "outputBinding": { - "glob": "*_VDJ_perCell.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" - }, - { - "doc": "VDJ data per cell, including non-putative cells, no 
error correction applied", - "outputBinding": { - "glob": "*_VDJ_perCell_uncorrected.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" - }, - { - "outputBinding": { - "glob": "*_VDJ_Dominant_Contigs.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.csv" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" - }, - { - "outputBinding": { - "glob": "*_VDJ_metrics.json" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" - }, - { - "outputBinding": { - "glob": "*_DBEC_cutoff.png" - }, - "type": { - "items": "File", - "type": "array" - }, - "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" - }, - { - "outputBinding": { - "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" - } - ], - "baseCommand": [ - "mist_vdj_compile_results.py" - ], - "id": "#VDJ_Compile_Results.cwl", - "class": "CommandLineTool", - "hints": [ - { - "ramMin": 32000, - "class": "ResourceRequirement" - } - ] - }, - { - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", - "id": "#VDJ_GatherCalls.cwl", - "steps": [ - { - "out": [ - "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - }, - { - "class": 
"ShellCommandRequirement" - } - ], - "outputs": [ - { - "outputBinding": { - "glob": "*_constant_region_called_pruned.csv.gz", - "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" - } - ], - "class": "CommandLineTool", - "arguments": [ - { - "shellQuote": false, - "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" - } - ], - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" - }, - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", - "in": [ - { - "source": "#VDJ_GatherCalls.cwl/theCalls", - "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" - } - ] - } - ], - "outputs": [ - { - "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", - "type": [ - "null", - "File" - ], - "id": "#VDJ_GatherCalls.cwl/gatheredCalls" - } - ], - "class": "Workflow" - }, - { - "inputs": [ - { - "type": [ - "null", - "File" - ], - "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "id": 
"#VDJ_Preprocess_Reads.cwl/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" - } - ], - "requirements": [ - { - "class": "SubworkflowFeatureRequirement" - }, - { - "class": "InlineJavascriptRequirement" - }, - { - "envDef": [ - { - "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", - "envValue": "8" - } - ], - "class": "EnvVarRequirement" - } - ], - "outputs": [ - { - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", - "id": "#VDJ_Preprocess_Reads.cwl/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/num_splits" - } - ], - "class": "Workflow", - "steps": [ - { - "run": "#VDJ_RSEC_Reads.cwl", - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" - ], - "requirements": [ - { - "coresMin": 8, - "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", - "class": "ResourceRequirement" - } - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" - }, - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", - 
"#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" - ], - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" - } - ], - "run": "#VDJ_Trim_Reads.cwl", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", - "hints": [ - { - "coresMin": 8, - "class": "ResourceRequirement" - } - ] - }, - { - "out": [ - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", - "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" - ], - "run": { - "cwlVersion": "v1.0", - "inputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" - }, - { - "type": "string", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" - } - ], - "outputs": [ - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" - }, - { - "type": [ - "null", - "int" - ], - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" - } - ], - "class": "ExpressionTool", - "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" - }, - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", - "in": [ - { - "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" - }, - { - "source": 
"#VDJ_Preprocess_Reads.cwl/vdj_type", - "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" - } - ] - } - ], - "id": "#VDJ_Preprocess_Reads.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "prefix": "--vdj-valid-reads", - "itemSeparator": "," - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#VDJ_RSEC_Reads.cwl/Valid_Reads" - }, - { - "inputBinding": { - "prefix": "--num-splits" - }, - "type": [ - "null", - "int" - ], - "id": "#VDJ_RSEC_Reads.cwl/num_splits" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "*RSEC_Reads_Fastq_*.tar.gz" - }, - "type": [ - { - "items": [ - "null", - "File" - ], - "type": "array" - } - ], - "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" - } - ], - "baseCommand": "mist_vdj_rsec_reads.py", - "class": "CommandLineTool", - "id": "#VDJ_RSEC_Reads.cwl" - }, - { - "inputs": [ - { - "type": [ - "null", - "Any" - ], - "id": "#VDJ_Settings.cwl/_VDJ_Version" - } - ], - "requirements": [ - { - "class": "InlineJavascriptRequirement" - } - ], - "outputs": [ - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" - }, - { - "type": [ - "null", - "float" - ], - "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" - }, - { - "type": [ - "null", - "string" - ], - "id": "#VDJ_Settings.cwl/VDJ_Version" - } - ], - "class": "ExpressionTool", - "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === 
\"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", - "id": "#VDJ_Settings.cwl" - }, - { - "inputs": [ - { - "inputBinding": { - "position": 1 - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" - } - ], - "requirements": [ - - ], - "outputs": [ - { - "outputBinding": { - "glob": "cutadapt.log" - }, - "type": [ - "null", - "File" - ], - "id": "#VDJ_Trim_Reads.cwl/Trim_Report" - }, - { - "outputBinding": { - "glob": "*vdjtxt.gz" - }, - "type": [ - "null", - { - "items": "File", - "type": "array" - } - ], - "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" - } - ], - "baseCommand": "VDJ_Trim_Reads.sh", - "class": "CommandLineTool", - "id": "#VDJ_Trim_Reads.cwl" - }, - { - "inputs": [], - "requirements": [ - - ], - "stdout": "output.txt", - "outputs": [ - { - "outputBinding": { - "glob": "output.txt", - "loadContents": true, - "outputEval": "$(self[0].contents)" - }, - "type": "string", - "id": "#Version.cwl/version" - } - ], - "baseCommand": [ - "mist_version.py" - ], - "id": "#Version.cwl", - "class": "CommandLineTool" - } - ], - "$namespaces": { - "sbg": "https://sevenbridges.com#", - "arv": "http://arvados.org/cwl#" - } -} \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/setup_logger.py b/target/nextflow/mapping/bd_rhapsody/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/mapping/bd_rhapsody/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - 
logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_count/.config.vsh.yaml b/target/nextflow/mapping/cellranger_count/.config.vsh.yaml deleted file mode 100644 index b754fc36012..00000000000 --- a/target/nextflow/mapping/cellranger_count/.config.vsh.yaml +++ /dev/null @@ -1,266 +0,0 @@ -functionality: - name: "cellranger_count" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Samuel D'Souza" - roles: - - "author" - info: - role: "Contributor" - links: - github: "srdsam" - linkedin: "samuel-d-souza-887023150/" - organizations: - - name: "Chan Zuckerberg Biohub" - href: "https://www.czbiohub.org" - role: "Data Engineer" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "The fastq.gz files to align. Can also be a single directory containing\ - \ fastq.gz files." 
- info: null - example: - - "sample_S1_L001_R1_001.fastq.gz" - - "sample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - description: "The path to Cell Ranger reference tar.gz file. Can also be a directory." - info: null - example: - - "reference.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The folder to store the alignment results." - info: null - example: - - "/path/to/output" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "integer" - name: "--expect_cells" - description: "Expected number of recovered cells, used as input to cell calling\ - \ algorithm." 
- info: null - example: - - 3000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--chemistry" - description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ - \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ - \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ - \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ - - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ - \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ - \ for more information.\n" - info: null - default: - - "auto" - required: false - choices: - - "auto" - - "threeprime" - - "fiveprime" - - "SC3Pv1" - - "SC3Pv2" - - "SC3Pv3" - - "SC3Pv3LT" - - "SC3Pv3HT" - - "SC5P-PE" - - "SC5P-R2" - - "SC-FB" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--secondary_analysis" - description: "Whether or not to run the secondary analysis e.g. clustering." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--generate_bam" - description: "Whether to generate a BAM file." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--include_introns" - description: "Include intronic reads in count (default=true unless --target-panel\ - \ is specified in which case default=false)" - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Align fastq files using Cell Ranger count." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - - type: "file" - path: "src/utils/setup_logger.py" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/cellranger:7.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/config.vsh.yaml" - platform: "nextflow" - output: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count/cellranger_count" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/cellranger_count/main.nf b/target/nextflow/mapping/cellranger_count/main.nf deleted file mode 100644 index 56b530e78e1..00000000000 --- a/target/nextflow/mapping/cellranger_count/main.nf +++ /dev/null @@ -1,2745 +0,0 @@ -// cellranger_count 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Samuel D'Souza (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "cellranger_count", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Samuel D'Souza", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "srdsam", - "linkedin" : "samuel-d-souza-887023150/" - }, - "organizations" : [ - { - "name" : "Chan Zuckerberg Biohub", - "href" : "https://www.czbiohub.org", - "role" : "Data Engineer" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "The fastq.gz files to 
align. Can also be a single directory containing fastq.gz files.", - "example" : [ - "sample_S1_L001_R1_001.fastq.gz", - "sample_S1_L001_R2_001.fastq.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "description" : "The path to Cell Ranger reference tar.gz file. Can also be a directory.", - "example" : [ - "reference.tar.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "The folder to store the alignment results.", - "example" : [ - "/path/to/output" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "integer", - "name" : "--expect_cells", - "description" : "Expected number of recovered cells, used as input to cell calling algorithm.", - "example" : [ - 3000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--chemistry", - "description" : "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2: Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n- SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2 or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", - "default" : [ - "auto" - ], 
- "required" : false, - "choices" : [ - "auto", - "threeprime", - "fiveprime", - "SC3Pv1", - "SC3Pv2", - "SC3Pv3", - "SC3Pv3LT", - "SC3Pv3HT", - "SC5P-PE", - "SC5P-R2", - "SC-FB" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--secondary_analysis", - "description" : "Whether or not to run the secondary analysis e.g. clustering.", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--generate_bam", - "description" : "Whether to generate a BAM file.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--include_introns", - "description" : "Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/" - } - ], - "description" : "Align fastq files using Cell Ranger count.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - 
"set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/cellranger:7.0", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "docker", - "run" : [ - "apt update && apt upgrade -y" - ] - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/config.vsh.yaml", - "platform" : "nextflow", - "output" : 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "${VIASH_PAR_REFERENCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reference='&'#" ; else echo "# par_reference="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_EXPECT_CELLS+x} ]; then echo "${VIASH_PAR_EXPECT_CELLS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_expect_cells='&'#" ; else echo "# par_expect_cells="; fi ) -$( if [ ! -z ${VIASH_PAR_CHEMISTRY+x} ]; then echo "${VIASH_PAR_CHEMISTRY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chemistry='&'#" ; else echo "# par_chemistry="; fi ) -$( if [ ! -z ${VIASH_PAR_SECONDARY_ANALYSIS+x} ]; then echo "${VIASH_PAR_SECONDARY_ANALYSIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_secondary_analysis='&'#" ; else echo "# par_secondary_analysis="; fi ) -$( if [ ! -z ${VIASH_PAR_GENERATE_BAM+x} ]; then echo "${VIASH_PAR_GENERATE_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_generate_bam='&'#" ; else echo "# par_generate_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_INCLUDE_INTRONS+x} ]; then echo "${VIASH_PAR_INCLUDE_INTRONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_include_introns='&'#" ; else echo "# par_include_introns="; fi ) -$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# just to make sure paths are absolute -par_reference=\\`realpath \\$par_reference\\` -par_output=\\`realpath \\$par_output\\` - -# create temporary directory -tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT - -# process inputs -# for every fastq file found, make a symlink into the tempdir -fastq_dir="\\$tmpdir/fastqs" -mkdir -p "\\$fastq_dir" -IFS=";" -for var in \\$par_input; do - unset IFS - abs_path=\\`realpath \\$var\\` - if [ -d "\\$abs_path" ]; then - find "\\$abs_path" -name *.fastq.gz -exec ln -s {} "\\$fastq_dir" \\\\; - else - ln -s "\\$abs_path" "\\$fastq_dir" - fi -done - -# process reference -if file \\$par_reference | grep -q 'gzip compressed data'; then - echo "Untarring genome" - reference_dir="\\$tmpdir/fastqs" - mkdir -p "\\$reference_dir" - tar -xvf "\\$par_reference" -C "\\$reference_dir" --strip-components=1 - par_reference="\\$reference_dir" -fi - -# cd into tempdir -cd "\\$tmpdir" - -# add additional params -extra_params=( ) - -if [ ! -z "\\$meta_cpus" ]; then - extra_params+=( "--localcores=\\$meta_cpus" ) -fi -if [ ! -z "\\$meta_memory_gb" ]; then - # always keep 2gb for the OS itself - memory_gb=\\`python -c "print(int('\\$meta_memory_gb') - 2)"\\` - extra_params+=( "--localmem=\\$memory_gb" ) -fi -if [ ! -z "\\$par_expect_cells" ]; then - extra_params+=( "--expect-cells=\\$par_expect_cells" ) -fi -if [ ! 
-z "\\$par_chemistry" ]; then - extra_params+=( "--chemistry=\\$par_chemistry" ) -fi -if [ "\\$par_secondary_analysis" == "false" ]; then - extra_params+=( "--nosecondary" ) -fi -if [ "\\$par_generate_bam" == "false" ]; then - extra_params+=( "--no-bam" ) -fi -echo "Running cellranger count" - - -id=myoutput -cellranger count \\\\ - --id "\\$id" \\\\ - --fastqs "\\$fastq_dir" \\\\ - --transcriptome "\\$par_reference" \\\\ - --include-introns "\\$par_include_introns" \\\\ - "\\${extra_params[@]}" \\\\ - --disable-ui \\\\ - -echo "Copying output" -if [ -d "\\$id/outs/" ]; then - if [ ! -d "\\$par_output" ]; then - mkdir -p "\\$par_output" - fi - mv "\\$id/outs/"* "\\$par_output" -fi -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_cellranger_count", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_count/nextflow.config b/target/nextflow/mapping/cellranger_count/nextflow.config deleted file mode 100644 index 
c34d92a1a3f..00000000000 --- a/target/nextflow/mapping/cellranger_count/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'cellranger_count' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Align fastq files using Cell Ranger count.' - author = 'Angela Oliveira Pisco, Samuel D\'Souza, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB 
} - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/cellranger_count/nextflow_params.yaml b/target/nextflow/mapping/cellranger_count/nextflow_params.yaml deleted file mode 100644 index 858ba95c91b..00000000000 --- a/target/nextflow/mapping/cellranger_count/nextflow_params.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Inputs -input: # please fill in - example: ["sample_S1_L001_R1_001.fastq.gz", "sample_S1_L001_R2_001.fastq.gz"] -reference: # please fill in - example: "reference.tar.gz" - -# Outputs -# output: "$id.$key.output.output" - -# Arguments -# expect_cells: 3000 -chemistry: "auto" -secondary_analysis: false -generate_bam: true -include_introns: true - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/cellranger_count/nextflow_schema.json b/target/nextflow/mapping/cellranger_count/nextflow_schema.json deleted file mode 100644 index 4f76cbff566..00000000000 --- a/target/nextflow/mapping/cellranger_count/nextflow_schema.json +++ /dev/null @@ -1,122 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "cellranger_count", - "description": "Align 
fastq files using Cell Ranger count.", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: List of `file`, required, example: `sample_S1_L001_R1_001.fastq.gz;sample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The fastq", - "help_text": "Type: List of `file`, required, example: `sample_S1_L001_R1_001.fastq.gz;sample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The fastq.gz files to align. Can also be a single directory containing fastq.gz files." - }, - - "reference": { - "type": "string", - "description": "Type: `file`, required, example: `reference.tar.gz`. The path to Cell Ranger reference tar", - "help_text": "Type: `file`, required, example: `reference.tar.gz`. The path to Cell Ranger reference tar.gz file. Can also be a directory." - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results.", - "default": "$id.$key.output.output" - } - - } - }, - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "expect_cells": { - "type": "integer", - "description": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm", - "help_text": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm." 
- }, - - "chemistry": { - "type": "string", - "description": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration", - "help_text": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3\u0027\n- fiveprime: Single Cell 5\u0027\n- SC3Pv1: Single Cell 3\u0027 v1\n- SC3Pv2: Single Cell 3\u0027 v2\n- SC3Pv3: Single Cell 3\u0027 v3\n- SC3Pv3LT: Single Cell 3\u0027 v3 LT\n- SC3Pv3HT: Single Cell 3\u0027 v3 HT\n- SC5P-PE: Single Cell 5\u0027 paired-end\n- SC5P-R2: Single Cell 5\u0027 R2-only\n- SC-FB: Single Cell Antibody-only 3\u0027 v2 or 5\u0027\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", - "enum": ["auto", "threeprime", "fiveprime", "SC3Pv1", "SC3Pv2", "SC3Pv3", "SC3Pv3LT", "SC3Pv3HT", "SC5P-PE", "SC5P-R2", "SC-FB"] - , - "default": "auto" - }, - - "secondary_analysis": { - "type": "boolean", - "description": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e", - "help_text": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e.g. clustering.", - "default": "False" - }, - - "generate_bam": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Whether to generate a BAM file", - "help_text": "Type: `boolean`, default: `true`. Whether to generate a BAM file.", - "default": "True" - }, - - "include_introns": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", - "help_text": "Type: `boolean`, default: `true`. 
Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", - "default": "True" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml b/target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml deleted file mode 100644 index fac2388e7eb..00000000000 --- a/target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml +++ /dev/null @@ -1,218 +0,0 @@ -functionality: - name: "cellranger_count_split" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Samuel D'Souza" - roles: - - "author" - info: - role: "Contributor" - links: - github: "srdsam" - linkedin: "samuel-d-souza-887023150/" - organizations: - - name: "Chan Zuckerberg Biohub" - href: "https://www.czbiohub.org" - role: "Data Engineer" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data 
Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Output directory from a Cell Ranger count run." - info: null - example: - - "input_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--filtered_h5" - info: null - example: - - "filtered_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--metrics_summary" - info: null - example: - - "metrics_summary.csv" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--molecule_info" - info: null - example: - - "molecule_info.h5" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--bam" - info: null - example: - - "possorted_genome_bam.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--bai" - info: null - example: - - "possorted_genome_bam.bam.bai" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--raw_h5" - info: null - example: - - "raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Split 10x Cell Ranger output directory into separate output fields." 
- info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:jammy" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "apt update && apt upgrade -y" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count_split" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count_split/cellranger_count_split" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - 
git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/cellranger_count_split/main.nf b/target/nextflow/mapping/cellranger_count_split/main.nf deleted file mode 100644 index 26ff26016ed..00000000000 --- a/target/nextflow/mapping/cellranger_count_split/main.nf +++ /dev/null @@ -1,2633 +0,0 @@ -// cellranger_count_split 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Samuel D'Souza (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "cellranger_count_split", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Samuel D'Souza", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "srdsam", - "linkedin" : "samuel-d-souza-887023150/" - 
}, - "organizations" : [ - { - "name" : "Chan Zuckerberg Biohub", - "href" : "https://www.czbiohub.org", - "role" : "Data Engineer" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Output directory from a Cell Ranger count run.", - "example" : [ - "input_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--filtered_h5", - "example" : [ - "filtered_feature_bc_matrix.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--metrics_summary", - "example" : [ - "metrics_summary.csv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--molecule_info", - "example" : [ - "molecule_info.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--bam", - "example" : [ - "possorted_genome_bam.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : 
"output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--bai", - "example" : [ - "possorted_genome_bam.bam.bai" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--raw_h5", - "example" : [ - "raw_feature_bc_matrix.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/" - } - ], - "description" : "Split 10x Cell Ranger output directory into separate output fields.", - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:jammy", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "docker", - "run" : [ - "apt update && apt upgrade -y" - ] - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : 
"memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count_split", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_FILTERED_H5+x} ]; then echo "${VIASH_PAR_FILTERED_H5}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filtered_h5='&'#" ; else echo "# par_filtered_h5="; fi ) -$( if [ ! -z ${VIASH_PAR_METRICS_SUMMARY+x} ]; then echo "${VIASH_PAR_METRICS_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_metrics_summary='&'#" ; else echo "# par_metrics_summary="; fi ) -$( if [ ! 
-z ${VIASH_PAR_MOLECULE_INFO+x} ]; then echo "${VIASH_PAR_MOLECULE_INFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_molecule_info='&'#" ; else echo "# par_molecule_info="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) -$( if [ ! -z ${VIASH_PAR_RAW_H5+x} ]; then echo "${VIASH_PAR_RAW_H5}" | sed "s#'#'\\"'\\"'#g;s#.*#par_raw_h5='&'#" ; else echo "# par_raw_h5="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -filtered_h5="\\$par_input/filtered_feature_bc_matrix.h5" -if [ -f "\\$filtered_h5" ] && [ ! -z "\\$par_filtered_h5" ]; then - echo "+ cp \\$filtered_h5 \\$par_filtered_h5" - cp "\\$filtered_h5" "\\$par_filtered_h5" -fi - -metrics_summary="\\$par_input/metrics_summary.csv" -if [ -f "\\$metrics_summary" ] && [ ! -z "\\$par_metrics_summary" ]; then - echo "+ cp \\$metrics_summary \\$par_metrics_summary" - cp "\\$metrics_summary" "\\$par_metrics_summary" -fi - -molecule_info="\\$par_input/molecule_info.h5" -if [ -f "\\$molecule_info" ] && [ ! -z "\\$par_molecule_info" ]; then - echo "+ cp \\$molecule_info \\$par_molecule_info" - cp "\\$molecule_info" "\\$par_molecule_info" -fi - -bam="\\$par_input/possorted_genome_bam.bam" -if [ -f "\\$bam" ] && [ ! -z "\\$par_bam" ]; then - echo "cp \\$bam \\$par_bam" - cp "\\$bam" "\\$par_bam" -fi - -raw_h5="\\$par_input/raw_feature_bc_matrix.h5" -if [ -f "\\$raw_h5" ] && [ ! -z "\\$par_raw_h5" ]; then - echo "+ cp \\$raw_h5 \\$par_raw_h5" - cp "\\$raw_h5" "\\$par_raw_h5" -fi - -bai="\\$par_input/possorted_genome_bam.bam.bai" -if [ -f "\\$bai" ] && [ ! 
-z "\\$par_bai" ]; then - echo "+ cp \\$bai \\$par_bai" - cp "\\$bai" "\\$par_bai" -fi -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_cellranger_count_split", - "tag" : "0.12.0" - }, - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
- // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_count_split/nextflow.config b/target/nextflow/mapping/cellranger_count_split/nextflow.config deleted file mode 100644 
index a6a1e141d9d..00000000000 --- a/target/nextflow/mapping/cellranger_count_split/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'cellranger_count_split' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Split 10x Cell Ranger output directory into separate output fields.' - author = 'Angela Oliveira Pisco, Samuel D\'Souza, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 
64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml b/target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml deleted file mode 100644 index fdd7f445ecf..00000000000 --- a/target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Arguments -input: # please fill in - example: "input_dir" -# filtered_h5: "$id.$key.filtered_h5.h5" -# metrics_summary: "$id.$key.metrics_summary.csv" -# molecule_info: "$id.$key.molecule_info.h5" -# bam: "$id.$key.bam.bam" -# bai: "$id.$key.bai.bai" -# raw_h5: "$id.$key.raw_h5.h5" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/cellranger_count_split/nextflow_schema.json b/target/nextflow/mapping/cellranger_count_split/nextflow_schema.json deleted file mode 100644 index 4f3d065b1e0..00000000000 --- a/target/nextflow/mapping/cellranger_count_split/nextflow_schema.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "cellranger_count_split", 
- "description": "Split 10x Cell Ranger output directory into separate output fields.", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input_dir`. Output directory from a Cell Ranger count run", - "help_text": "Type: `file`, required, example: `input_dir`. Output directory from a Cell Ranger count run." - }, - - "filtered_h5": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.filtered_h5.h5`, example: `filtered_feature_bc_matrix.h5`. ", - "help_text": "Type: `file`, default: `$id.$key.filtered_h5.h5`, example: `filtered_feature_bc_matrix.h5`. ", - "default": "$id.$key.filtered_h5.h5" - }, - - "metrics_summary": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.metrics_summary.csv`, example: `metrics_summary.csv`. ", - "help_text": "Type: `file`, default: `$id.$key.metrics_summary.csv`, example: `metrics_summary.csv`. ", - "default": "$id.$key.metrics_summary.csv" - }, - - "molecule_info": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.molecule_info.h5`, example: `molecule_info.h5`. ", - "help_text": "Type: `file`, default: `$id.$key.molecule_info.h5`, example: `molecule_info.h5`. ", - "default": "$id.$key.molecule_info.h5" - }, - - "bam": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.bam.bam`, example: `possorted_genome_bam.bam`. ", - "help_text": "Type: `file`, default: `$id.$key.bam.bam`, example: `possorted_genome_bam.bam`. ", - "default": "$id.$key.bam.bam" - }, - - "bai": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.bai.bai`, example: `possorted_genome_bam.bam.bai`. ", - "help_text": "Type: `file`, default: `$id.$key.bai.bai`, example: `possorted_genome_bam.bam.bai`. 
", - "default": "$id.$key.bai.bai" - }, - - "raw_h5": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.raw_h5.h5`, example: `raw_feature_bc_matrix.h5`. ", - "help_text": "Type: `file`, default: `$id.$key.raw_h5.h5`, example: `raw_feature_bc_matrix.h5`. ", - "default": "$id.$key.raw_h5.h5" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/cellranger_multi/.config.vsh.yaml b/target/nextflow/mapping/cellranger_multi/.config.vsh.yaml deleted file mode 100644 index 7e9ca75621a..00000000000 --- a/target/nextflow/mapping/cellranger_multi/.config.vsh.yaml +++ /dev/null @@ -1,423 +0,0 @@ -functionality: - name: "cellranger_multi" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - 
name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Input files" - arguments: - - type: "file" - name: "--input" - description: "The FASTQ files to be analyzed. FASTQ files should conform to\ - \ the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample\ - \ Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n" - info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--gex_reference" - description: "Genome refence index built by Cell Ranger mkref." - info: null - example: - - "reference_genome.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--vdj_reference" - description: "VDJ refence index built by Cell Ranger mkref." 
- info: null - example: - - "reference_vdj.tar.gz" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--vdj_inner_enrichment_primers" - description: "V(D)J Immune Profiling libraries: if inner enrichment primers\ - \ other than those provided \nin the 10x Genomics kits are used, they need\ - \ to be specified here as a\ntext file with one primer per line.\n" - info: null - example: - - "enrichment_primers.txt" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--feature_reference" - description: "Path to the Feature reference CSV file, declaring Feature Barcode\ - \ constructs and associated barcodes. Required only for Antibody Capture or\ - \ CRISPR Guide Capture libraries. See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref\ - \ for more information." - info: null - example: - - "feature_reference.csv" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Library arguments" - arguments: - - type: "string" - name: "--library_id" - description: "The Illumina sample name to analyze. This must exactly match the\ - \ 'Sample Name' part of the FASTQ files specified in the `--input` argument." 
- info: null - example: - - "mysample1" - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--library_type" - description: "The underlying feature type of the library.\nPossible values:\ - \ \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody Capture\"\ - , \"CRISPR Guide Capture\", \"Multiplexing Capture\"\n" - info: null - example: - - "Gene Expression" - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--library_subsample" - description: "Optional. The rate at which reads from the provided FASTQ files\ - \ are sampled. Must be strictly greater than 0 and less than or equal to 1." - info: null - example: - - "0.5" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--library_lanes" - description: "Lanes associated with this sample. Defaults to using all lanes." - info: null - example: - - "1-4" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Gene expression arguments" - description: "Arguments relevant to the analysis of gene expression data." - arguments: - - type: "integer" - name: "--gex_expect_cells" - description: "Expected number of recovered cells, used as input to cell calling\ - \ algorithm." 
- info: null - example: - - 3000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--gex_chemistry" - description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ - \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ - \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ - \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ - - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ - \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ - \ for more information.\n" - info: null - default: - - "auto" - required: false - choices: - - "auto" - - "threeprime" - - "fiveprime" - - "SC3Pv1" - - "SC3Pv2" - - "SC3Pv3" - - "SC3Pv3LT" - - "SC3Pv3HT" - - "SC5P-PE" - - "SC5P-R2" - - "SC-FB" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--gex_secondary_analysis" - description: "Whether or not to run the secondary analysis e.g. clustering." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--gex_generate_bam" - description: "Whether to generate a BAM file." - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--gex_include_introns" - description: "Include intronic reads in count (default=true unless --target-panel\ - \ is specified in which case default=false)" - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Cell multiplexing parameters" - description: "Arguments related to cell multiplexing." 
- arguments: - - type: "string" - name: "--cell_multiplex_sample_id" - description: "A name to identify a multiplexed sample. Must be alphanumeric\ - \ with hyphens and/or underscores, and less than 64 characters. Required for\ - \ Cell Multiplexing libraries." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cell_multiplex_oligo_ids" - description: "The Cell Multiplexing oligo IDs used to multiplex this sample.\ - \ If multiple CMOs were used for a sample, separate IDs with a pipe (e.g.,\ - \ CMO301|CMO302). Required for Cell Multiplexing libraries." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cell_multiplex_description" - description: "A description for the sample." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "The folder to store the alignment results." - info: null - example: - - "/path/to/output" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Executor arguments" - arguments: - - type: "boolean_true" - name: "--dryrun" - description: "If true, the output directory will only contain the CWL input\ - \ files, but the pipeline itself will not be executed." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Align fastq files using Cell Ranger multi." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/10x_5k_anticmv/raw/" - dest: "10x_5k_anticmv/raw/" - - type: "file" - path: "resources_test/10x_5k_lung_crispr/raw/" - dest: "10x_5k_lung_crispr/raw/" - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/cellranger:7.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - run: - - "DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/*" - - type: "python" - user: false - packages: - - "pandas" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "veryhighmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: 
"cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_multi" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_multi/cellranger_multi" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/cellranger_multi/main.nf b/target/nextflow/mapping/cellranger_multi/main.nf deleted file mode 100644 index 3ba8bbecac8..00000000000 --- a/target/nextflow/mapping/cellranger_multi/main.nf +++ /dev/null @@ -1,3132 +0,0 @@ -// cellranger_multi 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (author, maintainer) -// * Dries De Maeyer (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "cellranger_multi", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Dries De Maeyer", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input files", - "arguments" : [ - { - "type" : "file", - "name" : 
"--input", - "description" : "The FASTQ files to be analyzed. FASTQ files should conform to the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n", - "example" : [ - "mysample_S1_L001_R1_001.fastq.gz", - "mysample_S1_L001_R2_001.fastq.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--gex_reference", - "description" : "Genome refence index built by Cell Ranger mkref.", - "example" : [ - "reference_genome.tar.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--vdj_reference", - "description" : "VDJ refence index built by Cell Ranger mkref.", - "example" : [ - "reference_vdj.tar.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--vdj_inner_enrichment_primers", - "description" : "V(D)J Immune Profiling libraries: if inner enrichment primers other than those provided \nin the 10x Genomics kits are used, they need to be specified here as a\ntext file with one primer per line.\n", - "example" : [ - "enrichment_primers.txt" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--feature_reference", - "description" : "Path to the Feature reference CSV file, declaring Feature Barcode constructs and associated barcodes. Required only for Antibody Capture or CRISPR Guide Capture libraries. 
See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref for more information.", - "example" : [ - "feature_reference.csv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Library arguments", - "arguments" : [ - { - "type" : "string", - "name" : "--library_id", - "description" : "The Illumina sample name to analyze. This must exactly match the 'Sample Name' part of the FASTQ files specified in the `--input` argument.", - "example" : [ - "mysample1" - ], - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--library_type", - "description" : "The underlying feature type of the library.\nPossible values: \\"Gene Expression\\", \\"VDJ\\", \\"VDJ-T\\", \\"VDJ-B\\", \\"Antibody Capture\\", \\"CRISPR Guide Capture\\", \\"Multiplexing Capture\\"\n", - "example" : [ - "Gene Expression" - ], - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--library_subsample", - "description" : "Optional. The rate at which reads from the provided FASTQ files are sampled. Must be strictly greater than 0 and less than or equal to 1.", - "example" : [ - "0.5" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--library_lanes", - "description" : "Lanes associated with this sample. 
Defaults to using all lanes.", - "example" : [ - "1-4" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Gene expression arguments", - "description" : "Arguments relevant to the analysis of gene expression data.", - "arguments" : [ - { - "type" : "integer", - "name" : "--gex_expect_cells", - "description" : "Expected number of recovered cells, used as input to cell calling algorithm.", - "example" : [ - 3000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--gex_chemistry", - "description" : "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2: Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n- SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2 or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", - "default" : [ - "auto" - ], - "required" : false, - "choices" : [ - "auto", - "threeprime", - "fiveprime", - "SC3Pv1", - "SC3Pv2", - "SC3Pv3", - "SC3Pv3LT", - "SC3Pv3HT", - "SC5P-PE", - "SC5P-R2", - "SC-FB" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--gex_secondary_analysis", - "description" : "Whether or not to run the secondary analysis e.g. 
clustering.", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--gex_generate_bam", - "description" : "Whether to generate a BAM file.", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--gex_include_introns", - "description" : "Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Cell multiplexing parameters", - "description" : "Arguments related to cell multiplexing.", - "arguments" : [ - { - "type" : "string", - "name" : "--cell_multiplex_sample_id", - "description" : "A name to identify a multiplexed sample. Must be alphanumeric with hyphens and/or underscores, and less than 64 characters. Required for Cell Multiplexing libraries.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--cell_multiplex_oligo_ids", - "description" : "The Cell Multiplexing oligo IDs used to multiplex this sample. If multiple CMOs were used for a sample, separate IDs with a pipe (e.g., CMO301|CMO302). 
Required for Cell Multiplexing libraries.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--cell_multiplex_description", - "description" : "A description for the sample.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "The folder to store the alignment results.", - "example" : [ - "/path/to/output" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Executor arguments", - "arguments" : [ - { - "type" : "boolean_true", - "name" : "--dryrun", - "description" : "If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed.", - "direction" : "input", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Align fastq files using Cell Ranger multi.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/" - }, - { - "type" : "file", - "path" : "resources_test/10x_5k_anticmv/raw/", - "dest" : "10x_5k_anticmv/raw/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/10x_5k_lung_crispr/raw/", - "dest" : "10x_5k_lung_crispr/raw/", - "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/reference_gencodev41_chr1", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/cellranger:7.0", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "docker", - "run" : [ - "DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/*" - ] - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "pandas" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "veryhighmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : 
"memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_multi", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from __future__ import annotations - -import sys -import re -import subprocess -import tempfile -import pandas as pd -from typing import Optional, Any, Union -import tarfile -from pathlib import Path -import shutil -from itertools import chain - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'gex_reference': $( if [ ! -z ${VIASH_PAR_GEX_REFERENCE+x} ]; then echo "r'${VIASH_PAR_GEX_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'vdj_reference': $( if [ ! -z ${VIASH_PAR_VDJ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_VDJ_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'vdj_inner_enrichment_primers': $( if [ ! -z ${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS+x} ]; then echo "r'${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'feature_reference': $( if [ ! 
-z ${VIASH_PAR_FEATURE_REFERENCE+x} ]; then echo "r'${VIASH_PAR_FEATURE_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'library_id': $( if [ ! -z ${VIASH_PAR_LIBRARY_ID+x} ]; then echo "r'${VIASH_PAR_LIBRARY_ID//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'library_type': $( if [ ! -z ${VIASH_PAR_LIBRARY_TYPE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_TYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'library_subsample': $( if [ ! -z ${VIASH_PAR_LIBRARY_SUBSAMPLE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_SUBSAMPLE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'library_lanes': $( if [ ! -z ${VIASH_PAR_LIBRARY_LANES+x} ]; then echo "r'${VIASH_PAR_LIBRARY_LANES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'gex_expect_cells': $( if [ ! -z ${VIASH_PAR_GEX_EXPECT_CELLS+x} ]; then echo "int(r'${VIASH_PAR_GEX_EXPECT_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'gex_chemistry': $( if [ ! -z ${VIASH_PAR_GEX_CHEMISTRY+x} ]; then echo "r'${VIASH_PAR_GEX_CHEMISTRY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'gex_secondary_analysis': $( if [ ! -z ${VIASH_PAR_GEX_SECONDARY_ANALYSIS+x} ]; then echo "r'${VIASH_PAR_GEX_SECONDARY_ANALYSIS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'gex_generate_bam': $( if [ ! -z ${VIASH_PAR_GEX_GENERATE_BAM+x} ]; then echo "r'${VIASH_PAR_GEX_GENERATE_BAM//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'gex_include_introns': $( if [ ! -z ${VIASH_PAR_GEX_INCLUDE_INTRONS+x} ]; then echo "r'${VIASH_PAR_GEX_INCLUDE_INTRONS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'cell_multiplex_sample_id': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cell_multiplex_oligo_ids': $( if [ ! 
-z ${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cell_multiplex_description': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -fastq_regex = r'([A-Za-z0-9\\\\-_\\\\.]+)_S(\\\\d+)_L(\\\\d+)_[RI](\\\\d+)_(\\\\d+)\\\\.fastq\\\\.gz' -# assert re.match(fastq_regex, "5k_human_GEX_1_subset_S1_L001_R1_001.fastq.gz") is not None - -# Invert some parameters. 
Keep the original ones in the config for compatibility -inverted_params = { - "gex_generate_no_bam": "gex_generate_bam", - "gex_no_secondary_analysis": "gex_secondary_analysis" -} -for inverted_param, param in inverted_params.items(): - par[inverted_param] = not par[param] if par[param] is not None else None - del par[param] - -GEX_CONFIG_KEYS = { - "gex_reference": "reference", - "gex_expect_cells": "expect-cells", - "gex_chemistry": "chemistry", - "gex_no_secondary_analysis": "no-secondary", - "gex_generate_no_bam": "no-bam", - "gex_include_introns": "include-introns" -} -FEATURE_CONFIG_KEYS = {"feature_reference": "reference"} -VDJ_CONFIG_KEYS = {"vdj_reference": "reference", - "vdj_inner_enrichment_primers": "inner-enrichment-primers"} - -REFERENCE_SECTIONS = { - "gene-expression": (GEX_CONFIG_KEYS, "index"), - "feature": (FEATURE_CONFIG_KEYS, "index"), - "vdj": (VDJ_CONFIG_KEYS, "index") -} - -LIBRARY_CONFIG_KEYS = {'library_id': 'fastq_id', - 'library_type': 'feature_types', - 'library_subsample': 'subsample_rate', - 'library_lanes': 'lanes'} -SAMPLE_PARAMS_CONFIG_KEYS = {'cell_multiplex_sample_id': 'sample_id', - 'cell_multiplex_oligo_ids': 'cmo_ids', - 'cell_multiplex_description': 'description'} - - -# These are derived from the dictionaries above -REFERENCES = tuple(reference_param for reference_param, cellranger_param - in chain(GEX_CONFIG_KEYS.items(), FEATURE_CONFIG_KEYS.items(), VDJ_CONFIG_KEYS.items()) - if cellranger_param == "reference") -LIBRARY_PARAMS = tuple(LIBRARY_CONFIG_KEYS.keys()) -SAMPLE_PARAMS = tuple(SAMPLE_PARAMS_CONFIG_KEYS.keys()) - - -def lengths_gt1(dic: dict[str, Optional[list[Any]]]) -> dict[str, int]: - return {key: len(li) for key, li in dic.items() - if li is not None and len(li) > 1} - -def strip_margin(text: str) -> str: - return re.sub('(\\\\n?)[ \\\\t]*\\\\|', '\\\\\\\\1', text) - - -def subset_dict(dictionary: dict[str, str], - keys: Union[dict[str, str], list[str]]) -> dict[str, str]: - if isinstance(keys, (list, tuple)): 
- keys = {key: key for key in keys} - return {dest_key: dictionary[orig_key] - for orig_key, dest_key in keys.items() - if dictionary[orig_key] is not None} - -def check_subset_dict_equal_length(group_name: str, - dictionary: dict[str, list[str]]) -> None: - lens = lengths_gt1(dictionary) - assert len(set(lens.values())) <= 1, f"The number of values passed to {group_name} "\\\\ - f"arguments must be 0, 1 or all the same. Offenders: {lens}" - -def process_params(par: dict[str, Any]) -> str: - # if par_input is a directory, look for fastq files - par["input"] = [Path(fastq) for fastq in par["input"]] - if len(par["input"]) == 1 and par["input"][0].is_dir(): - logger.info("Detected '--input' as a directory, " - "traversing to see if we can detect any FASTQ files.") - par["input"] = [input_path for input_path in par["input"][0].rglob('*') - if re.match(fastq_regex, input_path.name) ] - - # check input fastq files - for input_path in par["input"]: - assert re.match(fastq_regex, input_path.name) is not None, \\\\ - f"File name of --input '{input_path}' should match regex {fastq_regex}." 
- - # check lengths of libraries metadata - library_dict = subset_dict(par, LIBRARY_PARAMS) - check_subset_dict_equal_length("Library", library_dict) - # storing for later use - par["libraries"] = library_dict - - cmo_dict = subset_dict(par, SAMPLE_PARAMS) - check_subset_dict_equal_length("Cell multiplexing", cmo_dict) - # storing for later use - par["cmo"] = cmo_dict - - # use absolute paths - par["input"] = [input_path.resolve() for input_path in par["input"]] - for file_path in REFERENCES + ('output', ): - if par[file_path]: - logger.info('Making path %s absolute', par[file_path]) - par[file_path] = Path(par[file_path]).resolve() - return par - - -def generate_csv_category(name: str, args: dict[str, str], orient: str) -> list[str]: - assert orient in ("index", "columns") - if not args: - return [] - title = [ f'[{name}]' ] - # Which index to include in csv section is based on orientation - to_csv_args = {"index": (orient=="index"), "header": (orient=="columns")} - values = [pd.DataFrame.from_dict(args, orient=orient).to_csv(**to_csv_args).strip()] - return title + values + [""] - - -def generate_config(par: dict[str, Any], fastq_dir: str) -> str: - content_list = [] - par["fastqs"] = fastq_dir - libraries = dict(LIBRARY_CONFIG_KEYS, **{"fastqs": "fastqs"}) - #TODO: use the union (|) operator when python is updated to 3.9 - all_sections = dict(REFERENCE_SECTIONS, - **{"libraries": (libraries, "columns")}, - **{"samples": (SAMPLE_PARAMS_CONFIG_KEYS, "columns")}) - for section_name, (section_params, orientation) in all_sections.items(): - reference_pars = subset_dict(par, section_params) - content_list += generate_csv_category(section_name, reference_pars, orient=orientation) - - return '\\\\n'.join(content_list) - -def main(par: dict[str, Any], meta: dict[str, Any]): - logger.info(" Processing params") - par = process_params(par) - logger.info(par) - - # TODO: throw error or else Cell Ranger will - with tempfile.TemporaryDirectory(prefix="cellranger_multi-", - 
dir=meta["temp_dir"]) as temp_dir: - temp_dir_path = Path(temp_dir) - for reference_par_name in REFERENCES: - reference = par[reference_par_name] - logger.info('Looking at %s to check if it needs decompressing', reference) - if reference and Path(reference).is_file() and tarfile.is_tarfile(reference): - extaction_dir_name = Path(reference.stem).stem # Remove two extensions (if they exist) - unpacked_directory = temp_dir_path / extaction_dir_name - logger.info('Extracting %s to %s', reference, unpacked_directory) - - with tarfile.open(reference, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member for member in members if member.isdir() - and member.name != '.' and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_directory, members=members_to_move) - par[reference_par_name] = unpacked_directory - - # Creating symlinks of fastq files to tempdir - input_symlinks_dir = temp_dir_path / "input_symlinks" - input_symlinks_dir.mkdir() - for fastq in par['input']: - destination = input_symlinks_dir / fastq.name - destination.symlink_to(fastq) - - logger.info(" Creating config file") - config_content = generate_config(par, input_symlinks_dir) - - logger.info(" Creating Cell Ranger argument") - temp_id="run" - proc_pars=["--disable-ui", "--id", temp_id] - - command_line_parameters = { - "--localcores": meta['cpus'], - "--localmem": int(meta['memory_gb']) - 2 if meta['memory_gb'] else None, - } - for param, param_value in command_line_parameters.items(): - if param_value: - proc_pars.append(f"{param}={param_value}") - - ## Run pipeline - if par["dryrun"]: - par['output'].mkdir(parents=True, exist_ok=True) - - # write config file - config_file = par['output'] / 
"config.csv" - with open(config_file, "w") as f: - f.write(config_content) - proc_pars.append(f"--csv={config_file}") - - # display command that would've been used - cmd = ["cellranger multi"] + proc_pars + ["--csv=config.csv"] - logger.info("> " + ' '.join(cmd)) - else: - # write config file to execution directory - config_file = temp_dir_path / "config.csv" - with open(config_file, "w") as f: - f.write(config_content) - proc_pars.append(f"--csv={config_file}") - - # Already copy config file to output directory - par['output'].mkdir(parents=True, exist_ok=True) - with (par['output'] / "config.csv").open('w') as open_config: - open_config.write(config_content) - - # run process - cmd = ["cellranger", "multi"] + proc_pars - logger.info("> " + ' '.join(cmd)) - try: - process_output = subprocess.run( - cmd, - cwd=temp_dir, - check=True, - capture_output=True - ) - except subprocess.CalledProcessError as e: - print(e.output.decode('utf-8'), flush=True) - raise e - else: - # Write stdout output to output folder - with (par["output"] / "cellranger_multi.log").open('w') as open_log: - open_log.write(process_output.stdout.decode('utf-8')) - print(process_output.stdout.decode('utf-8'), flush=True) - - # look for output dir file - tmp_output_dir = temp_dir_path / temp_id / "outs" - expected_files = { - Path("multi"): Path.is_dir, - Path("per_sample_outs"): Path.is_dir, - Path("config.csv"): Path.is_file, - } - for file_path, type_func in expected_files.items(): - output_path = tmp_output_dir / file_path - if not type_func(output_path): - raise ValueError(f"Could not find expected '{output_path}'") - - for output_path in tmp_output_dir.rglob('*'): - if output_path.name != "config.csv": # Already created - shutil.move(str(output_path), par['output']) - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // 
fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_cellranger_multi", - "tag" : "0.12.0" - }, - "label" : [ - "veryhighmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. 
- // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def 
inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? 
arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_multi/nextflow.config b/target/nextflow/mapping/cellranger_multi/nextflow.config deleted file mode 100644 index 
f0df0196fa6..00000000000 --- a/target/nextflow/mapping/cellranger_multi/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'cellranger_multi' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Align fastq files using Cell Ranger multi.' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt, Dries De Maeyer' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB 
} - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/cellranger_multi/nextflow_params.yaml b/target/nextflow/mapping/cellranger_multi/nextflow_params.yaml deleted file mode 100644 index b3ef771c035..00000000000 --- a/target/nextflow/mapping/cellranger_multi/nextflow_params.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Outputs -# output: "$id.$key.output.output" - -# Input files -input: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L001_R2_001.fastq.gz"] -gex_reference: # please fill in - example: "reference_genome.tar.gz" -# vdj_reference: "reference_vdj.tar.gz" -# vdj_inner_enrichment_primers: "enrichment_primers.txt" -# feature_reference: "feature_reference.csv" - -# Library arguments -library_id: # please fill in - example: ["mysample1"] -library_type: # please fill in - example: ["Gene Expression"] -# library_subsample: ["0.5"] -# library_lanes: ["1-4"] - -# Gene expression arguments -# gex_expect_cells: 3000 -gex_chemistry: "auto" -gex_secondary_analysis: false -gex_generate_bam: false -gex_include_introns: true - -# Cell multiplexing parameters -# cell_multiplex_sample_id: "foo" -# cell_multiplex_oligo_ids: "foo" -# 
cell_multiplex_description: "foo" - -# Executor arguments -dryrun: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/cellranger_multi/nextflow_schema.json b/target/nextflow/mapping/cellranger_multi/nextflow_schema.json deleted file mode 100644 index b18565e7dd3..00000000000 --- a/target/nextflow/mapping/cellranger_multi/nextflow_schema.json +++ /dev/null @@ -1,222 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "cellranger_multi", - "description": "Align fastq files using Cell Ranger multi.", - "type": "object", - "definitions": { - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results.", - "default": "$id.$key.output.output" - } - - } - }, - "input files" : { - "title": "Input files", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed", - "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed. FASTQ files should conform to the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n" - }, - - "gex_reference": { - "type": "string", - "description": "Type: `file`, required, example: `reference_genome.tar.gz`. 
Genome refence index built by Cell Ranger mkref", - "help_text": "Type: `file`, required, example: `reference_genome.tar.gz`. Genome refence index built by Cell Ranger mkref." - }, - - "vdj_reference": { - "type": "string", - "description": "Type: `file`, example: `reference_vdj.tar.gz`. VDJ refence index built by Cell Ranger mkref", - "help_text": "Type: `file`, example: `reference_vdj.tar.gz`. VDJ refence index built by Cell Ranger mkref." - }, - - "vdj_inner_enrichment_primers": { - "type": "string", - "description": "Type: `file`, example: `enrichment_primers.txt`. V(D)J Immune Profiling libraries: if inner enrichment primers other than those provided \nin the 10x Genomics kits are used, they need to be specified here as a\ntext file with one primer per line", - "help_text": "Type: `file`, example: `enrichment_primers.txt`. V(D)J Immune Profiling libraries: if inner enrichment primers other than those provided \nin the 10x Genomics kits are used, they need to be specified here as a\ntext file with one primer per line.\n" - }, - - "feature_reference": { - "type": "string", - "description": "Type: `file`, example: `feature_reference.csv`. Path to the Feature reference CSV file, declaring Feature Barcode constructs and associated barcodes", - "help_text": "Type: `file`, example: `feature_reference.csv`. Path to the Feature reference CSV file, declaring Feature Barcode constructs and associated barcodes. Required only for Antibody Capture or CRISPR Guide Capture libraries. See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref for more information." - } - - } - }, - "library arguments" : { - "title": "Library arguments", - "type": "object", - "description": "No description", - "properties": { - - "library_id": { - "type": "string", - "description": "Type: List of `string`, required, example: `mysample1`, multiple_sep: `\";\"`. 
The Illumina sample name to analyze", - "help_text": "Type: List of `string`, required, example: `mysample1`, multiple_sep: `\";\"`. The Illumina sample name to analyze. This must exactly match the \u0027Sample Name\u0027 part of the FASTQ files specified in the `--input` argument." - }, - - "library_type": { - "type": "string", - "description": "Type: List of `string`, required, example: `Gene Expression`, multiple_sep: `\";\"`. The underlying feature type of the library", - "help_text": "Type: List of `string`, required, example: `Gene Expression`, multiple_sep: `\";\"`. The underlying feature type of the library.\nPossible values: \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody Capture\", \"CRISPR Guide Capture\", \"Multiplexing Capture\"\n" - }, - - "library_subsample": { - "type": "string", - "description": "Type: List of `string`, example: `0.5`, multiple_sep: `\";\"`. Optional", - "help_text": "Type: List of `string`, example: `0.5`, multiple_sep: `\";\"`. Optional. The rate at which reads from the provided FASTQ files are sampled. Must be strictly greater than 0 and less than or equal to 1." - }, - - "library_lanes": { - "type": "string", - "description": "Type: List of `string`, example: `1-4`, multiple_sep: `\";\"`. Lanes associated with this sample", - "help_text": "Type: List of `string`, example: `1-4`, multiple_sep: `\";\"`. Lanes associated with this sample. Defaults to using all lanes." - } - - } - }, - "gene expression arguments" : { - "title": "Gene expression arguments", - "type": "object", - "description": "Arguments relevant to the analysis of gene expression data.", - "properties": { - - "gex_expect_cells": { - "type": "integer", - "description": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm", - "help_text": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm." 
- }, - - "gex_chemistry": { - "type": "string", - "description": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration", - "help_text": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3\u0027\n- fiveprime: Single Cell 5\u0027\n- SC3Pv1: Single Cell 3\u0027 v1\n- SC3Pv2: Single Cell 3\u0027 v2\n- SC3Pv3: Single Cell 3\u0027 v3\n- SC3Pv3LT: Single Cell 3\u0027 v3 LT\n- SC3Pv3HT: Single Cell 3\u0027 v3 HT\n- SC5P-PE: Single Cell 5\u0027 paired-end\n- SC5P-R2: Single Cell 5\u0027 R2-only\n- SC-FB: Single Cell Antibody-only 3\u0027 v2 or 5\u0027\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", - "enum": ["auto", "threeprime", "fiveprime", "SC3Pv1", "SC3Pv2", "SC3Pv3", "SC3Pv3LT", "SC3Pv3HT", "SC5P-PE", "SC5P-R2", "SC-FB"] - , - "default": "auto" - }, - - "gex_secondary_analysis": { - "type": "boolean", - "description": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e", - "help_text": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e.g. clustering.", - "default": "False" - }, - - "gex_generate_bam": { - "type": "boolean", - "description": "Type: `boolean`, default: `false`. Whether to generate a BAM file", - "help_text": "Type: `boolean`, default: `false`. Whether to generate a BAM file.", - "default": "False" - }, - - "gex_include_introns": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", - "help_text": "Type: `boolean`, default: `true`. 
Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", - "default": "True" - } - - } - }, - "cell multiplexing parameters" : { - "title": "Cell multiplexing parameters", - "type": "object", - "description": "Arguments related to cell multiplexing.", - "properties": { - - "cell_multiplex_sample_id": { - "type": "string", - "description": "Type: `string`. A name to identify a multiplexed sample", - "help_text": "Type: `string`. A name to identify a multiplexed sample. Must be alphanumeric with hyphens and/or underscores, and less than 64 characters. Required for Cell Multiplexing libraries." - }, - - "cell_multiplex_oligo_ids": { - "type": "string", - "description": "Type: `string`. The Cell Multiplexing oligo IDs used to multiplex this sample", - "help_text": "Type: `string`. The Cell Multiplexing oligo IDs used to multiplex this sample. If multiple CMOs were used for a sample, separate IDs with a pipe (e.g., CMO301|CMO302). Required for Cell Multiplexing libraries." - }, - - "cell_multiplex_description": { - "type": "string", - "description": "Type: `string`. A description for the sample", - "help_text": "Type: `string`. A description for the sample." - } - - } - }, - "executor arguments" : { - "title": "Executor arguments", - "type": "object", - "description": "No description", - "properties": { - - "dryrun": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed", - "help_text": "Type: `boolean_true`, default: `false`. If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed.", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/input files" - }, - { - "$ref": "#/definitions/library arguments" - }, - { - "$ref": "#/definitions/gene expression arguments" - }, - { - "$ref": "#/definitions/cell multiplexing parameters" - }, - { - "$ref": "#/definitions/executor arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/cellranger_multi/setup_logger.py b/target/nextflow/mapping/cellranger_multi/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/mapping/cellranger_multi/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/mapping/htseq_count/.config.vsh.yaml b/target/nextflow/mapping/htseq_count/.config.vsh.yaml deleted file mode 100644 index ced73cd1c29..00000000000 --- a/target/nextflow/mapping/htseq_count/.config.vsh.yaml +++ /dev/null @@ -1,418 +0,0 @@ -functionality: - name: "htseq_count" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - 
name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input" - arguments: - - type: "file" - name: "--input" - description: "Path to the SAM/BAM files containing the mapped reads." - info: - orig_arg: "samfilenames" - example: - - "mysample1.BAM" - - "mysample2.BAM" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - description: "Path to the GTF file containing the features." - info: - orig_arg: "featurefilename" - example: - - "reference.gtf" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output" - arguments: - - type: "file" - name: "--output" - description: "Filename to output the counts to." - info: - orig_arg: "--counts_output" - example: - - "htseq-count.tsv" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_delimiter" - description: "Column delimiter in output." - info: - orig_arg: "--delimiter" - example: - - "\t" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_sam" - description: "Write out all SAM alignment records into SAM/BAM files (one per\ - \ input file needed), \nannotating each line with its feature assignment (as\ - \ an optional field with tag 'XF'). 
\nSee the -p option to use BAM instead\ - \ of SAM.\n" - info: - orig_arg: "--samout" - example: - - "mysample1_out.BAM" - - "mysample2_out.BAM" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--output_sam_format" - description: "Format to use with the --output_sam argument." - info: - orig_arg: "--samout-format" - required: false - choices: - - "sam" - - "bam" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "string" - name: "--order" - alternatives: - - "-r" - description: "Sorting order of . Paired-end sequencing data\ - \ must be sorted either by position or\nby read name, and the sorting order\ - \ must be specified. Ignored for single-end data.\n" - info: - orig_arg: "--order" - default: - - "name" - required: false - choices: - - "pos" - - "name" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--stranded" - alternatives: - - "-s" - description: "Whether the data is from a strand-specific assay. 'reverse' means\ - \ 'yes' with reversed strand interpretation." - info: - orig_arg: "--stranded" - default: - - "yes" - required: false - choices: - - "yes" - - "no" - - "reverse" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--minimum_alignment_quality" - alternatives: - - "-a" - - "--minaqual" - description: "Skip all reads with MAPQ alignment quality lower than the given\ - \ minimum value. 
\nMAPQ is the 5th column of a SAM/BAM file and its usage\ - \ depends on the software \nused to map the reads.\n" - info: - orig_arg: "--minaqual" - default: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--type" - alternatives: - - "-t" - description: "Feature type (3rd column in GTF file) to be used, all features\ - \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" - info: - orig_arg: "--type" - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--id_attribute" - alternatives: - - "-i" - description: "GTF attribute to be used as feature ID (default, suitable for\ - \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ - \ within the same GTF attribute will be added\ntogether. The typical way of\ - \ using this option is to count all exonic reads from each gene\nand add the\ - \ exons but other uses are possible as well. You can call this option multiple\n\ - times: in that case, the combination of all attributes separated by colons\ - \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ - \ gene_id -i exon_number.\n" - info: - orig_arg: "--idattr" - example: - - "gene_id" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--additional_attributes" - description: "Additional feature attributes (suitable for Ensembl GTF files:\ - \ gene_name). 
Use multiple times\nfor more than one additional attribute.\ - \ These attributes are only used as annotations in the\noutput, while the\ - \ determination of how the counts are added together is done based on option\ - \ -i.\n" - info: - orig_arg: "--additional-attr" - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--add_chromosome_info" - description: "Store information about the chromosome of each feature as an additional\ - \ attribute\n(e.g. colunm in the TSV output file).\n" - info: - orig_arg: "--add-chromosome-info" - direction: "input" - dest: "par" - - type: "string" - name: "--mode" - alternatives: - - "-m" - description: "Mode to handle reads overlapping more than one feature." - info: - orig_arg: "--mode" - default: - - "union" - required: false - choices: - - "union" - - "intersection-strict" - - "intersection-nonempty" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--non_unique" - description: "Whether and how to score reads that are not uniquely aligned or\ - \ ambiguously assigned to features." - info: - orig_arg: "--nonunique" - default: - - "none" - required: false - choices: - - "none" - - "all" - - "fraction" - - "random" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--secondary_alignments" - description: "Whether to score secondary alignments (0x100 flag)." - info: - orig_arg: "--secondary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--supplementary_alignments" - description: "Whether to score supplementary alignments (0x800 flag)." 
- info: - orig_arg: "--supplementary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--counts_output_sparse" - description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." - info: - orig_arg: "--counts-output-sparse" - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Quantify gene expression for subsequent testing for differential expression.\n\ - \nThis script takes one or more alignment files in SAM/BAM format and a feature\ - \ file in GFF format and calculates for each feature the number of reads mapping\ - \ to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "HTSeq" - - "pyyaml" - - "scipy" - - "pandas~=2.0.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: 
"memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count/htseq_count" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/htseq_count/main.nf b/target/nextflow/mapping/htseq_count/main.nf deleted file mode 100644 index 6877e869652..00000000000 --- a/target/nextflow/mapping/htseq_count/main.nf +++ /dev/null @@ -1,2978 +0,0 @@ -// htseq_count 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (author, maintainer) -// * Angela Oliveira Pisco (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "htseq_count", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Path to the SAM/BAM files containing the mapped reads.", - "info" : { - "orig_arg" : "samfilenames" - }, - "example" : [ - "mysample1.BAM", - "mysample2.BAM" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "description" : 
"Path to the GTF file containing the features.", - "info" : { - "orig_arg" : "featurefilename" - }, - "example" : [ - "reference.gtf" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Filename to output the counts to.", - "info" : { - "orig_arg" : "--counts_output" - }, - "example" : [ - "htseq-count.tsv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_delimiter", - "description" : "Column delimiter in output.", - "info" : { - "orig_arg" : "--delimiter" - }, - "example" : [ - "\t" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output_sam", - "description" : "Write out all SAM alignment records into SAM/BAM files (one per input file needed), \nannotating each line with its feature assignment (as an optional field with tag 'XF'). 
\nSee the -p option to use BAM instead of SAM.\n", - "info" : { - "orig_arg" : "--samout" - }, - "example" : [ - "mysample1_out.BAM", - "mysample2_out.BAM" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_sam_format", - "description" : "Format to use with the --output_sam argument.", - "info" : { - "orig_arg" : "--samout-format" - }, - "required" : false, - "choices" : [ - "sam", - "bam" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "string", - "name" : "--order", - "alternatives" : [ - "-r" - ], - "description" : "Sorting order of . Paired-end sequencing data must be sorted either by position or\nby read name, and the sorting order must be specified. Ignored for single-end data.\n", - "info" : { - "orig_arg" : "--order" - }, - "default" : [ - "name" - ], - "required" : false, - "choices" : [ - "pos", - "name" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--stranded", - "alternatives" : [ - "-s" - ], - "description" : "Whether the data is from a strand-specific assay. 'reverse' means 'yes' with reversed strand interpretation.", - "info" : { - "orig_arg" : "--stranded" - }, - "default" : [ - "yes" - ], - "required" : false, - "choices" : [ - "yes", - "no", - "reverse" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--minimum_alignment_quality", - "alternatives" : [ - "-a", - "--minaqual" - ], - "description" : "Skip all reads with MAPQ alignment quality lower than the given minimum value. 
\nMAPQ is the 5th column of a SAM/BAM file and its usage depends on the software \nused to map the reads.\n", - "info" : { - "orig_arg" : "--minaqual" - }, - "default" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--type", - "alternatives" : [ - "-t" - ], - "description" : "Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)", - "info" : { - "orig_arg" : "--type" - }, - "example" : [ - "exon" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--id_attribute", - "alternatives" : [ - "-i" - ], - "description" : "GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option) within the same GTF attribute will be added\ntogether. The typical way of using this option is to count all exonic reads from each gene\nand add the exons but other uses are possible as well. You can call this option multiple\ntimes: in that case, the combination of all attributes separated by colons (:) will be used\nas a unique identifier, e.g. for exons you might use -i gene_id -i exon_number.\n", - "info" : { - "orig_arg" : "--idattr" - }, - "example" : [ - "gene_id" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--additional_attributes", - "description" : "Additional feature attributes (suitable for Ensembl GTF files: gene_name). Use multiple times\nfor more than one additional attribute. 
These attributes are only used as annotations in the\noutput, while the determination of how the counts are added together is done based on option -i.\n", - "info" : { - "orig_arg" : "--additional-attr" - }, - "example" : [ - "gene_name" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--add_chromosome_info", - "description" : "Store information about the chromosome of each feature as an additional attribute\n(e.g. colunm in the TSV output file).\n", - "info" : { - "orig_arg" : "--add-chromosome-info" - }, - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--mode", - "alternatives" : [ - "-m" - ], - "description" : "Mode to handle reads overlapping more than one feature.", - "info" : { - "orig_arg" : "--mode" - }, - "default" : [ - "union" - ], - "required" : false, - "choices" : [ - "union", - "intersection-strict", - "intersection-nonempty" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--non_unique", - "description" : "Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features.", - "info" : { - "orig_arg" : "--nonunique" - }, - "default" : [ - "none" - ], - "required" : false, - "choices" : [ - "none", - "all", - "fraction", - "random" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--secondary_alignments", - "description" : "Whether to score secondary alignments (0x100 flag).", - "info" : { - "orig_arg" : "--secondary-alignments" - }, - "required" : false, - "choices" : [ - "score", - "ignore" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--supplementary_alignments", - "description" : "Whether to score supplementary 
alignments (0x800 flag).", - "info" : { - "orig_arg" : "--supplementary-alignments" - }, - "required" : false, - "choices" : [ - "score", - "ignore" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--counts_output_sparse", - "description" : "Store the counts as a sparse matrix (mtx, h5ad, loom).", - "info" : { - "orig_arg" : "--counts-output-sparse" - }, - "direction" : "input", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/" - } - ], - "description" : "Quantify gene expression for subsequent testing for differential expression.\n\nThis script takes one or more alignment files in SAM/BAM format and a feature file in GFF format and calculates for each feature the number of reads mapping to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - 
"interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "HTSeq", - "pyyaml", - "scipy", - "pandas~=2.0.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import tempfile -import subprocess -from pathlib import 
Path -import tarfile -import gzip -import shutil -import yaml - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_delimiter': $( if [ ! -z ${VIASH_PAR_OUTPUT_DELIMITER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_DELIMITER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_sam': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'output_sam_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM_FORMAT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'order': $( if [ ! -z ${VIASH_PAR_ORDER+x} ]; then echo "r'${VIASH_PAR_ORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'add_chromosome_info': $( if [ ! 
-z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'non_unique': $( if [ ! -z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'supplementary_alignments': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\\\x1f\\\\x8b' - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -def generate_args(par, config): - # fetch arguments from config - arguments = [ - arg - for group in config["functionality"]["argument_groups"] - for arg in group["arguments"] - ] - - cmd_args = [] - - for arg in arguments: - arg_val = par.get(arg["name"].removeprefix("--")) - orig_arg = arg.get("info", {}).get("orig_arg") - if arg_val and orig_arg: - if not arg.get("multiple", False): - arg_val = [arg_val] - - if arg["type"] in ["boolean_true", "boolean_false"]: - # if argument is a boolean_true or boolean_false, simply add the flag - arg_val = [orig_arg] - elif orig_arg.startswith("-"): - # if the orig arg flag is not a positional, - # add the flag in front of each element and flatten - arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] - - cmd_args.extend(arg_val) - - return cmd_args - -######################## -### Main code ### -######################## - -# read config arguments -config = yaml.safe_load(Path(meta["config"]).read_text()) - - -with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir: - - # checking for compressed files, ungzip files if need be - temp_dir_path = 
Path(temp_dir) - reference = Path(par["reference"]) - - print(f'>> Check compression of --reference with value: {reference}', flush=True) - par["reference"] = extract_if_need_be(reference, temp_dir_path) - - print(">> Constructing command", flush=True) - cmd_args = [ "htseq-count" ] + generate_args(par, config) - - # manually process cpus parameter - if 'cpus' in meta and meta['cpus']: - cmd_args.extend(["--nprocesses", str(meta["cpus"])]) - - print(">> Running htseq-count with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_htseq_count", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. 
the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/htseq_count/nextflow.config b/target/nextflow/mapping/htseq_count/nextflow.config deleted file mode 100644 index 
3601e4198af..00000000000 --- a/target/nextflow/mapping/htseq_count/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'htseq_count' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Quantify gene expression for subsequent testing for differential expression.\n\nThis script takes one or more alignment files in SAM/BAM format and a feature file in GFF format and calculates for each feature the number of reads mapping to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n' - author = 'Robrecht Cannoodt, Angela Oliveira Pisco' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { 
memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/htseq_count/nextflow_params.yaml b/target/nextflow/mapping/htseq_count/nextflow_params.yaml deleted file mode 100644 index 55b2152e8ff..00000000000 --- a/target/nextflow/mapping/htseq_count/nextflow_params.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Arguments -order: "name" -stranded: "yes" -minimum_alignment_quality: 10 -# type: "exon" -# id_attribute: ["gene_id"] -# additional_attributes: ["gene_name"] -add_chromosome_info: false -mode: "union" -non_unique: "none" -# secondary_alignments: "foo" -# supplementary_alignments: "foo" -counts_output_sparse: false - -# Input -input: # please fill in - example: ["mysample1.BAM", "mysample2.BAM"] -reference: # please fill in - example: "reference.gtf" - -# Output -# output: "$id.$key.output.tsv" -# output_delimiter: " " -# output_sam: ["$id.$key.output_sam_*.BAM"] -# output_sam_format: "foo" - -# Nextflow input-output arguments 
-publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/htseq_count/nextflow_schema.json b/target/nextflow/mapping/htseq_count/nextflow_schema.json deleted file mode 100644 index 0e8a3b3d6d4..00000000000 --- a/target/nextflow/mapping/htseq_count/nextflow_schema.json +++ /dev/null @@ -1,198 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "htseq_count", - "description": "Quantify gene expression for subsequent testing for differential expression.\n\nThis script takes one or more alignment files in SAM/BAM format and a feature file in GFF format and calculates for each feature the number of reads mapping to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "order": { - "type": "string", - "description": "Type: `string`, default: `name`, choices: ``pos`, `name``. Sorting order of \u003calignment_file\u003e", - "help_text": "Type: `string`, default: `name`, choices: ``pos`, `name``. Sorting order of \u003calignment_file\u003e. Paired-end sequencing data must be sorted either by position or\nby read name, and the sorting order must be specified. Ignored for single-end data.\n", - "enum": ["pos", "name"] - , - "default": "name" - }, - - "stranded": { - "type": "string", - "description": "Type: `string`, default: `yes`, choices: ``yes`, `no`, `reverse``. Whether the data is from a strand-specific assay", - "help_text": "Type: `string`, default: `yes`, choices: ``yes`, `no`, `reverse``. Whether the data is from a strand-specific assay. \u0027reverse\u0027 means \u0027yes\u0027 with reversed strand interpretation.", - "enum": ["yes", "no", "reverse"] - , - "default": "yes" - }, - - "minimum_alignment_quality": { - "type": "integer", - "description": "Type: `integer`, default: `10`. 
Skip all reads with MAPQ alignment quality lower than the given minimum value", - "help_text": "Type: `integer`, default: `10`. Skip all reads with MAPQ alignment quality lower than the given minimum value. \nMAPQ is the 5th column of a SAM/BAM file and its usage depends on the software \nused to map the reads.\n", - "default": "10" - }, - - "type": { - "type": "string", - "description": "Type: `string`, example: `exon`. Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)", - "help_text": "Type: `string`, example: `exon`. Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)" - }, - - "id_attribute": { - "type": "string", - "description": "Type: List of `string`, example: `gene_id`, multiple_sep: `\":\"`. GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id)", - "help_text": "Type: List of `string`, example: `gene_id`, multiple_sep: `\":\"`. GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option) within the same GTF attribute will be added\ntogether. The typical way of using this option is to count all exonic reads from each gene\nand add the exons but other uses are possible as well. You can call this option multiple\ntimes: in that case, the combination of all attributes separated by colons (:) will be used\nas a unique identifier, e.g. for exons you might use -i gene_id -i exon_number.\n" - }, - - "additional_attributes": { - "type": "string", - "description": "Type: List of `string`, example: `gene_name`, multiple_sep: `\":\"`. Additional feature attributes (suitable for Ensembl GTF files: gene_name)", - "help_text": "Type: List of `string`, example: `gene_name`, multiple_sep: `\":\"`. Additional feature attributes (suitable for Ensembl GTF files: gene_name). 
Use multiple times\nfor more than one additional attribute. These attributes are only used as annotations in the\noutput, while the determination of how the counts are added together is done based on option -i.\n" - }, - - "add_chromosome_info": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Store information about the chromosome of each feature as an additional attribute\n(e", - "help_text": "Type: `boolean_true`, default: `false`. Store information about the chromosome of each feature as an additional attribute\n(e.g. colunm in the TSV output file).\n", - "default": "False" - }, - - "mode": { - "type": "string", - "description": "Type: `string`, default: `union`, choices: ``union`, `intersection-strict`, `intersection-nonempty``. Mode to handle reads overlapping more than one feature", - "help_text": "Type: `string`, default: `union`, choices: ``union`, `intersection-strict`, `intersection-nonempty``. Mode to handle reads overlapping more than one feature.", - "enum": ["union", "intersection-strict", "intersection-nonempty"] - , - "default": "union" - }, - - "non_unique": { - "type": "string", - "description": "Type: `string`, default: `none`, choices: ``none`, `all`, `fraction`, `random``. Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features", - "help_text": "Type: `string`, default: `none`, choices: ``none`, `all`, `fraction`, `random``. Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features.", - "enum": ["none", "all", "fraction", "random"] - , - "default": "none" - }, - - "secondary_alignments": { - "type": "string", - "description": "Type: `string`, choices: ``score`, `ignore``. Whether to score secondary alignments (0x100 flag)", - "help_text": "Type: `string`, choices: ``score`, `ignore``. 
Whether to score secondary alignments (0x100 flag).", - "enum": ["score", "ignore"] - - }, - - "supplementary_alignments": { - "type": "string", - "description": "Type: `string`, choices: ``score`, `ignore``. Whether to score supplementary alignments (0x800 flag)", - "help_text": "Type: `string`, choices: ``score`, `ignore``. Whether to score supplementary alignments (0x800 flag).", - "enum": ["score", "ignore"] - - }, - - "counts_output_sparse": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Store the counts as a sparse matrix (mtx, h5ad, loom)", - "help_text": "Type: `boolean_true`, default: `false`. Store the counts as a sparse matrix (mtx, h5ad, loom).", - "default": "False" - } - - } - }, - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: List of `file`, required, example: `mysample1.BAM;mysample2.BAM`, multiple_sep: `\";\"`. Path to the SAM/BAM files containing the mapped reads", - "help_text": "Type: List of `file`, required, example: `mysample1.BAM;mysample2.BAM`, multiple_sep: `\";\"`. Path to the SAM/BAM files containing the mapped reads." - }, - - "reference": { - "type": "string", - "description": "Type: `file`, required, example: `reference.gtf`. Path to the GTF file containing the features", - "help_text": "Type: `file`, required, example: `reference.gtf`. Path to the GTF file containing the features." - } - - } - }, - "output" : { - "title": "Output", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.tsv`, example: `htseq-count.tsv`. Filename to output the counts to", - "help_text": "Type: `file`, required, default: `$id.$key.output.tsv`, example: `htseq-count.tsv`. 
Filename to output the counts to.", - "default": "$id.$key.output.tsv" - }, - - "output_delimiter": { - "type": "string", - "description": "Type: `string`, example: `\t`. Column delimiter in output", - "help_text": "Type: `string`, example: `\t`. Column delimiter in output." - }, - - "output_sam": { - "type": "string", - "description": "Type: List of `file`, default: `$id.$key.output_sam_*.BAM`, example: `mysample1_out.BAM;mysample2_out.BAM`, multiple_sep: `\";\"`. Write out all SAM alignment records into SAM/BAM files (one per input file needed), \nannotating each line with its feature assignment (as an optional field with tag \u0027XF\u0027)", - "help_text": "Type: List of `file`, default: `$id.$key.output_sam_*.BAM`, example: `mysample1_out.BAM;mysample2_out.BAM`, multiple_sep: `\";\"`. Write out all SAM alignment records into SAM/BAM files (one per input file needed), \nannotating each line with its feature assignment (as an optional field with tag \u0027XF\u0027). \nSee the -p option to use BAM instead of SAM.\n", - "default": "$id.$key.output_sam_*.BAM" - }, - - "output_sam_format": { - "type": "string", - "description": "Type: `string`, choices: ``sam`, `bam``. Format to use with the --output_sam argument", - "help_text": "Type: `string`, choices: ``sam`, `bam``. Format to use with the --output_sam argument.", - "enum": ["sam", "bam"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
- }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/input" - }, - { - "$ref": "#/definitions/output" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml b/target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml deleted file mode 100644 index 98e9f4afa34..00000000000 --- a/target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,209 +0,0 @@ -functionality: - name: "htseq_count_to_h5mu" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input" - arguments: - - type: "string" - name: "--input_id" - description: "The obs index for the counts" - info: null - example: - - "foo" - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--input_counts" - description: "The counts as a TSV file as output by HTSeq." 
- info: null - example: - - "counts.tsv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - description: "The GTF file." - info: null - example: - - "gencode_v41_star" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Convert the htseq table to a h5mu.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "gtfparse" - - "polars[pyarrow] < 0.16.14" - upgrade: true - entrypoint: [] - cmd: 
null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/main.nf b/target/nextflow/mapping/htseq_count_to_h5mu/main.nf deleted file mode 100644 index 7e64b6f4658..00000000000 --- a/target/nextflow/mapping/htseq_count_to_h5mu/main.nf +++ /dev/null @@ -1,2710 +0,0 @@ -// htseq_count_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Robrecht Cannoodt (author, maintainer) -// * Angela Oliveira Pisco (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "htseq_count_to_h5mu", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input", - "arguments" : [ - { - "type" : "string", - "name" : "--input_id", - "description" : "The obs index for the counts", - "example" : [ - 
"foo" - ], - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input_counts", - "description" : "The counts as a TSV file as output by HTSeq.", - "example" : [ - "counts.tsv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "description" : "The GTF file.", - "example" : [ - "gencode_v41_star" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/" - } - ], - "description" : "Convert the htseq table to a h5mu.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "gtfparse", - "polars[pyarrow] < 0.16.14" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "midcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : 
"cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import tempfile -from pathlib import Path -import tarfile -import gzip -import shutil -import pandas as pd -import mudata as md -import anndata as ad -import polars as pl -import numpy as np -import gtfparse - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'input_counts': $( if [ ! -z ${VIASH_PAR_INPUT_COUNTS+x} ]; then echo "r'${VIASH_PAR_INPUT_COUNTS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\\\x1f\\\\x8b' - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - - -print("> combine counts data", flush=True) -counts_data = [] - -for input_id, input_counts in zip(par["input_id"], par["input_counts"]): - data = pd.read_table(input_counts, index_col=0, names=["gene_ids", input_id], dtype={'gene_ids': 'U', input_id: 'i'}).transpose() - counts_data.append(data) - -# combine all counts -counts_and_qc = pd.concat(counts_data, axis=0) - -print("> split qc", flush=True) -idx = counts_and_qc.columns.str.startswith("_") -qc = counts_and_qc.loc[:,idx] -qc.columns = qc.columns.str.replace("^__", "", regex=True) -counts = counts_and_qc.loc[:,~idx] - -print("> construct var", flush=True) -with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir: - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - reference = Path(par["reference"]) - - print(f'>> Check compression of --reference with value: {reference}', flush=True) - par["reference"] = extract_if_need_be(reference, temp_dir_path) - - # read_gtf only works on str object, not pathlib.Path - reference = gtfparse.read_gtf(str(par["reference"])) - - -# This is a polars dataframe, not 
pandas -reference_genes = reference.filter((pl.col("feature") == "gene") & - (pl.col("gene_id").is_in(list(counts.columns))))\\\\ - .sort("gene_id") - -var = pd.DataFrame( - data={ - "gene_ids": pd.Index(reference_genes.get_column("gene_id")), - "feature_types": "Gene Expression", - "gene_symbol": reference_genes.get_column("gene_name").to_pandas(), - } -).set_index("gene_ids") - -print("> construct anndata", flush=True) -adata = ad.AnnData( - X=counts, - obsm={"qc_htseq": qc}, - var=var, - dtype=np.int32 -) - -print("> convert to mudata", flush=True) -mdata = md.MuData(adata) - -print("> write to file", flush=True) -mdata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_htseq_count_to_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "midcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow.config b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow.config deleted file mode 100644 index 
ab14c87d688..00000000000 --- a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'htseq_count_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Convert the htseq table to a h5mu.\n' - author = 'Robrecht Cannoodt, Angela Oliveira Pisco' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: 
mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml deleted file mode 100644 index 004665f9c67..00000000000 --- a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Input -input_id: # please fill in - example: ["foo"] -input_counts: # please fill in - example: ["counts.tsv"] -reference: # please fill in - example: "gencode_v41_star" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json deleted file mode 100644 index b69f2d0fb5f..00000000000 --- a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "htseq_count_to_h5mu", - "description": "Convert the htseq table to a h5mu.\n", - "type": "object", - "definitions": { - "outputs" : { - 
"title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - "input_id": { - "type": "string", - "description": "Type: List of `string`, required, example: `foo`, multiple_sep: `\";\"`. The obs index for the counts", - "help_text": "Type: List of `string`, required, example: `foo`, multiple_sep: `\";\"`. The obs index for the counts" - }, - - "input_counts": { - "type": "string", - "description": "Type: List of `file`, required, example: `counts.tsv`, multiple_sep: `\";\"`. The counts as a TSV file as output by HTSeq", - "help_text": "Type: List of `file`, required, example: `counts.tsv`, multiple_sep: `\";\"`. The counts as a TSV file as output by HTSeq." - }, - - "reference": { - "type": "string", - "description": "Type: `file`, required, example: `gencode_v41_star`. The GTF file", - "help_text": "Type: `file`, required, example: `gencode_v41_star`. The GTF file." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/input" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/multi_star/.config.vsh.yaml b/target/nextflow/mapping/multi_star/.config.vsh.yaml deleted file mode 100644 index e891bb4bbb9..00000000000 --- a/target/nextflow/mapping/multi_star/.config.vsh.yaml +++ /dev/null @@ -1,3080 +0,0 @@ -functionality: - name: "multi_star" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input/Output" - arguments: - - type: "string" - name: "--input_id" - description: "The ID of the sample being processed. This vector should have\ - \ the same length as the `--input_r1` argument." - info: null - example: - - "mysample" - - "mysample" - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--input_r1" - description: "Paths to the sequences to be mapped. If using Illumina paired-end\ - \ reads, only the R1 files should be passed." 
- info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L002_R1_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--input_r2" - description: "Paths to the sequences to be mapped. If using Illumina paired-end\ - \ reads, only the R2 files should be passed." - info: null - example: - - "mysample_S1_L001_R2_001.fastq.gz" - - "mysample_S1_L002_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference_index" - alternatives: - - "--genomeDir" - description: "Path to the reference built by star_build_reference. Corresponds\ - \ to the --genomeDir argument in the STAR command." - info: null - example: - - "/path/to/reference" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--reference_gtf" - description: "Path to the gtf reference file." - info: null - example: - - "genes.gtf" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--outFileNamePrefix" - description: "Path to output directory. Corresponds to the --outFileNamePrefix\ - \ argument in the STAR command." - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Processing arguments" - arguments: - - type: "boolean" - name: "--run_htseq_count" - description: "Whether or not to also run htseq-count after STAR." 
- info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--run_multiqc" - description: "Whether or not to also run MultiQC at the end." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_success_rate" - description: "Fail when the success rate is below this threshold." - info: null - default: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Run Parameters" - arguments: - - type: "integer" - name: "--runRNGseed" - description: "random number generator seed." - info: - step: "star" - orig_arg: "--runRNGseed" - example: - - 777 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome Parameters" - arguments: - - type: "file" - name: "--genomeFastaFiles" - description: "path(s) to the fasta files with the genome sequences, separated\ - \ by spaces. These files should be plain text FASTA files, they *cannot* be\ - \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ - \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ - \ sequences to the genome (e.g. spike-ins)." - info: - step: "star" - orig_arg: "--genomeFastaFiles" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Splice Junctions Database" - arguments: - - type: "string" - name: "--sjdbFileChrStartEnd" - description: "path to the files with genomic coordinates (chr start \ - \ end strand) for the splice junction introns. Multiple files can be\ - \ supplied and will be concatenated." 
- info: - step: "star" - orig_arg: "--sjdbFileChrStartEnd" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--sjdbGTFfile" - description: "path to the GTF file with annotations" - info: - step: "star" - orig_arg: "--sjdbGTFfile" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFchrPrefix" - description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ - \ ENSMEBL annotations with UCSC genomes)" - info: - step: "star" - orig_arg: "--sjdbGTFchrPrefix" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFfeatureExon" - description: "feature type in GTF file to be used as exons for building transcripts" - info: - step: "star" - orig_arg: "--sjdbGTFfeatureExon" - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentTranscript" - description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ - \ works for GTF files)" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentTranscript" - example: - - "transcript_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGene" - description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ - \ for GTF files)" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentGene" - example: - - "gene_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneName" - description: "GTF attribute name for parent gene name" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentGeneName" - example: - - "gene_name" - required: false - 
direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneType" - description: "GTF attribute name for parent gene type" - info: - step: "star" - orig_arg: "--sjdbGTFtagExonParentGeneType" - example: - - "gene_type" - - "gene_biotype" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--sjdbOverhang" - description: "length of the donor/acceptor sequence on each side of the junctions,\ - \ ideally = (mate_length - 1)" - info: - step: "star" - orig_arg: "--sjdbOverhang" - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--sjdbScore" - description: "extra alignment score for alignments that cross database junctions" - info: - step: "star" - orig_arg: "--sjdbScore" - example: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbInsertSave" - description: "which files to save when sjdb junctions are inserted on the fly\ - \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ - - All ... all files including big Genome, SA and SAindex - this will create\ - \ a complete genome directory" - info: - step: "star" - orig_arg: "--sjdbInsertSave" - example: - - "Basic" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variation parameters" - arguments: - - type: "string" - name: "--varVCFfile" - description: "path to the VCF file that contains variation data. The 10th column\ - \ should contain the genotype information, e.g. 0/1" - info: - step: "star" - orig_arg: "--varVCFfile" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Parameters" - arguments: - - type: "string" - name: "--readFilesType" - description: "format of input read files\n\n- Fastx ... 
FASTA or FASTQ\n\ - - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ - \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ - \ --readFilesCommand samtools view" - info: - step: "star" - orig_arg: "--readFilesType" - example: - - "Fastx" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesSAMattrKeep" - description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ - \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ - - None ... do not keep any tags" - info: - step: "star" - orig_arg: "--readFilesSAMattrKeep" - example: - - "All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--readFilesManifest" - description: "path to the \"manifest\" file with the names of read files. The\ - \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ - \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ - \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ - \ but not tabs are allowed in file names.\nIf read_group_line does not start\ - \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ - If read_group_line starts with ID:, it can contain several fields separated\ - \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ - \ line." - info: - step: "star" - orig_arg: "--readFilesManifest" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesPrefix" - description: "prefix for the read files names, i.e. 
it will be added in front\ - \ of the strings in --readFilesIn" - info: - step: "star" - orig_arg: "--readFilesPrefix" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesCommand" - description: "command line to execute for each of the input file. This command\ - \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ - \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." - info: - step: "star" - orig_arg: "--readFilesCommand" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readMapNumber" - description: "number of reads to map from the beginning of the file\n\n-1: map\ - \ all reads" - info: - step: "star" - orig_arg: "--readMapNumber" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readMatesLengthsIn" - description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ - \ mates are the same / not the same. NotEqual is safe in all situations." 
- info: - step: "star" - orig_arg: "--readMatesLengthsIn" - example: - - "NotEqual" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readNameSeparator" - description: "character(s) separating the part of the read names that will be\ - \ trimmed in output (read name after space is always trimmed)" - info: - step: "star" - orig_arg: "--readNameSeparator" - example: - - "/" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readQualityScoreBase" - description: "number to be subtracted from the ASCII code to get Phred quality\ - \ score" - info: - step: "star" - orig_arg: "--readQualityScoreBase" - example: - - 33 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Clipping" - arguments: - - type: "string" - name: "--clipAdapterType" - description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ - \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ - - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ - \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ - \ ... no adapter clipping, all other clip* parameters are disregarded" - info: - step: "star" - orig_arg: "--clipAdapterType" - example: - - "Hamming" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--clip3pNbases" - description: "number(s) of bases to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." - info: - step: "star" - orig_arg: "--clip3pNbases" - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--clip3pAdapterSeq" - description: "adapter sequences to clip from 3p of each mate. 
If one value\ - \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ - \ sequence with the length equal to read length" - info: - step: "star" - orig_arg: "--clip3pAdapterSeq" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "double" - name: "--clip3pAdapterMMp" - description: "max proportion of mismatches for 3p adapter clipping for each\ - \ mate. If one value is given, it will be assumed the same for both mates." - info: - step: "star" - orig_arg: "--clip3pAdapterMMp" - example: - - 0.1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip3pAfterAdapterNbases" - description: "number of bases to clip from 3p of each mate after the adapter\ - \ clipping. If one value is given, it will be assumed the same for both mates." - info: - step: "star" - orig_arg: "--clip3pAfterAdapterNbases" - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip5pNbases" - description: "number(s) of bases to clip from 5p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." 
- info: - step: "star" - orig_arg: "--clip5pNbases" - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Limits" - arguments: - - type: "long" - name: "--limitGenomeGenerateRAM" - description: "maximum available RAM (bytes) for genome generation" - info: - step: "star" - orig_arg: "--limitGenomeGenerateRAM" - example: - - 31000000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitIObufferSize" - description: "max available buffers size (bytes) for input/output, per thread" - info: - step: "star" - orig_arg: "--limitIObufferSize" - example: - - 30000000 - - 50000000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "long" - name: "--limitOutSAMoneReadBytes" - description: "max size of the SAM record (bytes) for one read. Recommended value:\ - \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - info: - step: "star" - orig_arg: "--limitOutSAMoneReadBytes" - example: - - 100000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJoneRead" - description: "max number of junctions for one read (including all multi-mappers)" - info: - step: "star" - orig_arg: "--limitOutSJoneRead" - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJcollapsed" - description: "max number of collapsed junctions" - info: - step: "star" - orig_arg: "--limitOutSJcollapsed" - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitBAMsortRAM" - description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ - \ be set to the genome index size. 0 value can only be used with --genomeLoad\ - \ NoSharedMemory option." 
- info: - step: "star" - orig_arg: "--limitBAMsortRAM" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitSjdbInsertNsj" - description: "maximum number of junctions to be inserted to the genome on the\ - \ fly at the mapping stage, including those from annotations and those detected\ - \ in the 1st step of the 2-pass run" - info: - step: "star" - orig_arg: "--limitSjdbInsertNsj" - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitNreadsSoft" - description: "soft limit on the number of reads" - info: - step: "star" - orig_arg: "--limitNreadsSoft" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: general" - arguments: - - type: "string" - name: "--outTmpKeep" - description: "whether to keep the temporary files after STAR runs is finished\n\ - \n- None ... remove all temporary files\n- All ... keep all files" - info: - step: "star" - orig_arg: "--outTmpKeep" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outStd" - description: "which output will be directed to stdout (standard out)\n\n- Log\ - \ ... log messages\n- SAM ... alignments\ - \ in SAM format (which normally are output to Aligned.out.sam file), normal\ - \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ - \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ - \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ - \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ - \ in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM" - info: - step: "star" - orig_arg: "--outStd" - example: - - "Log" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outReadsUnmapped" - description: "output of unmapped and partially mapped (i.e. mapped only one\ - \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ - \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - info: - step: "star" - orig_arg: "--outReadsUnmapped" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outQSconversionAdd" - description: "add this number to the quality score (e.g. to convert from Illumina\ - \ to Sanger, use -31)" - info: - step: "star" - orig_arg: "--outQSconversionAdd" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outMultimapperOrder" - description: "order of multimapping alignments in the output files\n\n- Old_2.4\ - \ ... quasi-random order used before 2.5.0\n- Random \ - \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ - \ are always adjacent, all alignment for each read stay together. This option\ - \ will become default in the future releases." - info: - step: "star" - orig_arg: "--outMultimapperOrder" - example: - - "Old_2.4" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: SAM and BAM" - arguments: - - type: "string" - name: "--outSAMmode" - description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ - \ SAM output\n- NoQS ... 
full SAM but without quality scores" - info: - step: "star" - orig_arg: "--outSAMmode" - example: - - "Full" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMstrandField" - description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ - - intronMotif ... strand derived from the intron motif. This option changes\ - \ the output alignments: reads with inconsistent and/or non-canonical introns\ - \ are filtered out." - info: - step: "star" - orig_arg: "--outSAMstrandField" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattributes" - description: "a string of desired SAM attributes, in the order desired for the\ - \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ - - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ - \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ - \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ - \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ - \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ - \ local alignment score, +1/-1 for matches/mismateches, score* penalties for\ - \ indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n\ - - nM ... number of mismatches. For PE reads, sum over two mates.\n\ - - NM ... edit distance to the reference (number of mismatched + inserted\ - \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ - \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ - \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ - \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ - \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ - \ annotated, 20 is added to its motif value.\n- jI ... 
start and\ - \ end of introns for all junctions (1-based).\n- XS ... alignment\ - \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ - \ string. Standard SAM tag.\n- ch ... marks all segment of all chimeric\ - \ alingments for --chimOutType WithinBAM output.\n- cN ... number\ - \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ - \ ... variant allele\n- vG ... genomic coordinate of the variant\ - \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ - \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ - \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ - \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ - \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ - \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ - \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ - \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ - \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ - \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ - \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ - \ genome generated with --genomeTransformType Diploid .\n- rB ...\ - \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ - \ of the variant." - info: - step: "star" - orig_arg: "--outSAMattributes" - example: - - "Standard" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMattrIHstart" - description: "start value for the IH attribute. 0 may be required by some downstream\ - \ software, such as Cufflinks or StringTie." 
- info: - step: "star" - orig_arg: "--outSAMattrIHstart" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMunmapped" - description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ - \ ... no output\n- Within ... output unmapped reads within the main SAM\ - \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ - \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ - \ to its mapped mate. Only affects multi-mapping reads." - info: - step: "star" - orig_arg: "--outSAMunmapped" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMorder" - description: "type of sorting for the SAM output\n\nPaired: one mate after the\ - \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ - \ other for all paired alignments, the order is kept the same as in the input\ - \ FASTQ files" - info: - step: "star" - orig_arg: "--outSAMorder" - example: - - "Paired" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMprimaryFlag" - description: "which alignments are considered primary - all others will be marked\ - \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ - \ the best score is primary\n- AllBestScore ... all alignments with the best\ - \ score are primary" - info: - step: "star" - orig_arg: "--outSAMprimaryFlag" - example: - - "OneBestScore" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMreadID" - description: "read ID record type\n\n- Standard ... 
first word (until space)\ - \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ - \ read number (index) in the FASTx file" - info: - step: "star" - orig_arg: "--outSAMreadID" - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMmapqUnique" - description: "0 to 255: the MAPQ value for unique mappers" - info: - step: "star" - orig_arg: "--outSAMmapqUnique" - example: - - 255 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagOR" - description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ - \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ - \ are not set otherwise." - info: - step: "star" - orig_arg: "--outSAMflagOR" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagAND" - description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ - \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ - \ are not set otherwise." - info: - step: "star" - orig_arg: "--outSAMflagAND" - example: - - 65535 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattrRGline" - description: "SAM/BAM read group line. The first word contains the read group\ - \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ - \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ - \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ - \ correspons to different (comma separated) input files in --readFilesIn.\ - \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ - \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - info: - step: "star" - orig_arg: "--outSAMattrRGline" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderHD" - description: "@HD (header) line of the SAM header" - info: - step: "star" - orig_arg: "--outSAMheaderHD" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderPG" - description: "extra @PG (software) line of the SAM header (in addition to STAR)" - info: - step: "star" - orig_arg: "--outSAMheaderPG" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderCommentFile" - description: "path to the file with @CO (comment) lines of the SAM header" - info: - step: "star" - orig_arg: "--outSAMheaderCommentFile" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMfilter" - description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ - \ ... only keep the reads for which all alignments are to the extra reference\ - \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ - \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ - \ at the mapping stage." - info: - step: "star" - orig_arg: "--outSAMfilter" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMmultNmax" - description: "max number of multiple alignments for a read that will be output\ - \ to the SAM/BAM files. 
Note that if this value is not equal to -1, the top\ - \ scoring alignment will be output first\n\n- -1 ... all alignments (up to\ - \ --outFilterMultimapNmax) will be output" - info: - step: "star" - orig_arg: "--outSAMmultNmax" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMtlen" - description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ - - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ - \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ - \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ - \ from 1 for overlapping mates with protruding ends" - info: - step: "star" - orig_arg: "--outSAMtlen" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMcompression" - description: "-1 to 10 BAM compression level, -1=default compression (6?),\ - \ 0=no compression, 10=maximum compression" - info: - step: "star" - orig_arg: "--outBAMcompression" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingThreadN" - description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." 
- info: - step: "star" - orig_arg: "--outBAMsortingThreadN" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingBinsN" - description: ">0: number of genome bins for coordinate-sorting" - info: - step: "star" - orig_arg: "--outBAMsortingBinsN" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "BAM processing" - arguments: - - type: "string" - name: "--bamRemoveDuplicatesType" - description: "mark duplicates in the BAM file, for now only works with (i) sorted\ - \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ - \ - ... no duplicate removal/marking\n- UniqueIdentical\ - \ ... mark all multimappers, and duplicate unique mappers. The coordinates,\ - \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ - \ unique mappers but not multimappers." - info: - step: "star" - orig_arg: "--bamRemoveDuplicatesType" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--bamRemoveDuplicatesMate2basesN" - description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ - \ for RAMPAGE)" - info: - step: "star" - orig_arg: "--bamRemoveDuplicatesMate2basesN" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Wiggle" - arguments: - - type: "string" - name: "--outWigType" - description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ - . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ - - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ - \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ - \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... 
signal from\ - \ only 2nd read" - info: - step: "star" - orig_arg: "--outWigType" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outWigStrand" - description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ - \ strands, str1 and str2\n- Unstranded ... collapsed strands" - info: - step: "star" - orig_arg: "--outWigStrand" - example: - - "Stranded" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigReferencesPrefix" - description: "prefix matching reference names to include in the output wiggle\ - \ file, e.g. \"chr\", default \"-\" - include all references" - info: - step: "star" - orig_arg: "--outWigReferencesPrefix" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigNorm" - description: "type of normalization for the signal\n\n- RPM ... reads per\ - \ million of mapped reads\n- None ... no normalization, \"raw\" counts" - info: - step: "star" - orig_arg: "--outWigNorm" - example: - - "RPM" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering" - arguments: - - type: "string" - name: "--outFilterType" - description: "type of filtering\n\n- Normal ... standard filtering using only\ - \ current alignment\n- BySJout ... 
keep only those reads that contain junctions\ - \ that passed filtering into SJ.out.tab" - info: - step: "star" - orig_arg: "--outFilterType" - example: - - "Normal" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapScoreRange" - description: "the score range below the maximum score for multimapping alignments" - info: - step: "star" - orig_arg: "--outFilterMultimapScoreRange" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapNmax" - description: "maximum number of loci the read is allowed to map to. Alignments\ - \ (all of them) will be output only if the read maps to no more loci than\ - \ this value.\n\nOtherwise no alignments will be output, and the read will\ - \ be counted as \"mapped to too many loci\" in the Log.final.out ." - info: - step: "star" - orig_arg: "--outFilterMultimapNmax" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMismatchNmax" - description: "alignment will be output only if it has no more mismatches than\ - \ this value." - info: - step: "star" - orig_arg: "--outFilterMismatchNmax" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverLmax" - description: "alignment will be output only if its ratio of mismatches to *mapped*\ - \ length is less than or equal to this value." - info: - step: "star" - orig_arg: "--outFilterMismatchNoverLmax" - example: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverReadLmax" - description: "alignment will be output only if its ratio of mismatches to *read*\ - \ length is less than or equal to this value." 
- info: - step: "star" - orig_arg: "--outFilterMismatchNoverReadLmax" - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterScoreMin" - description: "alignment will be output only if its score is higher than or equal\ - \ to this value." - info: - step: "star" - orig_arg: "--outFilterScoreMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterScoreMinOverLread" - description: "same as outFilterScoreMin, but normalized to read length (sum\ - \ of mates' lengths for paired-end reads)" - info: - step: "star" - orig_arg: "--outFilterScoreMinOverLread" - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMatchNmin" - description: "alignment will be output only if the number of matched bases is\ - \ higher than or equal to this value." - info: - step: "star" - orig_arg: "--outFilterMatchNmin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMatchNminOverLread" - description: "sam as outFilterMatchNmin, but normalized to the read length (sum\ - \ of mates' lengths for paired-end reads)." - info: - step: "star" - orig_arg: "--outFilterMatchNminOverLread" - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronMotifs" - description: "filter alignment using their motifs\n\n- None \ - \ ... no filtering\n- RemoveNoncanonical ... filter\ - \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ - \ ... filter out alignments that contain non-canonical unannotated junctions\ - \ when using annotated splice junctions database. 
The annotated non-canonical\ - \ junctions will be kept." - info: - step: "star" - orig_arg: "--outFilterIntronMotifs" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronStrands" - description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ - \ alignments that have junctions with inconsistent strands\n- None \ - \ ... no filtering" - info: - step: "star" - orig_arg: "--outFilterIntronStrands" - example: - - "RemoveInconsistentStrands" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output splice junctions (SJ.out.tab)" - arguments: - - type: "string" - name: "--outSJtype" - description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ - \ output\n- None ... no splice junction output" - info: - step: "star" - orig_arg: "--outSJtype" - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering: Splice Junctions" - arguments: - - type: "string" - name: "--outSJfilterReads" - description: "which reads to consider for collapsed splice junctions output\n\ - \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ - \ mapping reads only" - info: - step: "star" - orig_arg: "--outSJfilterReads" - example: - - "All" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSJfilterOverhangMin" - description: "minimum overhang length for splice junctions on both sides for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - does not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterOverhangMin" - example: - - 30 - - 12 - - 12 - - 12 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountUniqueMin" - description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ - \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ - \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ - \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ - \ are satisfied\ndoes not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterCountUniqueMin" - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountTotalMin" - description: "minimum total (multi-mapping+unique) read count per junction for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ - \ conditions are satisfied\ndoes not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterCountTotalMin" - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterDistToOtherSJmin" - description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ - does not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterDistToOtherSJmin" - example: - - 10 - - 0 - - 5 - - 10 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterIntronMaxVsReadN" - description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ - \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ - \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ - does not apply to annotated junctions" - info: - step: "star" - orig_arg: "--outSJfilterIntronMaxVsReadN" - example: - - 50000 - - 100000 - - 200000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Scoring" - arguments: - - type: "integer" - name: "--scoreGap" - description: "splice junction penalty (independent on intron motif)" - info: - step: "star" - orig_arg: "--scoreGap" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapNoncan" - description: "non-canonical junction penalty (in addition to scoreGap)" - info: - step: "star" - orig_arg: "--scoreGapNoncan" - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapGCAG" - description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" - info: - step: "star" - orig_arg: "--scoreGapGCAG" - example: - - -4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapATAC" - description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" - info: - step: "star" - orig_arg: "--scoreGapATAC" - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGenomicLengthLog2scale" - description: "extra score logarithmically scaled with genomic length of the\ - \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" - info: - step: "star" - orig_arg: "--scoreGenomicLengthLog2scale" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelOpen" - description: "deletion open penalty" - info: - step: "star" - orig_arg: "--scoreDelOpen" - example: - - -2 - required: false - direction: 
"input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelBase" - description: "deletion extension penalty per base (in addition to scoreDelOpen)" - info: - step: "star" - orig_arg: "--scoreDelBase" - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsOpen" - description: "insertion open penalty" - info: - step: "star" - orig_arg: "--scoreInsOpen" - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsBase" - description: "insertion extension penalty per base (in addition to scoreInsOpen)" - info: - step: "star" - orig_arg: "--scoreInsBase" - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreStitchSJshift" - description: "maximum score reduction while searching for SJ boundaries in the\ - \ stitching step" - info: - step: "star" - orig_arg: "--scoreStitchSJshift" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Alignments and Seeding" - arguments: - - type: "integer" - name: "--seedSearchStartLmax" - description: "defines the search start point through the read - the read is\ - \ split into pieces no longer than this value" - info: - step: "star" - orig_arg: "--seedSearchStartLmax" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--seedSearchStartLmaxOverLread" - description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ - \ for paired-end reads)" - info: - step: "star" - orig_arg: "--seedSearchStartLmaxOverLread" - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSearchLmax" - 
description: "defines the maximum length of the seeds, if =0 seed length is\ - \ not limited" - info: - step: "star" - orig_arg: "--seedSearchLmax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMultimapNmax" - description: "only pieces that map fewer than this value are utilized in the\ - \ stitching procedure" - info: - step: "star" - orig_arg: "--seedMultimapNmax" - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerReadNmax" - description: "max number of seeds per read" - info: - step: "star" - orig_arg: "--seedPerReadNmax" - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerWindowNmax" - description: "max number of seeds per window" - info: - step: "star" - orig_arg: "--seedPerWindowNmax" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedNoneLociPerWindow" - description: "max number of one seed loci per window" - info: - step: "star" - orig_arg: "--seedNoneLociPerWindow" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSplitMin" - description: "min length of the seed sequences split by Ns or mate gap" - info: - step: "star" - orig_arg: "--seedSplitMin" - example: - - 12 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMapMin" - description: "min length of seeds to be mapped" - info: - step: "star" - orig_arg: "--seedMapMin" - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMin" - description: "minimum intron size, 
genomic gap is considered intron if its length>=alignIntronMin,\ - \ otherwise it is considered Deletion" - info: - step: "star" - orig_arg: "--alignIntronMin" - example: - - 21 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMax" - description: "maximum intron size, if 0, max intron size will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: - step: "star" - orig_arg: "--alignIntronMax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignMatesGapMax" - description: "maximum gap between two mates, if 0, max intron gap will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: - step: "star" - orig_arg: "--alignMatesGapMax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJoverhangMin" - description: "minimum overhang (i.e. block size) for spliced alignments" - info: - step: "star" - orig_arg: "--alignSJoverhangMin" - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJstitchMismatchNmax" - description: "maximum number of mismatches for stitching of the splice junctions\ - \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ - \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." - info: - step: "star" - orig_arg: "--alignSJstitchMismatchNmax" - example: - - 0 - - -1 - - 0 - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--alignSJDBoverhangMin" - description: "minimum overhang (i.e. 
block size) for annotated (sjdb) spliced\ - \ alignments" - info: - step: "star" - orig_arg: "--alignSJDBoverhangMin" - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSplicedMateMapLmin" - description: "minimum mapped length for a read mate that is spliced" - info: - step: "star" - orig_arg: "--alignSplicedMateMapLmin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alignSplicedMateMapLminOverLmate" - description: "alignSplicedMateMapLmin normalized to mate length" - info: - step: "star" - orig_arg: "--alignSplicedMateMapLminOverLmate" - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignWindowsPerReadNmax" - description: "max number of windows per read" - info: - step: "star" - orig_arg: "--alignWindowsPerReadNmax" - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerWindowNmax" - description: "max number of transcripts per window" - info: - step: "star" - orig_arg: "--alignTranscriptsPerWindowNmax" - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerReadNmax" - description: "max number of different alignments per read to consider" - info: - step: "star" - orig_arg: "--alignTranscriptsPerReadNmax" - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsType" - description: "type of read ends alignment\n\n- Local ... standard\ - \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ - \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... 
fully\ - \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ - \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ - \ local alignment" - info: - step: "star" - orig_arg: "--alignEndsType" - example: - - "Local" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsProtrude" - description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ - \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ - \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ - \ ConcordantPair ... report alignments with non-zero protrusion\ - \ as concordant pairs\n- DiscordantPair ... report alignments\ - \ with non-zero protrusion as discordant pairs" - info: - step: "star" - orig_arg: "--alignEndsProtrude" - example: - - "0 ConcordantPair" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignSoftClipAtReferenceEnds" - description: "allow the soft-clipping of the alignments past the end of the\ - \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ - \ with Cufflinks" - info: - step: "star" - orig_arg: "--alignSoftClipAtReferenceEnds" - example: - - "Yes" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignInsertionFlush" - description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ - \ are not flushed\n- Right ... insertions are flushed to the right" - info: - step: "star" - orig_arg: "--alignInsertionFlush" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Paired-End reads" - arguments: - - type: "integer" - name: "--peOverlapNbasesMin" - description: "minimum number of overlapping bases to trigger mates merging and\ - \ realignment. 
Specify >0 value to switch on the \"merginf of overlapping\ - \ mates\" algorithm." - info: - step: "star" - orig_arg: "--peOverlapNbasesMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--peOverlapMMp" - description: "maximum proportion of mismatched bases in the overlap area" - info: - step: "star" - orig_arg: "--peOverlapMMp" - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Windows, Anchors, Binning" - arguments: - - type: "integer" - name: "--winAnchorMultimapNmax" - description: "max number of loci anchors are allowed to map to" - info: - step: "star" - orig_arg: "--winAnchorMultimapNmax" - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winBinNbits" - description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ - \ each window will occupy an integer number of bins." 
- info: - step: "star" - orig_arg: "--winBinNbits" - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winAnchorDistNbins" - description: "max number of bins between two anchors that allows aggregation\ - \ of anchors into one window" - info: - step: "star" - orig_arg: "--winAnchorDistNbins" - example: - - 9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winFlankNbins" - description: "log2(winFlank), where win Flank is the size of the left and right\ - \ flanking regions for each window" - info: - step: "star" - orig_arg: "--winFlankNbins" - example: - - 4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--winReadCoverageRelativeMin" - description: "minimum relative coverage of the read sequence by the seeds in\ - \ a window, for STARlong algorithm only." - info: - step: "star" - orig_arg: "--winReadCoverageRelativeMin" - example: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winReadCoverageBasesMin" - description: "minimum number of bases covered by the seeds in a window , for\ - \ STARlong algorithm only." - info: - step: "star" - orig_arg: "--winReadCoverageBasesMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Chimeric Alignments" - arguments: - - type: "string" - name: "--chimOutType" - description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ - - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ - - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ - - WithinBAM HardClip ... 
(default) hard-clipping in the CIGAR for supplemental\ - \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ - \ ... soft-clipping in the CIGAR for supplemental chimeric alignments" - info: - step: "star" - orig_arg: "--chimOutType" - example: - - "Junctions" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimSegmentMin" - description: "minimum length of chimeric segment length, if ==0, no chimeric\ - \ output" - info: - step: "star" - orig_arg: "--chimSegmentMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreMin" - description: "minimum total (summed) score of the chimeric segments" - info: - step: "star" - orig_arg: "--chimScoreMin" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreDropMax" - description: "max drop (difference) of chimeric score (the sum of scores of\ - \ all chimeric segments) from the read length" - info: - step: "star" - orig_arg: "--chimScoreDropMax" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreSeparation" - description: "minimum difference (separation) between the best chimeric score\ - \ and the next one" - info: - step: "star" - orig_arg: "--chimScoreSeparation" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreJunctionNonGTAG" - description: "penalty for a non-GT/AG chimeric junction" - info: - step: "star" - orig_arg: "--chimScoreJunctionNonGTAG" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimJunctionOverhangMin" - description: "minimum overhang 
for a chimeric junction" - info: - step: "star" - orig_arg: "--chimJunctionOverhangMin" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimSegmentReadGapMax" - description: "maximum gap in the read sequence between chimeric segments" - info: - step: "star" - orig_arg: "--chimSegmentReadGapMax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--chimFilter" - description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ - - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ - \ junction" - info: - step: "star" - orig_arg: "--chimFilter" - example: - - "banGenomicN" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimMainSegmentMultNmax" - description: "maximum number of multi-alignments for the main chimeric segment.\ - \ =1 will prohibit multimapping main segments." - info: - step: "star" - orig_arg: "--chimMainSegmentMultNmax" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapNmax" - description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ - \ old scheme for chimeric detection which only considered unique alignments" - info: - step: "star" - orig_arg: "--chimMultimapNmax" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapScoreRange" - description: "the score range for multi-mapping chimeras below the best chimeric\ - \ score. 
Only works with --chimMultimapNmax > 1" - info: - step: "star" - orig_arg: "--chimMultimapScoreRange" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimNonchimScoreDropMin" - description: "to trigger chimeric detection, the drop in the best non-chimeric\ - \ alignment score with respect to the read length has to be greater than this\ - \ value" - info: - step: "star" - orig_arg: "--chimNonchimScoreDropMin" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimOutJunctionFormat" - description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ - \ no comment lines/headers\n- 1 ... comment lines at the end of the file:\ - \ command line and Nreads: total, unique/multi-mapping" - info: - step: "star" - orig_arg: "--chimOutJunctionFormat" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Quantification of Annotations" - arguments: - - type: "string" - name: "--quantMode" - description: "types of quantification requested\n\n- - ... none\n\ - - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ - \ file\n- GeneCounts ... count reads per gene" - info: - step: "star" - orig_arg: "--quantMode" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--quantTranscriptomeBAMcompression" - description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ - \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ - - 10 ... 
maximum compression" - info: - step: "star" - orig_arg: "--quantTranscriptomeBAMcompression" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--quantTranscriptomeBan" - description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ - \ prohibit indels, soft clipping and single-end alignments - compatible with\ - \ RSEM\n- Singleend ... prohibit single-end alignments" - info: - step: "star" - orig_arg: "--quantTranscriptomeBan" - example: - - "IndelSoftclipSingleend" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "2-pass Mapping" - arguments: - - type: "string" - name: "--twopassMode" - description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ - \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ - \ the genome indices on the fly" - info: - step: "star" - orig_arg: "--twopassMode" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--twopass1readsN" - description: "number of reads to process for the 1st step. Use very large number\ - \ (or default -1) to map all reads in the first step." - info: - step: "star" - orig_arg: "--twopass1readsN" - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "WASP parameters" - arguments: - - type: "string" - name: "--waspOutputMode" - description: "WASP allele-specific output type. This is re-implementation of\ - \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ - \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ - \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ - \ .\n\n- SAMtag ... 
add WASP tags to the alignments that pass WASP filtering" - info: - step: "star" - orig_arg: "--waspOutputMode" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "STARsolo (single cell RNA-seq) parameters" - arguments: - - type: "string" - name: "--soloType" - description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ - \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ - \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ - \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ - \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ - \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ - \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ - \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ - \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ - \ UMI sequences, alignments deduplicated according to alignment start and\ - \ end (after extending soft-clipped bases)" - info: - step: "star" - orig_arg: "--soloType" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCBwhitelist" - description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ - \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ - \ all cell barcodes are allowed" - info: - step: "star" - orig_arg: "--soloCBwhitelist" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--soloCBstart" - description: "cell barcode start base" - info: - step: "star" - orig_arg: "--soloCBstart" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloCBlen" - description: "cell barcode length" - info: - step: "star" - orig_arg: "--soloCBlen" - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIstart" - description: "UMI start base" - info: - step: "star" - orig_arg: "--soloUMIstart" - example: - - 17 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIlen" - description: "UMI length" - info: - step: "star" - orig_arg: "--soloUMIlen" - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeReadLength" - description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ - - 0 ... not defined, do not check" - info: - step: "star" - orig_arg: "--soloBarcodeReadLength" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeMate" - description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ - \n- 0 ... barcode sequence is on separate read, which should always be the\ - \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ - \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" - info: - step: "star" - orig_arg: "--soloBarcodeMate" - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBposition" - description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ - \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ - \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ - start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ - \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ - \ position with of the CB start(end) with respect to the Anchor Base\nString\ - \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ - \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" - info: - step: "star" - orig_arg: "--soloCBposition" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIposition" - description: "position of the UMI on the barcode read, same as soloCBposition\n\ - \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ - \ 3_9_3_14" - info: - step: "star" - orig_arg: "--soloUMIposition" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloAdapterSequence" - description: "adapter sequence to anchor barcodes. Only one adapter sequence\ - \ is allowed." - info: - step: "star" - orig_arg: "--soloAdapterSequence" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloAdapterMismatchesNmax" - description: "maximum number of mismatches allowed in adapter sequence." 
- info: - step: "star" - orig_arg: "--soloAdapterMismatchesNmax" - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBmatchWLtype" - description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ - \ ... only exact matches allowed\n- 1MM \ - \ ... only one match in whitelist with 1 mismatched base allowed.\ - \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ - \ ... multiple matches in whitelist with 1 mismatched\ - \ base allowed, posterior probability calculation is used choose one of the\ - \ matches.\nAllowed CBs have to have at least one read with exact match. This\ - \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ - \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ - \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ - \ multimatching to WL is allowed for CBs with N-bases. This option matches\ - \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ - \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ - \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ - \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - info: - step: "star" - orig_arg: "--soloCBmatchWLtype" - example: - - "1MM_multi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeSeq" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ - \ CR UR .\nThis parameter is required when running STARsolo with input from\ - \ SAM." 
- info: - step: "star" - orig_arg: "--soloInputSAMattrBarcodeSeq" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeQual" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ - \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ - \ to all bases." - info: - step: "star" - orig_arg: "--soloInputSAMattrBarcodeQual" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloStrand" - description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ - \ information\n- Forward ... read strand same as the original RNA molecule\n\ - - Reverse ... read strand opposite to the original RNA molecule" - info: - step: "star" - orig_arg: "--soloStrand" - example: - - "Forward" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloFeatures" - description: "genomic features for which the UMI counts per Cell Barcode are\ - \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ - - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ - \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ - \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ - \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ - \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ - \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ - \ Do not count reads with 100% exonic overlap in the antisense direction." 
- info: - step: "star" - orig_arg: "--soloFeatures" - example: - - "Gene" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloMultiMappers" - description: "counting method for reads mapping to multiple genes\n\n- Unique\ - \ ... count only reads that map to unique genes\n- Uniform ... uniformly\ - \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ - \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ - \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ - \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ - \ Maximization algorithm" - info: - step: "star" - orig_arg: "--soloMultiMappers" - example: - - "Unique" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIdedup" - description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ - \ ... all UMIs with 1 mismatch distance to each other\ - \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ - \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ - \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ - \ but with more stringent criteria for duplicate UMIs\n- Exact \ - \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ - \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ - \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." - info: - step: "star" - orig_arg: "--soloUMIdedup" - example: - - "1MM_All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIfiltering" - description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ - - - ... basic filtering: remove UMIs with N and homopolymers\ - \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... 
basic + remove\ - \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ - \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ - \ ... basic + remove lower-count UMIs that map to more than one gene, matching\ - \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" - info: - step: "star" - orig_arg: "--soloUMIfiltering" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFileNames" - description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ - \ barcode_sequences cell_feature_count_matrix" - info: - step: "star" - orig_arg: "--soloOutFileNames" - example: - - "Solo.out/" - - "features.tsv" - - "barcodes.tsv" - - "matrix.mtx" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellFilter" - description: "cell filtering type and parameters\n\n- None ... do\ - \ not output filtered cells\n- TopCells ... only report top cells by\ - \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ - \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ - \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ - \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ - \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ - \ in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun\ - \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ - Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ - \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ - \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ - \ 0.99 10 45000 90000 500 0.01\ - \ 20000 0.01 10000" - info: - step: "star" - orig_arg: "--soloCellFilter" - example: - - "CellRanger2.2" - - "3000" - - "0.99" - - "10" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFormatFeaturesGeneField3" - description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ - \ is output." - info: - step: "star" - orig_arg: "--soloOutFormatFeaturesGeneField3" - example: - - "Gene Expression" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellReadStats" - description: "Output reads statistics for each CB\n\n- Standard ... standard\ - \ output" - info: - step: "star" - orig_arg: "--soloCellReadStats" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "HTSeq arguments" - arguments: - - type: "string" - name: "--stranded" - alternatives: - - "-s" - description: "Whether the data is from a strand-specific assay. 'reverse' means\ - \ 'yes' with reversed strand interpretation." - info: - step: "htseq" - orig_arg: "--stranded" - default: - - "yes" - required: false - choices: - - "yes" - - "no" - - "reverse" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--minimum_alignment_quality" - alternatives: - - "-a" - - "--minaqual" - description: "Skip all reads with MAPQ alignment quality lower than the given\ - \ minimum value. 
\nMAPQ is the 5th column of a SAM/BAM file and its usage\ - \ depends on the software \nused to map the reads.\n" - info: - step: "htseq" - orig_arg: "--minaqual" - default: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--type" - alternatives: - - "-t" - description: "Feature type (3rd column in GTF file) to be used, all features\ - \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" - info: - step: "htseq" - orig_arg: "--type" - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--id_attribute" - alternatives: - - "-i" - description: "GTF attribute to be used as feature ID (default, suitable for\ - \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ - \ within the same GTF attribute will be added\ntogether. The typical way of\ - \ using this option is to count all exonic reads from each gene\nand add the\ - \ exons but other uses are possible as well. You can call this option multiple\n\ - times: in that case, the combination of all attributes separated by colons\ - \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ - \ gene_id -i exon_number.\n" - info: - step: "htseq" - orig_arg: "--idattr" - example: - - "gene_id" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--additional_attributes" - description: "Additional feature attributes (suitable for Ensembl GTF files:\ - \ gene_name). 
Use multiple times\nfor more than one additional attribute.\ - \ These attributes are only used as annotations in the\noutput, while the\ - \ determination of how the counts are added together is done based on option\ - \ -i.\n" - info: - step: "htseq" - orig_arg: "--additional-attr" - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--add_chromosome_info" - description: "Store information about the chromosome of each feature as an additional\ - \ attribute\n(e.g. colunm in the TSV output file).\n" - info: - step: "htseq" - orig_arg: "--add-chromosome-info" - direction: "input" - dest: "par" - - type: "string" - name: "--mode" - alternatives: - - "-m" - description: "Mode to handle reads overlapping more than one feature." - info: - step: "htseq" - orig_arg: "--mode" - default: - - "union" - required: false - choices: - - "union" - - "intersection-strict" - - "intersection-nonempty" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--non_unique" - description: "Whether and how to score reads that are not uniquely aligned or\ - \ ambiguously assigned to features." - info: - step: "htseq" - orig_arg: "--nonunique" - default: - - "none" - required: false - choices: - - "none" - - "all" - - "fraction" - - "random" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--secondary_alignments" - description: "Whether to score secondary alignments (0x100 flag)." - info: - step: "htseq" - orig_arg: "--secondary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--supplementary_alignments" - description: "Whether to score supplementary alignments (0x800 flag)." 
- info: - step: "htseq" - orig_arg: "--supplementary-alignments" - required: false - choices: - - "score" - - "ignore" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--counts_output_sparse" - description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." - info: - step: "htseq" - orig_arg: "--counts-output-sparse" - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Align fastq files using STAR." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "docker" - env: - - "STAR_VERSION 2.7.10b" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - - type: "apt" - packages: - - "samtools" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "pyyaml" - - "HTSeq" - - 
"multiprocess" - - "gtfparse<2.0" - - "pandas" - - "multiqc~=1.15.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "pytest" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star/multi_star" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/multi_star/main.nf b/target/nextflow/mapping/multi_star/main.nf deleted file mode 100644 index b40985a9bf4..00000000000 --- a/target/nextflow/mapping/multi_star/main.nf +++ /dev/null @@ -1,6497 +0,0 @@ -// multi_star 0.12.3 -// -// This 
wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "multi_star", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input/Output", - "arguments" : [ - { - "type" : "string", - "name" 
: "--input_id", - "description" : "The ID of the sample being processed. This vector should have the same length as the `--input_r1` argument.", - "example" : [ - "mysample", - "mysample" - ], - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input_r1", - "description" : "Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R1 files should be passed.", - "example" : [ - "mysample_S1_L001_R1_001.fastq.gz", - "mysample_S1_L002_R1_001.fastq.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input_r2", - "description" : "Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R2 files should be passed.", - "example" : [ - "mysample_S1_L001_R2_001.fastq.gz", - "mysample_S1_L002_R2_001.fastq.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference_index", - "alternatives" : [ - "--genomeDir" - ], - "description" : "Path to the reference built by star_build_reference. 
Corresponds to the --genomeDir argument in the STAR command.", - "example" : [ - "/path/to/reference" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference_gtf", - "description" : "Path to the gtf reference file.", - "example" : [ - "genes.gtf" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "--outFileNamePrefix" - ], - "description" : "Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command.", - "example" : [ - "/path/to/foo" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Processing arguments", - "arguments" : [ - { - "type" : "boolean", - "name" : "--run_htseq_count", - "description" : "Whether or not to also run htseq-count after STAR.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--run_multiqc", - "description" : "Whether or not to also run MultiQC at the end.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--min_success_rate", - "description" : "Fail when the success rate is below this threshold.", - "default" : [ - 0.5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Run Parameters", - "arguments" : [ - { - "type" : "integer", - "name" : "--runRNGseed", - "description" : 
"random number generator seed.", - "info" : { - "step" : "star", - "orig_arg" : "--runRNGseed" - }, - "example" : [ - 777 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Genome Parameters", - "arguments" : [ - { - "type" : "file", - "name" : "--genomeFastaFiles", - "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", - "info" : { - "step" : "star", - "orig_arg" : "--genomeFastaFiles" - }, - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Splice Junctions Database", - "arguments" : [ - { - "type" : "string", - "name" : "--sjdbFileChrStartEnd", - "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbFileChrStartEnd" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--sjdbGTFfile", - "description" : "path to the GTF file with annotations", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbGTFfile" - }, - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFchrPrefix", - "description" : "prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes)", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbGTFchrPrefix" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFfeatureExon", - "description" : "feature type in GTF file to be used as exons for building transcripts", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbGTFfeatureExon" - }, - "example" : [ - "exon" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentTranscript", - "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbGTFtagExonParentTranscript" - }, - "example" : [ - "transcript_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGene", - "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbGTFtagExonParentGene" - }, - "example" : [ - "gene_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGeneName", - "description" : "GTF attribute name for parent gene name", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbGTFtagExonParentGeneName" - }, - "example" : [ - "gene_name" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGeneType", - "description" : "GTF attribute name for parent gene type", - "info" : { - "step" : "star", - "orig_arg" : 
"--sjdbGTFtagExonParentGeneType" - }, - "example" : [ - "gene_type", - "gene_biotype" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--sjdbOverhang", - "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbOverhang" - }, - "example" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--sjdbScore", - "description" : "extra alignment score for alignments that cross database junctions", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbScore" - }, - "example" : [ - 2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbInsertSave", - "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", - "info" : { - "step" : "star", - "orig_arg" : "--sjdbInsertSave" - }, - "example" : [ - "Basic" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Variation parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--varVCFfile", - "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 
0/1", - "info" : { - "step" : "star", - "orig_arg" : "--varVCFfile" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Read Parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--readFilesType", - "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view", - "info" : { - "step" : "star", - "orig_arg" : "--readFilesType" - }, - "example" : [ - "Fastx" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesSAMattrKeep", - "description" : "for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... do not keep any tags", - "info" : { - "step" : "star", - "orig_arg" : "--readFilesSAMattrKeep" - }, - "example" : [ - "All" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--readFilesManifest", - "description" : "path to the \\"manifest\\" file with the names of read files. 
The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line.", - "info" : { - "step" : "star", - "orig_arg" : "--readFilesManifest" - }, - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesPrefix", - "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", - "info" : { - "step" : "star", - "orig_arg" : "--readFilesPrefix" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesCommand", - "description" : "command line to execute for each of the input file. 
This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", - "info" : { - "step" : "star", - "orig_arg" : "--readFilesCommand" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--readMapNumber", - "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", - "info" : { - "step" : "star", - "orig_arg" : "--readMapNumber" - }, - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readMatesLengthsIn", - "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", - "info" : { - "step" : "star", - "orig_arg" : "--readMatesLengthsIn" - }, - "example" : [ - "NotEqual" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readNameSeparator", - "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", - "info" : { - "step" : "star", - "orig_arg" : "--readNameSeparator" - }, - "example" : [ - "/" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--readQualityScoreBase", - "description" : "number to be subtracted from the ASCII code to get Phred quality score", - "info" : { - "step" : "star", - "orig_arg" : "--readQualityScoreBase" - }, - "example" : [ - 33 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Read Clipping", - "arguments" : [ 
- { - "type" : "string", - "name" : "--clipAdapterType", - "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", - "info" : { - "step" : "star", - "orig_arg" : "--clipAdapterType" - }, - "example" : [ - "Hamming" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip3pNbases", - "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", - "info" : { - "step" : "star", - "orig_arg" : "--clip3pNbases" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--clip3pAdapterSeq", - "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", - "info" : { - "step" : "star", - "orig_arg" : "--clip3pAdapterSeq" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--clip3pAdapterMMp", - "description" : "max proportion of mismatches for 3p adapter clipping for each mate. 
If one value is given, it will be assumed the same for both mates.", - "info" : { - "step" : "star", - "orig_arg" : "--clip3pAdapterMMp" - }, - "example" : [ - 0.1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip3pAfterAdapterNbases", - "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", - "info" : { - "step" : "star", - "orig_arg" : "--clip3pAfterAdapterNbases" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip5pNbases", - "description" : "number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates.", - "info" : { - "step" : "star", - "orig_arg" : "--clip5pNbases" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Limits", - "arguments" : [ - { - "type" : "long", - "name" : "--limitGenomeGenerateRAM", - "description" : "maximum available RAM (bytes) for genome generation", - "info" : { - "step" : "star", - "orig_arg" : "--limitGenomeGenerateRAM" - }, - "example" : [ - 31000000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitIObufferSize", - "description" : "max available buffers size (bytes) for input/output, per thread", - "info" : { - "step" : "star", - "orig_arg" : "--limitIObufferSize" - }, - "example" : [ - 30000000, - 50000000 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitOutSAMoneReadBytes", - "description" 
: "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", - "info" : { - "step" : "star", - "orig_arg" : "--limitOutSAMoneReadBytes" - }, - "example" : [ - 100000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitOutSJoneRead", - "description" : "max number of junctions for one read (including all multi-mappers)", - "info" : { - "step" : "star", - "orig_arg" : "--limitOutSJoneRead" - }, - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitOutSJcollapsed", - "description" : "max number of collapsed junctions", - "info" : { - "step" : "star", - "orig_arg" : "--limitOutSJcollapsed" - }, - "example" : [ - 1000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitBAMsortRAM", - "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 
0 value can only be used with --genomeLoad NoSharedMemory option.", - "info" : { - "step" : "star", - "orig_arg" : "--limitBAMsortRAM" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitSjdbInsertNsj", - "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", - "info" : { - "step" : "star", - "orig_arg" : "--limitSjdbInsertNsj" - }, - "example" : [ - 1000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitNreadsSoft", - "description" : "soft limit on the number of reads", - "info" : { - "step" : "star", - "orig_arg" : "--limitNreadsSoft" - }, - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output: general", - "arguments" : [ - { - "type" : "string", - "name" : "--outTmpKeep", - "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", - "info" : { - "step" : "star", - "orig_arg" : "--outTmpKeep" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outStd", - "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. 
Requires --outSAMtype BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM", - "info" : { - "step" : "star", - "orig_arg" : "--outStd" - }, - "example" : [ - "Log" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outReadsUnmapped", - "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", - "info" : { - "step" : "star", - "orig_arg" : "--outReadsUnmapped" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outQSconversionAdd", - "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", - "info" : { - "step" : "star", - "orig_arg" : "--outQSconversionAdd" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outMultimapperOrder", - "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. 
This option will become default in the future releases.", - "info" : { - "step" : "star", - "orig_arg" : "--outMultimapperOrder" - }, - "example" : [ - "Old_2.4" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output: SAM and BAM", - "arguments" : [ - { - "type" : "string", - "name" : "--outSAMmode", - "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMmode" - }, - "example" : [ - "Full" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMstrandField", - "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMstrandField" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMattributes", - "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. 
For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --outSAMstrandField.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n***Unsupported/undocumented:\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n- rB ... alignment block read/genomic coordinates.\n- vR ... 
read coordinate of the variant.", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMattributes" - }, - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMattrIHstart", - "description" : "start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie.", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMattrIHstart" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMunmapped", - "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMunmapped" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMorder", - "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMorder" - }, - "example" : [ - "Paired" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMprimaryFlag", - "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... 
only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMprimaryFlag" - }, - "example" : [ - "OneBestScore" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMreadID", - "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number (index) in the FASTx file", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMreadID" - }, - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMmapqUnique", - "description" : "0 to 255: the MAPQ value for unique mappers", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMmapqUnique" - }, - "example" : [ - 255 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMflagOR", - "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMflagOR" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMflagAND", - "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. 
Can be used to unset specific bits that are not set otherwise.", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMflagAND" - }, - "example" : [ - 65535 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMattrRGline", - "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --outSAMattrRGline ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMattrRGline" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderHD", - "description" : "@HD (header) line of the SAM header", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMheaderHD" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderPG", - "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMheaderPG" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderCommentFile", - "description" : "path to the file with @CO (comment) lines of the SAM header", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMheaderCommentFile" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - 
"type" : "string", - "name" : "--outSAMfilter", - "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMfilter" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMmultNmax", - "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... all alignments (up to --outFilterMultimapNmax) will be output", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMmultNmax" - }, - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMtlen", - "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. 
This is different from 1 for overlapping mates with protruding ends", - "info" : { - "step" : "star", - "orig_arg" : "--outSAMtlen" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMcompression", - "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", - "info" : { - "step" : "star", - "orig_arg" : "--outBAMcompression" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMsortingThreadN", - "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", - "info" : { - "step" : "star", - "orig_arg" : "--outBAMsortingThreadN" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMsortingBinsN", - "description" : ">0: number of genome bins for coordinate-sorting", - "info" : { - "step" : "star", - "orig_arg" : "--outBAMsortingBinsN" - }, - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "BAM processing", - "arguments" : [ - { - "type" : "string", - "name" : "--bamRemoveDuplicatesType", - "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... 
mark duplicate unique mappers but not multimappers.", - "info" : { - "step" : "star", - "orig_arg" : "--bamRemoveDuplicatesType" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--bamRemoveDuplicatesMate2basesN", - "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", - "info" : { - "step" : "star", - "orig_arg" : "--bamRemoveDuplicatesMate2basesN" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Wiggle", - "arguments" : [ - { - "type" : "string", - "name" : "--outWigType", - "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", - "info" : { - "step" : "star", - "orig_arg" : "--outWigType" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigStrand", - "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", - "info" : { - "step" : "star", - "orig_arg" : "--outWigStrand" - }, - "example" : [ - "Stranded" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigReferencesPrefix", - "description" : "prefix matching reference names to include in the output wiggle file, e.g. 
\\"chr\\", default \\"-\\" - include all references", - "info" : { - "step" : "star", - "orig_arg" : "--outWigReferencesPrefix" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigNorm", - "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", - "info" : { - "step" : "star", - "orig_arg" : "--outWigNorm" - }, - "example" : [ - "RPM" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Filtering", - "arguments" : [ - { - "type" : "string", - "name" : "--outFilterType", - "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterType" - }, - "example" : [ - "Normal" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMultimapScoreRange", - "description" : "the score range below the maximum score for multimapping alignments", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterMultimapScoreRange" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMultimapNmax", - "description" : "maximum number of loci the read is allowed to map to. 
Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterMultimapNmax" - }, - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMismatchNmax", - "description" : "alignment will be output only if it has no more mismatches than this value.", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterMismatchNmax" - }, - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMismatchNoverLmax", - "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterMismatchNoverLmax" - }, - "example" : [ - 0.3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMismatchNoverReadLmax", - "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterMismatchNoverReadLmax" - }, - "example" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterScoreMin", - "description" : "alignment will be output only if its score is higher than or equal to this value.", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterScoreMin" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - 
"multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterScoreMinOverLread", - "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterScoreMinOverLread" - }, - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMatchNmin", - "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterMatchNmin" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMatchNminOverLread", - "description" : "sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterMatchNminOverLread" - }, - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outFilterIntronMotifs", - "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. 
The annotated non-canonical junctions will be kept.", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterIntronMotifs" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outFilterIntronStrands", - "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", - "info" : { - "step" : "star", - "orig_arg" : "--outFilterIntronStrands" - }, - "example" : [ - "RemoveInconsistentStrands" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output splice junctions (SJ.out.tab)", - "arguments" : [ - { - "type" : "string", - "name" : "--outSJtype", - "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... no splice junction output", - "info" : { - "step" : "star", - "orig_arg" : "--outSJtype" - }, - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Filtering: Splice Junctions", - "arguments" : [ - { - "type" : "string", - "name" : "--outSJfilterReads", - "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", - "info" : { - "step" : "star", - "orig_arg" : "--outSJfilterReads" - }, - "example" : [ - "All" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterOverhangMin", - "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\ndoes not apply to annotated junctions", - "info" : { - "step" : "star", - "orig_arg" : "--outSJfilterOverhangMin" - }, - "example" : [ - 30, - 12, - 12, - 12 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterCountUniqueMin", - "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", - "info" : { - "step" : "star", - "orig_arg" : "--outSJfilterCountUniqueMin" - }, - "example" : [ - 3, - 1, - 1, - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterCountTotalMin", - "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", - "info" : { - "step" : "star", - "orig_arg" : "--outSJfilterCountTotalMin" - }, - "example" : [ - 3, - 1, - 1, - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterDistToOtherSJmin", - "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", - "info" : { - "step" : "star", - "orig_arg" : "--outSJfilterDistToOtherSJmin" - }, - "example" : [ - 10, - 0, - 5, - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterI''' + '''ntronMaxVsReadN", - "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", - "info" : { - "step" : "star", - "orig_arg" : "--outSJfilterIntronMaxVsReadN" - }, - "example" : [ - 50000, - 100000, - 200000 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Scoring", - "arguments" : [ - { - "type" : "integer", - "name" : "--scoreGap", - "description" : "splice junction penalty (independent on intron motif)", - "info" : { - "step" : "star", - "orig_arg" : "--scoreGap" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapNoncan", - "description" : "non-canonical junction penalty (in addition to scoreGap)", - "info" : { - "step" : "star", - "orig_arg" : "--scoreGapNoncan" - }, - "example" : [ - -8 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapGCAG", - "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", - "info" : { - "step" : "star", - "orig_arg" : "--scoreGapGCAG" - }, - "example" : [ - -4 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapATAC", - "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", - "info" : { - "step" : "star", - "orig_arg" : "--scoreGapATAC" - }, - "example" : [ - -8 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGenomicLengthLog2scale", - "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", - "info" : { - "step" : "star", - 
"orig_arg" : "--scoreGenomicLengthLog2scale" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreDelOpen", - "description" : "deletion open penalty", - "info" : { - "step" : "star", - "orig_arg" : "--scoreDelOpen" - }, - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreDelBase", - "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", - "info" : { - "step" : "star", - "orig_arg" : "--scoreDelBase" - }, - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreInsOpen", - "description" : "insertion open penalty", - "info" : { - "step" : "star", - "orig_arg" : "--scoreInsOpen" - }, - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreInsBase", - "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", - "info" : { - "step" : "star", - "orig_arg" : "--scoreInsBase" - }, - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreStitchSJshift", - "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", - "info" : { - "step" : "star", - "orig_arg" : "--scoreStitchSJshift" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Alignments and Seeding", - "arguments" : [ - { - "type" : "integer", - "name" : 
"--seedSearchStartLmax", - "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", - "info" : { - "step" : "star", - "orig_arg" : "--seedSearchStartLmax" - }, - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--seedSearchStartLmaxOverLread", - "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", - "info" : { - "step" : "star", - "orig_arg" : "--seedSearchStartLmaxOverLread" - }, - "example" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedSearchLmax", - "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", - "info" : { - "step" : "star", - "orig_arg" : "--seedSearchLmax" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedMultimapNmax", - "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", - "info" : { - "step" : "star", - "orig_arg" : "--seedMultimapNmax" - }, - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedPerReadNmax", - "description" : "max number of seeds per read", - "info" : { - "step" : "star", - "orig_arg" : "--seedPerReadNmax" - }, - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedPerWindowNmax", - "description" : "max number of seeds per window", - "info" : { - "step" : "star", - 
"orig_arg" : "--seedPerWindowNmax" - }, - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedNoneLociPerWindow", - "description" : "max number of one seed loci per window", - "info" : { - "step" : "star", - "orig_arg" : "--seedNoneLociPerWindow" - }, - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedSplitMin", - "description" : "min length of the seed sequences split by Ns or mate gap", - "info" : { - "step" : "star", - "orig_arg" : "--seedSplitMin" - }, - "example" : [ - 12 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedMapMin", - "description" : "min length of seeds to be mapped", - "info" : { - "step" : "star", - "orig_arg" : "--seedMapMin" - }, - "example" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignIntronMin", - "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", - "info" : { - "step" : "star", - "orig_arg" : "--alignIntronMin" - }, - "example" : [ - 21 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignIntronMax", - "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", - "info" : { - "step" : "star", - "orig_arg" : "--alignIntronMax" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : 
"integer", - "name" : "--alignMatesGapMax", - "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", - "info" : { - "step" : "star", - "orig_arg" : "--alignMatesGapMax" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJoverhangMin", - "description" : "minimum overhang (i.e. block size) for spliced alignments", - "info" : { - "step" : "star", - "orig_arg" : "--alignSJoverhangMin" - }, - "example" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJstitchMismatchNmax", - "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", - "info" : { - "step" : "star", - "orig_arg" : "--alignSJstitchMismatchNmax" - }, - "example" : [ - 0, - -1, - 0, - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJDBoverhangMin", - "description" : "minimum overhang (i.e. 
block size) for annotated (sjdb) spliced alignments", - "info" : { - "step" : "star", - "orig_arg" : "--alignSJDBoverhangMin" - }, - "example" : [ - 3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSplicedMateMapLmin", - "description" : "minimum mapped length for a read mate that is spliced", - "info" : { - "step" : "star", - "orig_arg" : "--alignSplicedMateMapLmin" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--alignSplicedMateMapLminOverLmate", - "description" : "alignSplicedMateMapLmin normalized to mate length", - "info" : { - "step" : "star", - "orig_arg" : "--alignSplicedMateMapLminOverLmate" - }, - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignWindowsPerReadNmax", - "description" : "max number of windows per read", - "info" : { - "step" : "star", - "orig_arg" : "--alignWindowsPerReadNmax" - }, - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignTranscriptsPerWindowNmax", - "description" : "max number of transcripts per window", - "info" : { - "step" : "star", - "orig_arg" : "--alignTranscriptsPerWindowNmax" - }, - "example" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignTranscriptsPerReadNmax", - "description" : "max number of different alignments per read to consider", - "info" : { - "step" : "star", - "orig_arg" : "--alignTranscriptsPerReadNmax" - }, - "example" : [ - 10000 - ], - "required" : false, - 
"direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignEndsType", - "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", - "info" : { - "step" : "star", - "orig_arg" : "--alignEndsType" - }, - "example" : [ - "Local" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignEndsProtrude", - "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", - "info" : { - "step" : "star", - "orig_arg" : "--alignEndsProtrude" - }, - "example" : [ - "0 ConcordantPair" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignSoftClipAtReferenceEnds", - "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... 
prohibit, useful for compatibility with Cufflinks", - "info" : { - "step" : "star", - "orig_arg" : "--alignSoftClipAtReferenceEnds" - }, - "example" : [ - "Yes" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignInsertionFlush", - "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", - "info" : { - "step" : "star", - "orig_arg" : "--alignInsertionFlush" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Paired-End reads", - "arguments" : [ - { - "type" : "integer", - "name" : "--peOverlapNbasesMin", - "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merginf of overlapping mates\\" algorithm.", - "info" : { - "step" : "star", - "orig_arg" : "--peOverlapNbasesMin" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--peOverlapMMp", - "description" : "maximum proportion of mismatched bases in the overlap area", - "info" : { - "step" : "star", - "orig_arg" : "--peOverlapMMp" - }, - "example" : [ - 0.01 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Windows, Anchors, Binning", - "arguments" : [ - { - "type" : "integer", - "name" : "--winAnchorMultimapNmax", - "description" : "max number of loci anchors are allowed to map to", - "info" : { - "step" : "star", - "orig_arg" : "--winAnchorMultimapNmax" - }, - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - 
"name" : "--winBinNbits", - "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", - "info" : { - "step" : "star", - "orig_arg" : "--winBinNbits" - }, - "example" : [ - 16 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winAnchorDistNbins", - "description" : "max number of bins between two anchors that allows aggregation of anchors into one window", - "info" : { - "step" : "star", - "orig_arg" : "--winAnchorDistNbins" - }, - "example" : [ - 9 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winFlankNbins", - "description" : "log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", - "info" : { - "step" : "star", - "orig_arg" : "--winFlankNbins" - }, - "example" : [ - 4 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--winReadCoverageRelativeMin", - "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", - "info" : { - "step" : "star", - "orig_arg" : "--winReadCoverageRelativeMin" - }, - "example" : [ - 0.5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winReadCoverageBasesMin", - "description" : "minimum number of bases covered by the seeds in a window , for STARlong algorithm only.", - "info" : { - "step" : "star", - "orig_arg" : "--winReadCoverageBasesMin" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : 
"Chimeric Alignments", - "arguments" : [ - { - "type" : "string", - "name" : "--chimOutType", - "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... soft-clipping in the CIGAR for supplemental chimeric alignments", - "info" : { - "step" : "star", - "orig_arg" : "--chimOutType" - }, - "example" : [ - "Junctions" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimSegmentMin", - "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", - "info" : { - "step" : "star", - "orig_arg" : "--chimSegmentMin" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreMin", - "description" : "minimum total (summed) score of the chimeric segments", - "info" : { - "step" : "star", - "orig_arg" : "--chimScoreMin" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreDropMax", - "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", - "info" : { - "step" : "star", - "orig_arg" : "--chimScoreDropMax" - }, - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreSeparation", - "description" : "minimum difference (separation) between the best 
chimeric score and the next one", - "info" : { - "step" : "star", - "orig_arg" : "--chimScoreSeparation" - }, - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreJunctionNonGTAG", - "description" : "penalty for a non-GT/AG chimeric junction", - "info" : { - "step" : "star", - "orig_arg" : "--chimScoreJunctionNonGTAG" - }, - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimJunctionOverhangMin", - "description" : "minimum overhang for a chimeric junction", - "info" : { - "step" : "star", - "orig_arg" : "--chimJunctionOverhangMin" - }, - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimSegmentReadGapMax", - "description" : "maximum gap in the read sequence between chimeric segments", - "info" : { - "step" : "star", - "orig_arg" : "--chimSegmentReadGapMax" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--chimFilter", - "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", - "info" : { - "step" : "star", - "orig_arg" : "--chimFilter" - }, - "example" : [ - "banGenomicN" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMainSegmentMultNmax", - "description" : "maximum number of multi-alignments for the main chimeric segment. 
=1 will prohibit multimapping main segments.", - "info" : { - "step" : "star", - "orig_arg" : "--chimMainSegmentMultNmax" - }, - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMultimapNmax", - "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", - "info" : { - "step" : "star", - "orig_arg" : "--chimMultimapNmax" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMultimapScoreRange", - "description" : "the score range for multi-mapping chimeras below the best chimeric score. Only works with --chimMultimapNmax > 1", - "info" : { - "step" : "star", - "orig_arg" : "--chimMultimapScoreRange" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimNonchimScoreDropMin", - "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", - "info" : { - "step" : "star", - "orig_arg" : "--chimNonchimScoreDropMin" - }, - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimOutJunctionFormat", - "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... 
comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", - "info" : { - "step" : "star", - "orig_arg" : "--chimOutJunctionFormat" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Quantification of Annotations", - "arguments" : [ - { - "type" : "string", - "name" : "--quantMode", - "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", - "info" : { - "step" : "star", - "orig_arg" : "--quantMode" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--quantTranscriptomeBAMcompression", - "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", - "info" : { - "step" : "star", - "orig_arg" : "--quantTranscriptomeBAMcompression" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--quantTranscriptomeBan", - "description" : "prohibit various alignment type\n\n- IndelSoftclipSingleend ... prohibit indels, soft clipping and single-end alignments - compatible with RSEM\n- Singleend ... prohibit single-end alignments", - "info" : { - "step" : "star", - "orig_arg" : "--quantTranscriptomeBan" - }, - "example" : [ - "IndelSoftclipSingleend" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "2-pass Mapping", - "arguments" : [ - { - "type" : "string", - "name" : "--twopassMode", - "description" : "2-pass mapping mode.\n\n- None ... 
1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", - "info" : { - "step" : "star", - "orig_arg" : "--twopassMode" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--twopass1readsN", - "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", - "info" : { - "step" : "star", - "orig_arg" : "--twopass1readsN" - }, - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "WASP parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--waspOutputMode", - "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", - "info" : { - "step" : "star", - "orig_arg" : "--waspOutputMode" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "STARsolo (single cell RNA-seq) parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--soloType", - "description" : "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. 
--readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)", - "info" : { - "step" : "star", - "orig_arg" : "--soloType" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBwhitelist", - "description" : "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed", - "info" : { - "step" : "star", - "orig_arg" : "--soloCBwhitelist" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloCBstart", - "description" : "cell barcode start base", - "info" : { - "step" : "star", - "orig_arg" : "--soloCBstart" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloCBlen", - "description" : "cell barcode length", - "info" : { - "step" : "star", - "orig_arg" : "--soloCBlen" - }, - "example" : [ - 16 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloUMIstart", - "description" : "UMI start base", - "info" : { - "step" : "star", - "orig_arg" : "--soloUMIstart" - }, - "example" : [ - 17 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloUMIlen", - "description" : "UMI length", - "info" : { - 
"step" : "star", - "orig_arg" : "--soloUMIlen" - }, - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloBarcodeReadLength", - "description" : "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check", - "info" : { - "step" : "star", - "orig_arg" : "--soloBarcodeReadLength" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloBarcodeMate", - "description" : "identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2", - "info" : { - "step" : "star", - "orig_arg" : "--soloBarcodeMate" - }, - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBposition", - "description" : "position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. 
Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8", - "info" : { - "step" : "star", - "orig_arg" : "--soloCBposition" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIposition", - "description" : "position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition 3_9_3_14", - "info" : { - "step" : "star", - "orig_arg" : "--soloUMIposition" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloAdapterSequence", - "description" : "adapter sequence to anchor barcodes. Only one adapter sequence is allowed.", - "info" : { - "step" : "star", - "orig_arg" : "--soloAdapterSequence" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloAdapterMismatchesNmax", - "description" : "maximum number of mismatches allowed in adapter sequence.", - "info" : { - "step" : "star", - "orig_arg" : "--soloAdapterMismatchesNmax" - }, - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBmatchWLtype", - "description" : "matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... 
same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline.", - "info" : { - "step" : "star", - "orig_arg" : "--soloCBmatchWLtype" - }, - "example" : [ - "1MM_multi" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloInputSAMattrBarcodeSeq", - "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR .\nThis parameter is required when running STARsolo with input from SAM.", - "info" : { - "step" : "star", - "orig_arg" : "--soloInputSAMattrBarcodeSeq" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloInputSAMattrBarcodeQual", - "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned to all bases.", - "info" : { - "step" : "star", - "orig_arg" : "--soloInputSAMattrBarcodeQual" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloStrand", - "description" : "strandedness 
of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule", - "info" : { - "step" : "star", - "orig_arg" : "--soloStrand" - }, - "example" : [ - "Forward" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloFeatures", - "description" : "genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.", - "info" : { - "step" : "star", - "orig_arg" : "--soloFeatures" - }, - "example" : [ - "Gene" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloMultiMappers", - "description" : "counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... 
multi-gene UMIs are distributed using Expectation Maximization algorithm", - "info" : { - "step" : "star", - "orig_arg" : "--soloMultiMappers" - }, - "example" : [ - "Unique" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIdedup", - "description" : "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \\"directional\\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI collapsing.", - "info" : { - "step" : "star", - "orig_arg" : "--soloUMIdedup" - }, - "example" : [ - "1MM_All" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIfiltering", - "description" : "type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... 
basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR", - "info" : { - "step" : "star", - "orig_arg" : "--soloUMIfiltering" - }, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloOutFileNames", - "description" : "file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", - "info" : { - "step" : "star", - "orig_arg" : "--soloOutFileNames" - }, - "example" : [ - "Solo.out/", - "features.tsv", - "barcodes.tsv", - "matrix.mtx" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCellFilter", - "description" : "cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000", - "info" : { - "step" : "star", - "orig_arg" : "--soloCellFilter" - }, - "example" : [ - "CellRanger2.2", - "3000", - "0.99", - "10" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloOutFormatFeaturesGeneField3", - "description" : "field 3 in the Gene features.tsv file. If \\"-\\", then no 3rd field is output.", - "info" : { - "step" : "star", - "orig_arg" : "--soloOutFormatFeaturesGeneField3" - }, - "example" : [ - "Gene Expression" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCellReadStats", - "description" : "Output reads statistics for each CB\n\n- Standard ... standard output", - "info" : { - "step" : "star", - "orig_arg" : "--soloCellReadStats" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "HTSeq arguments", - "arguments" : [ - { - "type" : "string", - "name" : "--stranded", - "alternatives" : [ - "-s" - ], - "description" : "Whether the data is from a strand-specific assay. 
'reverse' means 'yes' with reversed strand interpretation.", - "info" : { - "step" : "htseq", - "orig_arg" : "--stranded" - }, - "default" : [ - "yes" - ], - "required" : false, - "choices" : [ - "yes", - "no", - "reverse" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--minimum_alignment_quality", - "alternatives" : [ - "-a", - "--minaqual" - ], - "description" : "Skip all reads with MAPQ alignment quality lower than the given minimum value. \nMAPQ is the 5th column of a SAM/BAM file and its usage depends on the software \nused to map the reads.\n", - "info" : { - "step" : "htseq", - "orig_arg" : "--minaqual" - }, - "default" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--type", - "alternatives" : [ - "-t" - ], - "description" : "Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)", - "info" : { - "step" : "htseq", - "orig_arg" : "--type" - }, - "example" : [ - "exon" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--id_attribute", - "alternatives" : [ - "-i" - ], - "description" : "GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option) within the same GTF attribute will be added\ntogether. The typical way of using this option is to count all exonic reads from each gene\nand add the exons but other uses are possible as well. You can call this option multiple\ntimes: in that case, the combination of all attributes separated by colons (:) will be used\nas a unique identifier, e.g. 
for exons you might use -i gene_id -i exon_number.\n", - "info" : { - "step" : "htseq", - "orig_arg" : "--idattr" - }, - "example" : [ - "gene_id" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--additional_attributes", - "description" : "Additional feature attributes (suitable for Ensembl GTF files: gene_name). Use multiple times\nfor more than one additional attribute. These attributes are only used as annotations in the\noutput, while the determination of how the counts are added together is done based on option -i.\n", - "info" : { - "step" : "htseq", - "orig_arg" : "--additional-attr" - }, - "example" : [ - "gene_name" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--add_chromosome_info", - "description" : "Store information about the chromosome of each feature as an additional attribute\n(e.g. 
colunm in the TSV output file).\n", - "info" : { - "step" : "htseq", - "orig_arg" : "--add-chromosome-info" - }, - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--mode", - "alternatives" : [ - "-m" - ], - "description" : "Mode to handle reads overlapping more than one feature.", - "info" : { - "step" : "htseq", - "orig_arg" : "--mode" - }, - "default" : [ - "union" - ], - "required" : false, - "choices" : [ - "union", - "intersection-strict", - "intersection-nonempty" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--non_unique", - "description" : "Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features.", - "info" : { - "step" : "htseq", - "orig_arg" : "--nonunique" - }, - "default" : [ - "none" - ], - "required" : false, - "choices" : [ - "none", - "all", - "fraction", - "random" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--secondary_alignments", - "description" : "Whether to score secondary alignments (0x100 flag).", - "info" : { - "step" : "htseq", - "orig_arg" : "--secondary-alignments" - }, - "required" : false, - "choices" : [ - "score", - "ignore" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--supplementary_alignments", - "description" : "Whether to score supplementary alignments (0x800 flag).", - "info" : { - "step" : "htseq", - "orig_arg" : "--supplementary-alignments" - }, - "required" : false, - "choices" : [ - "score", - "ignore" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--counts_output_sparse", - "description" : "Store the counts as a sparse matrix (mtx, h5ad, loom).", - "info" : { - "step" : "htseq", - "orig_arg" 
: "--counts-output-sparse" - }, - "direction" : "input", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/" - } - ], - "description" : "Align fastq files using STAR.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "docker", - "env" : [ - "STAR_VERSION 2.7.10b", - "PACKAGES gcc g++ make wget zlib1g-''' + '''dev unzip" - ] - }, - { - "type" : "docker", - "run" : [ - "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" - ] - }, - { - "type" : "apt", - "packages" : [ - "samtools", - 
"procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "pyyaml", - "HTSeq", - "multiprocess", - "gtfparse<2.0", - "pandas", - "multiqc~=1.15.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "pytest" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : 
"0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from typing import Any, Dict, List, Tuple -import math -import tempfile -import subprocess -import tarfile -import gzip -import shutil -from pathlib import Path -import yaml -import pandas as pd -from multiprocess import Pool -import gtfparse - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'input_r1': $( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "r'${VIASH_PAR_INPUT_R1//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'input_r2': $( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "r'${VIASH_PAR_INPUT_R2//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'reference_index': $( if [ ! -z ${VIASH_PAR_REFERENCE_INDEX+x} ]; then echo "r'${VIASH_PAR_REFERENCE_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'reference_gtf': $( if [ ! -z ${VIASH_PAR_REFERENCE_GTF+x} ]; then echo "r'${VIASH_PAR_REFERENCE_GTF//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'run_htseq_count': $( if [ ! -z ${VIASH_PAR_RUN_HTSEQ_COUNT+x} ]; then echo "r'${VIASH_PAR_RUN_HTSEQ_COUNT//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'run_multiqc': $( if [ ! -z ${VIASH_PAR_RUN_MULTIQC+x} ]; then echo "r'${VIASH_PAR_RUN_MULTIQC//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'min_success_rate': $( if [ ! -z ${VIASH_PAR_MIN_SUCCESS_RATE+x} ]; then echo "float(r'${VIASH_PAR_MIN_SUCCESS_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'runRNGseed': $( if [ ! 
-z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sjdbScore': $( if [ ! 
-z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'clipAdapterType': $( if [ ! 
-z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitBAMsortRAM': $( if [ ! 
-z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMunmapped': $( if [ ! 
-z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMmultNmax': $( if [ ! 
-z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterMultimapScoreRange': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterIntronStrands': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapATAC': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedPerReadNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSplicedMateMapLmin': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winAnchorMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreJunctionNonGTAG': $( if [ ! 
-z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'quantTranscriptomeBan': $( if [ ! 
-z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBposition': $( if [ ! 
-z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIfiltering': $( if [ ! 
-z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'add_chromosome_info': $( if [ ! -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'non_unique': $( if [ ! 
-z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'supplementary_alignments': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - - -def fetch_arguments_info(config: Dict[str, Any]) -> Dict[str, Any]: - """Fetch arguments from config""" - arguments = { - arg["name"].removeprefix("-").removeprefix("-"): arg - for group in config["functionality"]["argument_groups"] - for arg in group["arguments"] - } - return arguments - -def process_par( - par: Dict[str, Any], - arguments_info: Dict[str, Any], - gz_args: List[str], - temp_dir: Path -) -> Dict[str, Any]: - """ - Process the Viash par dictionary - - This turns file strings into Path objects and extracting gzipped files if need be. - - Parameters - ---------- - par: The par dictionary created by Viash - arguments_info: The arguments info Dictionary created by \\`fetch_arguments_info\\` - gz_args: A list of argument keys which could be gzip files which need to be decompressed. 
- temp_dir: A temporary directory in which to ungzip files - """ - new_par = {} - for key, value in par.items(): - arg_info = arguments_info[key] - # turn file arguments into paths - if value and arg_info["type"] == "file": - is_multiple = isinstance(value, list) - - if is_multiple: - value = [Path(val) for val in value] - else: - value = Path(value) - - if key in gz_args: - print(f">> Checking compression of --{key}", flush=True) - # turn value into list if need be - if not is_multiple: - value = [value] - - # extract - value = [extract_if_need_be(path, temp_dir) for path in value] - - # unlist if need be - if not is_multiple: - value = value[0] - - new_par[key] = value - return new_par - -def generate_cmd_arguments(par, arguments_info, step_filter=None, flatten=False): - """ - Generate command-line arguments by fetching the relevant args - - Parameters - ---------- - par: The par dictionary created by Viash - arguments_info: The arguments info Dictionary created by \\`fetch_arguments_info\\` - step_filter: If provided,\\`par\\` will be filtered to only contain arguments for which - argument.info.step == step_filter. - flatten: If \\`False\\`, the command for an argument with multiple values will be - \\`["--key", "value1", "--key", "value2"]\\`, otherwise \\`["--key", "value1", "value2"]\\`. 
- """ - cmd_args = [] - - for key, arg in arguments_info.items(): - arg_val = par.get(key) - # The info key is always present (changed in viash 0.7.4) - # in the parsed config (None if not specified in source config) - info = arg["info"] or {} - orig_arg = info.get("orig_arg") - step = info.get("step") - if arg_val and orig_arg and (not step_filter or step == step_filter): - if not arg.get("multiple", False): - arg_val = [arg_val] - - if arg["type"] in ["boolean_true", "boolean_false"]: - # if argument is a boolean_true or boolean_false, simply add the flag - arg_val = [orig_arg] - elif orig_arg.startswith("-"): - # if the orig arg flag is not a positional, - # add the flag in front of each element and flatten - if flatten: - arg_val = [str(x) for x in [orig_arg] + arg_val] - else: - arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] - - cmd_args.extend(arg_val) - - return cmd_args - -def is_gz_file(path: Path) -> bool: - """Check whether something is a gzip""" - with open(path, "rb") as file: - return file.read(2) == b"\\\\x1f\\\\x8b" - -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - """if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path""" - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f" Tar detected; extracting {par_value} to {unpacked_path}", flush=True) - - with tarfile.open(par_value, "r") as open_tar: - members = open_tar.getmembers() - root_dirs = [ - member - for member in members - if member.isdir() and member.name != "." 
and "/" not in member.name - ] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path(".")] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f" Gzip detected; extracting {par_value} to {unpacked_path}", flush=True) - - with gzip.open(par_value, "rb") as f_in: - with open(unpacked_path, "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -def load_star_reference(reference_index: str) -> None: - """Load star reference index into memory.""" - subprocess.run( - [ - "STAR", - "--genomeLoad", "LoadAndExit", - "--genomeDir", str(reference_index), - ], - check=True - ) - -def unload_star_reference(reference_index: str) -> None: - """Remove star reference index from memory.""" - subprocess.run( - [ - "STAR", - "--genomeLoad", "Remove", - "--genomeDir", str(reference_index), - ], - check=True - ) - -def star_and_htseq( - group_id: str, - r1_files: List[Path], - r2_files: List[Path], - temp_dir: Path, - par: Dict[str, Any], - arguments_info: Dict[str, Any], - num_threads: int -) -> Tuple[int, str] : - star_output = par["output"] / "per" / group_id - temp_dir_group = temp_dir / f"star_tmp_{group_id}" - unsorted_bam = star_output / "Aligned.out.bam" - sorted_bam = star_output / "Aligned.sorted.out.bam" - counts_file = star_output / "htseq-count.txt" - multiqc_path = star_output / "multiqc_data" - - print(f">> Running STAR for group '{group_id}' with command:", flush=True) - star_output.mkdir(parents=True, exist_ok=True) - temp_dir_group.parent.mkdir(parents=True, 
exist_ok=True) - run_star( - r1_files=r1_files, - r2_files=r2_files, - output_dir=star_output, - temp_dir=temp_dir / f"star_tmp_{group_id}", - par=par, - arguments_info=arguments_info, - num_threads=num_threads - ) - if not unsorted_bam.exists(): - return (1, f"Could not find unsorted bam at '{unsorted_bam}'") - - if par["run_htseq_count"]: - print(f">> Running samtools sort for group '{group_id}' with command:", flush=True) - run_samtools_sort(unsorted_bam, sorted_bam) - if not sorted_bam.exists(): - return (1, f"Could not find sorted bam at '{unsorted_bam}'") - - print(f">> Running htseq-count for group '{group_id}' with command:", flush=True) - run_htseq_count(sorted_bam, counts_file, par, arguments_info) - if not counts_file.exists(): - return (1, f"Could not find counts at '{counts_file}'") - - if par["run_multiqc"]: - run_multiqc(star_output) - if not multiqc_path.exists(): - return (1, f"Could not find MultiQC output at '{multiqc_path}'") - - return (0, "") - -def run_star( - r1_files: List[Path], - r2_files: List[Path], - output_dir: Path, - temp_dir: Path, - par: Dict[str, Any], - arguments_info: Dict[str, Any], - num_threads: int -) -> None: - """Run star""" - # process manual arguments - r1_pasted = [",".join([str(r1) for r1 in r1_files])] - r2_pasted = [",".join([str(r2) for r2 in r2_files])] if r2_files else [] - manual_par = { - "--genomeDir": [par["reference_index"]], - "--genomeLoad": ["LoadAndRemove"], - "--runThreadN": [str(num_threads)], - "--runMode": ["alignReads"], - "--readFilesIn": r1_pasted + r2_pasted, - # create a tempdir per group - "--outTmpDir": [temp_dir], - # make sure there is a trailing / - "--outFileNamePrefix": [f"{output_dir}/"], - # fix the outSAMtype to return unsorted BAM files - "--outSAMtype": ["BAM", "Unsorted"] - } - manual_cmd = [str(x) - for key, values in manual_par.items() - for x in [key] + values - ] - - # process all passthrough star arguments - par_cmd = generate_cmd_arguments(par, arguments_info, "star", 
flatten=True) - - # combine into one command and turn into strings - cmd_args = [str(val) for val in ["STAR"] + manual_cmd + par_cmd] - - # run star - subprocess.run(cmd_args, check=True) - -def run_samtools_sort( - unsorted_bam: Path, - sorted_bam: Path -) -> None: - "Run samtools sort" - cmd_args = [ - "samtools", - "sort", - "-o", - sorted_bam, - unsorted_bam, - ] - subprocess.run(cmd_args, check=True) - -def run_htseq_count( - sorted_bam: Path, - counts_file: Path, - par: Dict[str, Any], - arguments_info: Dict[str, Any] -) -> None: - """Run HTSeq count""" - # process manual arguments - manual_cmd = [ - sorted_bam, - par["reference_gtf"] - ] - - # process all passthrough htseq arguments - par_cmd = generate_cmd_arguments(par, arguments_info, "htseq") - - # combine into one command and turn into strings - cmd_args = [str(val) for val in ["htseq-count"] + manual_cmd + par_cmd] - - # run htseq - with open(counts_file, "w", encoding="utf-8") as file: - subprocess.run(cmd_args, check=True, stdout=file) - -def get_feature_info(reference_gtf) -> pd.DataFrame: - ref = gtfparse.read_gtf(reference_gtf) - ref_genes = ref.loc[(ref["feature"] == "gene") | (ref["source"] == "ERCC")] - return pd.DataFrame( - { - "feature_id": ref_genes["gene_id"], - "feature_type": "Gene Expression", - "feature_name": ref_genes["gene_name"] - } - ) - -def run_multiqc(input_dir: Path) -> None: - cmd_args = ["multiqc", str(input_dir), "--outdir", str(input_dir), "--no-report", "--force"] - - # run multiqc - subprocess.run(cmd_args, check=True) - - -######################## -### Main code ### -######################## - -def main(par, meta): - """Main function""" - - # check input arguments - assert len(par["input_id"]) == len(par["input_r1"]), "--input_r1 should have same length as --input_id" - if par["input_r2"]: - assert len(par["input_id"]) == len(par["input_r2"]), "--input_r2 should have same length as --input_id" - - # read config arguments - with open(meta["config"], "r", 
encoding="utf-8") as file: - config = yaml.safe_load(file) - - # fetch all arguments from the config and turn it into a Dict[str, Argument] - arguments_info = fetch_arguments_info(config) - - # temp_dir = "tmp/" - with tempfile.TemporaryDirectory( - prefix=f"{meta['functionality_name']}-", - dir=meta["temp_dir"], - ignore_cleanup_errors=True - ) as temp_dir: - temp_dir = Path(temp_dir) - temp_dir.mkdir(parents=True, exist_ok=True) - - # turn file strings into Paths and decompress gzip if need be - gz_args = ["input_r1", "input_r2", "reference_index", "reference_gtf"] - par = process_par(par, arguments_info, gz_args, temp_dir) - - # make sure input_r2 has same length as input_r1 - if not par["input_r2"]: - par["input_r2"] = [None for _ in par["input_r1"]] - - # group input_files by input_id - print(">> Group by --input_id", flush=True) - grouped_inputs = {} - for group_id, file_r1, file_r2 in zip(par["input_id"], par["input_r1"], par["input_r2"]): - if group_id not in grouped_inputs: - grouped_inputs[group_id] = ([], []) - grouped_inputs[group_id][0].append(file_r1) - if file_r2: - grouped_inputs[group_id][1].append(file_r2) - - # create output dir if need be - par["output"].mkdir(parents=True, exist_ok=True) - - # store features metadata - feature_info = get_feature_info(str(par["reference_gtf"])) - with open(par["output"] / "feature_info.tsv", "w", encoding="utf-8") as file: - feature_info.to_csv(file, sep="\\\\t", index=False) - - # try: - # print(">> Loading genome in memory", flush=True) - # load_star_reference(par["reference_index"]) - - cpus = meta.get("cpus", 1) - num_items = len(grouped_inputs) - pool_size = min(cpus, num_items) - num_threads_per_task = math.ceil(cpus / pool_size) - - with Pool(pool_size) as pool: - outs = pool.starmap( - lambda group_id, files: star_and_htseq( - group_id=group_id, - r1_files=files[0], - r2_files=files[1], - temp_dir=temp_dir, - par=par, - arguments_info=arguments_info, - num_threads=num_threads_per_task - ), - 
grouped_inputs.items() - ) - - num_errored = 0 - for exit, msg in outs: - if exit != 0: - print(f"Error: {msg}") - num_errored += 1 - - pct_succeeded = 1.0 - num_errored / len(outs) - print("------------------") - print(f"Success rate: {math.ceil(pct_succeeded * 100)}%") - - assert pct_succeeded >= par["min_success_rate"], f"Success rate should be at least {math.ceil(par['min_success_rate'] * 100)}%" - -if __name__ == "__main__": - main(par, meta) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_multi_star", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/multi_star/nextflow.config b/target/nextflow/mapping/multi_star/nextflow.config deleted file mode 100644 index 
7a054623f93..00000000000 --- a/target/nextflow/mapping/multi_star/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'multi_star' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Align fastq files using STAR.' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB 
} - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/multi_star/nextflow_params.yaml b/target/nextflow/mapping/multi_star/nextflow_params.yaml deleted file mode 100644 index 0c697bff22a..00000000000 --- a/target/nextflow/mapping/multi_star/nextflow_params.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Input/Output -input_id: # please fill in - example: ["mysample", "mysample"] -input_r1: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L002_R1_001.fastq.gz"] -# input_r2: ["mysample_S1_L001_R2_001.fastq.gz", "mysample_S1_L002_R2_001.fastq.gz"] -reference_index: # please fill in - example: "/path/to/reference" -reference_gtf: # please fill in - example: "genes.gtf" -# output: "$id.$key.output.output" - -# Processing arguments -run_htseq_count: true -run_multiqc: true -min_success_rate: 0.5 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/multi_star/nextflow_schema.json b/target/nextflow/mapping/multi_star/nextflow_schema.json deleted file mode 100644 index 5e4a7de6707..00000000000 --- a/target/nextflow/mapping/multi_star/nextflow_schema.json +++ /dev/null @@ -1,114 +0,0 @@ -{ - 
"$schema": "http://json-schema.org/draft-07/schema", - "title": "multi_star", - "description": "Align fastq files using STAR.", - "type": "object", - "definitions": { - "input/output" : { - "title": "Input/Output", - "type": "object", - "description": "No description", - "properties": { - - "input_id": { - "type": "string", - "description": "Type: List of `string`, required, example: `mysample;mysample`, multiple_sep: `\";\"`. The ID of the sample being processed", - "help_text": "Type: List of `string`, required, example: `mysample;mysample`, multiple_sep: `\";\"`. The ID of the sample being processed. This vector should have the same length as the `--input_r1` argument." - }, - - "input_r1": { - "type": "string", - "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L002_R1_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped", - "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L002_R1_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R1 files should be passed." - }, - - "input_r2": { - "type": "string", - "description": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz;mysample_S1_L002_R2_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped", - "help_text": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz;mysample_S1_L002_R2_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R2 files should be passed." - }, - - "reference_index": { - "type": "string", - "description": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference", - "help_text": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference. 
Corresponds to the --genomeDir argument in the STAR command." - }, - - "reference_gtf": { - "type": "string", - "description": "Type: `file`, required, example: `genes.gtf`. Path to the gtf reference file", - "help_text": "Type: `file`, required, example: `genes.gtf`. Path to the gtf reference file." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command.", - "default": "$id.$key.output.output" - } - - } - }, - "processing arguments" : { - "title": "Processing arguments", - "type": "object", - "description": "No description", - "properties": { - - "run_htseq_count": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Whether or not to also run htseq-count after STAR", - "help_text": "Type: `boolean`, default: `true`. Whether or not to also run htseq-count after STAR.", - "default": "True" - }, - - "run_multiqc": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Whether or not to also run MultiQC at the end", - "help_text": "Type: `boolean`, default: `true`. Whether or not to also run MultiQC at the end.", - "default": "True" - }, - - "min_success_rate": { - "type": "number", - "description": "Type: `double`, default: `0.5`. Fail when the success rate is below this threshold", - "help_text": "Type: `double`, default: `0.5`. Fail when the success rate is below this threshold.", - "default": "0.5" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input/output" - }, - { - "$ref": "#/definitions/processing arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml b/target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml deleted file mode 100644 index ba85db61dc0..00000000000 --- a/target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml +++ /dev/null @@ -1,179 +0,0 @@ -functionality: - name: "multi_star_to_h5mu" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "The directory created by `multi_star`" - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Convert the output of `multi_star` to a h5mu.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq/multi_star" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - 
mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star_to_h5mu" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star_to_h5mu/multi_star_to_h5mu" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/multi_star_to_h5mu/main.nf b/target/nextflow/mapping/multi_star_to_h5mu/main.nf deleted file mode 100644 index 1da6d7a6576..00000000000 --- a/target/nextflow/mapping/multi_star_to_h5mu/main.nf +++ /dev/null @@ -1,2625 +0,0 @@ -// multi_star_to_h5mu 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (author, maintainer) -// * Angela Oliveira Pisco (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "multi_star_to_h5mu", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "The directory created by `multi_star`", - "example" : [ - "/path/to/foo" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" 
: true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/" - } - ], - "description" : "Convert the output of `multi_star` to a h5mu.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq/multi_star", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midmem", - "midcpu" - ], - "tag" : "$id" - }, - "auto" : 
{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star_to_h5mu", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from pathlib import Path -import pandas as pd -import mudata as md -import anndata as ad -import numpy as np -import json - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -# convert to path -input_dir = Path(par["input"]) - -# read counts information -print("> Read counts data", flush=True) -per_obs_data = [] - -for input_counts in (input_dir / "per").glob("**/htseq-count.txt"): - per_obs_dir = input_counts.parent - input_id = per_obs_dir.name - input_multiqc = per_obs_dir / "multiqc_data" / "multiqc_data.json" - - data = pd.read_table( - input_counts, - index_col=0, - names=["cell_id", input_id], - dtype={"cell_id": "U", input_id: "i"} - ) - data2 = data[~data.index.str.startswith("__")] - - with open(input_multiqc, "r") as file: - qc = json.load(file) - - qc_star = qc.get("report_saved_raw_data", {}).get("multiqc_star", {}).get(input_id) - qc_htseq = qc.get("report_saved_raw_data", {}).get("multiqc_htseq", {}).get("htseq-count") - - per_obs_data.append({ - "counts": data2.transpose(), - "qc_star": pd.DataFrame(qc_star, index=[input_id]), - "qc_htseq": pd.DataFrame(qc_htseq, index=[input_id]) - }) - - -# combine all counts -counts = pd.concat([x["counts"] for x in per_obs_data], axis=0) -qc_star = pd.concat([x["qc_star"] for x in per_obs_data], axis=0) -qc_htseq = pd.concat([x["qc_htseq"] for x in per_obs_data], axis=0) - -# read feature info -feature_info = pd.read_csv(input_dir / "feature_info.tsv", sep="\\\\t", index_col=0) -feature_info_ord = feature_info.loc[counts.columns] - -var = pd.DataFrame( - data={ - "gene_ids": feature_info_ord.index, - "feature_types": "Gene Expression", - "gene_name": feature_info_ord["feature_name"], - } -).set_index("gene_ids") - -print("> construct anndata", flush=True) -adata = ad.AnnData( - X=counts, - obsm={"qc_star": qc_star, "qc_htseq": qc_htseq}, - var=var, - dtype=np.int32 -) - -print("> convert to mudata", flush=True) -mdata = md.MuData(adata) - -print("> write to file", flush=True) -mdata.write_h5mu(par["output"], 
compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_multi_star_to_h5mu", - "tag" : "0.12.0" - }, - "label" : [ - "midmem", - "midcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
- // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/multi_star_to_h5mu/nextflow.config b/target/nextflow/mapping/multi_star_to_h5mu/nextflow.config deleted file mode 100644 index 
c5f453f114e..00000000000 --- a/target/nextflow/mapping/multi_star_to_h5mu/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'multi_star_to_h5mu' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Convert the output of `multi_star` to a h5mu.\n' - author = 'Robrecht Cannoodt, Angela Oliveira Pisco' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - 
withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml b/target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml deleted file mode 100644 index 9fcbaf165d7..00000000000 --- a/target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Arguments -input: # please fill in - example: "/path/to/foo" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json b/target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json deleted file mode 100644 index f40c1335697..00000000000 --- a/target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "multi_star_to_h5mu", - "description": "Convert the output of `multi_star` to a h5mu.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - 
"type": "string", - "description": "Type: `file`, required, example: `/path/to/foo`. The directory created by `multi_star`", - "help_text": "Type: `file`, required, example: `/path/to/foo`. The directory created by `multi_star`" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/samtools_sort/.config.vsh.yaml b/target/nextflow/mapping/samtools_sort/.config.vsh.yaml deleted file mode 100644 index dedbf23de8e..00000000000 --- a/target/nextflow/mapping/samtools_sort/.config.vsh.yaml +++ /dev/null @@ -1,270 +0,0 @@ -functionality: - name: "samtools_sort" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input" - arguments: - - type: "file" - name: "--input" - description: "Path to the SAM/BAM/CRAM files containing the mapped reads." - info: - orig_arg: "in_sam" - example: - - "input.bam" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output" - arguments: - - type: "file" - name: "--output_bam" - description: "Filename to output the counts to." 
- info: - orig_arg: "-o" - example: - - "output.bam" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_bai" - description: "BAI-format index for BAM file." - info: null - example: - - "output.bam.bai" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_format" - description: "The output format. By default, samtools tries to select a format\ - \ based on the -o filename extension; if output is to standard output or no\ - \ format can be deduced, bam is selected." - info: - orig_arg: "-O" - example: - - "bam" - required: false - choices: - - "sam" - - "bam" - - "cram" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--compression" - description: "Compression level, from 0 (uncompressed) to 9 (best" - info: - orig_arg: "-l" - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Arguments" - arguments: - - type: "boolean_true" - name: "--minimizer_cluster" - description: "Sort unmapped reads (those in chromosome \"*\") by their sequence\ - \ minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse\ - \ complementing as appropriate. This has the effect of collating some similar\ - \ data together, improving the \ncompressibility of the unmapped sequence.\ - \ The minimiser kmer size is adjusted using the -K option. Note data compressed\ - \ \nin this manner may need to be name collated prior to conversion back to\ - \ fastq.\n\nMapped sequences are sorted by chromosome and position. \n" - info: - orig_arg: "-M" - direction: "input" - dest: "par" - - type: "integer" - name: "--minimizer_kmer" - description: "Sets the kmer size to be used in the -M option." 
- info: - orig_arg: "-K" - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--sort_by_read_names" - description: "Sort by read names (i.e., the QNAME field) rather than by chromosomal\ - \ coordinates." - info: - orig_arg: "-n" - direction: "input" - dest: "par" - - type: "string" - name: "--sort_by" - description: "Sort first by this value in the alignment tag, then by position\ - \ or name (if also using -n)." - info: - orig_arg: "-t" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--no_pg" - description: "Do not add a @PG line to the header of the output file." - info: - orig_arg: "--no-PG" - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost\ - \ coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate\ - \ `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\ - \nNote that to generate an index file (by specifying `--output_bai`), the default\ - \ coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by\ - \ ` options are incompatible with `--output_bai`. 
\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "samtools" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "pyyaml" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/config.vsh.yaml" - platform: "nextflow" - output: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/samtools_sort" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/samtools_sort/samtools_sort" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/samtools_sort/main.nf b/target/nextflow/mapping/samtools_sort/main.nf deleted file mode 100644 index 4580e8e1798..00000000000 --- a/target/nextflow/mapping/samtools_sort/main.nf +++ /dev/null @@ -1,2740 +0,0 @@ -// samtools_sort 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (author, maintainer) -// * Angela Oliveira Pisco (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "samtools_sort", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Path to the SAM/BAM/CRAM files containing the mapped reads.", - "info" : { - "orig_arg" : "in_sam" - }, - "example" : [ - "input.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output", - "arguments" : [ - { - "type" : "file", - "name" : 
"--output_bam", - "description" : "Filename to output the counts to.", - "info" : { - "orig_arg" : "-o" - }, - "example" : [ - "output.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output_bai", - "description" : "BAI-format index for BAM file.", - "example" : [ - "output.bam.bai" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_format", - "description" : "The output format. By default, samtools tries to select a format based on the -o filename extension; if output is to standard output or no format can be deduced, bam is selected.", - "info" : { - "orig_arg" : "-O" - }, - "example" : [ - "bam" - ], - "required" : false, - "choices" : [ - "sam", - "bam", - "cram" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--compression", - "description" : "Compression level, from 0 (uncompressed) to 9 (best", - "info" : { - "orig_arg" : "-l" - }, - "example" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Arguments", - "arguments" : [ - { - "type" : "boolean_true", - "name" : "--minimizer_cluster", - "description" : "Sort unmapped reads (those in chromosome \\"*\\") by their sequence minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse complementing as appropriate. This has the effect of collating some similar data together, improving the \ncompressibility of the unmapped sequence. The minimiser kmer size is adjusted using the -K option. 
Note data compressed \nin this manner may need to be name collated prior to conversion back to fastq.\n\nMapped sequences are sorted by chromosome and position. \n", - "info" : { - "orig_arg" : "-M" - }, - "direction" : "input", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--minimizer_kmer", - "description" : "Sets the kmer size to be used in the -M option.", - "info" : { - "orig_arg" : "-K" - }, - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--sort_by_read_names", - "description" : "Sort by read names (i.e., the QNAME field) rather than by chromosomal coordinates.", - "info" : { - "orig_arg" : "-n" - }, - "direction" : "input", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sort_by", - "description" : "Sort first by this value in the alignment tag, then by position or name (if also using -n).", - "info" : { - "orig_arg" : "-t" - }, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--no_pg", - "description" : "Do not add a @PG line to the header of the output file.", - "info" : { - "orig_arg" : "--no-PG" - }, - "direction" : "input", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/" - } - ], - "description" : "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\nNote that to generate an index file (by specifying `--output_bai`), the default coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by ` options are 
incompatible with `--output_bai`. \n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "samtools", - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "pyyaml" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - 
"mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/samtools_sort", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import tempfile -import subprocess -from pathlib import Path -import yaml - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_bam': $( if [ ! -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_bai': $( if [ ! -z ${VIASH_PAR_OUTPUT_BAI+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAI//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FORMAT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'minimizer_cluster': $( if [ ! 
-z ${VIASH_PAR_MINIMIZER_CLUSTER+x} ]; then echo "r'${VIASH_PAR_MINIMIZER_CLUSTER//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'minimizer_kmer': $( if [ ! -z ${VIASH_PAR_MINIMIZER_KMER+x} ]; then echo "int(r'${VIASH_PAR_MINIMIZER_KMER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sort_by_read_names': $( if [ ! -z ${VIASH_PAR_SORT_BY_READ_NAMES+x} ]; then echo "r'${VIASH_PAR_SORT_BY_READ_NAMES//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'sort_by': $( if [ ! -z ${VIASH_PAR_SORT_BY+x} ]; then echo "r'${VIASH_PAR_SORT_BY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'no_pg': $( if [ ! -z ${VIASH_PAR_NO_PG+x} ]; then echo "r'${VIASH_PAR_NO_PG//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -def generate_args(par, config): - # fetch arguments from config - arguments = [ - arg - for group in config["functionality"]["argument_groups"] - for arg in group["arguments"] - ] - - cmd_args = [] - - for arg in arguments: - arg_val = par.get(arg["name"].removeprefix("--")) - # The info key is always present (changed in viash 0.7.4) - # in the parsed config (None if not specified in source config) - info = arg["info"] or {} - orig_arg = info.get("orig_arg") - if arg_val and orig_arg: - if not arg.get("multiple", False): - arg_val = [arg_val] - - if arg["type"] in ["boolean_true", "boolean_false"]: - # if argument is a boolean_true or boolean_false, simply add the flag - arg_val = [orig_arg] - elif orig_arg.startswith("-"): - # if the orig arg flag is not a positional, - # add the flag in front of each element and flatten - arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] - - cmd_args.extend(arg_val) - - return cmd_args - -# read config arguments -config = yaml.safe_load(Path(meta["config"]).read_text()) - -print(">> Constructing command", flush=True) -cmd_args = [ "samtools", "sort" ] + generate_args(par, config) - -# manually process cpus parameter -if 'cpus' in meta and meta['cpus']: - cmd_args.extend(["--threads", str(meta["cpus"])]) -# add memory -if 'memory_mb' in meta and meta['memory_mb']: - import math - mem_per_thread = math.ceil(meta['memory_mb'] * .8 / meta['cpus']) - 
cmd_args.extend(["-m", f"{mem_per_thread}M"]) - -with tempfile.TemporaryDirectory(prefix="samtools-", dir=meta["temp_dir"]) as temp_dir: - # add tempdir - cmd_args.extend(["-T", str(temp_dir + "/")]) - - # run command - print(">> Running samtools sort with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - subprocess.run(cmd_args, check=True) - -if par.get("output_bai"): - print(">> Running samtools index with command:", flush=True) - cmd_index_args = ["samtools", "index", "-b", par["output_bam"], par["output_bai"]] - print("+ " + ' '.join([str(x) for x in cmd_index_args]), flush=True) - subprocess.run(cmd_index_args, check=True) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_samtools_sort", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/samtools_sort/nextflow.config b/target/nextflow/mapping/samtools_sort/nextflow.config deleted file mode 100644 index 
eb0386297d9..00000000000 --- a/target/nextflow/mapping/samtools_sort/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'samtools_sort' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Sort and (optionally) index alignments.\n\nReads are sorted by leftmost coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\nNote that to generate an index file (by specifying `--output_bai`), the default coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by ` options are incompatible with `--output_bai`. \n' - author = 'Robrecht Cannoodt, Angela Oliveira Pisco' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = 
false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/samtools_sort/nextflow_params.yaml b/target/nextflow/mapping/samtools_sort/nextflow_params.yaml deleted file mode 100644 index c3bf9c86cc2..00000000000 --- a/target/nextflow/mapping/samtools_sort/nextflow_params.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Arguments -minimizer_cluster: false -# minimizer_kmer: 20 -sort_by_read_names: false -# sort_by: "foo" -no_pg: false - -# Input -input: # please fill in - example: "input.bam" - -# Output -# output_bam: "$id.$key.output_bam.bam" -# output_bai: "$id.$key.output_bai.bai" -# output_format: "bam" -# compression: 5 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git 
a/target/nextflow/mapping/samtools_sort/nextflow_schema.json b/target/nextflow/mapping/samtools_sort/nextflow_schema.json deleted file mode 100644 index 141979bf12b..00000000000 --- a/target/nextflow/mapping/samtools_sort/nextflow_schema.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "samtools_sort", - "description": "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\nNote that to generate an index file (by specifying `--output_bai`), the default coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by \u003cTAG\u003e` options are incompatible with `--output_bai`. \n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "minimizer_cluster": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Sort unmapped reads (those in chromosome \"*\") by their sequence minimiser (Schleimer et al", - "help_text": "Type: `boolean_true`, default: `false`. Sort unmapped reads (those in chromosome \"*\") by their sequence minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse complementing as appropriate. This has the effect of collating some similar data together, improving the \ncompressibility of the unmapped sequence. The minimiser kmer size is adjusted using the -K option. Note data compressed \nin this manner may need to be name collated prior to conversion back to fastq.\n\nMapped sequences are sorted by chromosome and position. \n", - "default": "False" - }, - - "minimizer_kmer": { - "type": "integer", - "description": "Type: `integer`, example: `20`. Sets the kmer size to be used in the -M option", - "help_text": "Type: `integer`, example: `20`. 
Sets the kmer size to be used in the -M option." - }, - - "sort_by_read_names": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Sort by read names (i", - "help_text": "Type: `boolean_true`, default: `false`. Sort by read names (i.e., the QNAME field) rather than by chromosomal coordinates.", - "default": "False" - }, - - "sort_by": { - "type": "string", - "description": "Type: `string`. Sort first by this value in the alignment tag, then by position or name (if also using -n)", - "help_text": "Type: `string`. Sort first by this value in the alignment tag, then by position or name (if also using -n)." - }, - - "no_pg": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Do not add a @PG line to the header of the output file", - "help_text": "Type: `boolean_true`, default: `false`. Do not add a @PG line to the header of the output file.", - "default": "False" - } - - } - }, - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.bam`. Path to the SAM/BAM/CRAM files containing the mapped reads", - "help_text": "Type: `file`, required, example: `input.bam`. Path to the SAM/BAM/CRAM files containing the mapped reads." - } - - } - }, - "output" : { - "title": "Output", - "type": "object", - "description": "No description", - "properties": { - - "output_bam": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_bam.bam`, example: `output.bam`. Filename to output the counts to", - "help_text": "Type: `file`, required, default: `$id.$key.output_bam.bam`, example: `output.bam`. Filename to output the counts to.", - "default": "$id.$key.output_bam.bam" - }, - - "output_bai": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output_bai.bai`, example: `output.bam.bai`. 
BAI-format index for BAM file", - "help_text": "Type: `file`, default: `$id.$key.output_bai.bai`, example: `output.bam.bai`. BAI-format index for BAM file.", - "default": "$id.$key.output_bai.bai" - }, - - "output_format": { - "type": "string", - "description": "Type: `string`, example: `bam`, choices: ``sam`, `bam`, `cram``. The output format", - "help_text": "Type: `string`, example: `bam`, choices: ``sam`, `bam`, `cram``. The output format. By default, samtools tries to select a format based on the -o filename extension; if output is to standard output or no format can be deduced, bam is selected.", - "enum": ["sam", "bam", "cram"] - - }, - - "compression": { - "type": "integer", - "description": "Type: `integer`, example: `5`. Compression level, from 0 (uncompressed) to 9 (best", - "help_text": "Type: `integer`, example: `5`. Compression level, from 0 (uncompressed) to 9 (best" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/input" - }, - { - "$ref": "#/definitions/output" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/star_align/.config.vsh.yaml b/target/nextflow/mapping/star_align/.config.vsh.yaml deleted file mode 100644 index 5b9a13670cf..00000000000 --- a/target/nextflow/mapping/star_align/.config.vsh.yaml +++ /dev/null @@ -1,2535 +0,0 @@ -functionality: - name: "star_align" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input/Output" - arguments: - - type: "file" - name: "--input" - alternatives: - - "--readFilesIn" - description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ - \ argument in the STAR command." 
- info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "--genomeDir" - description: "Path to the reference built by star_build_reference. Corresponds\ - \ to the --genomeDir argument in the STAR command." - info: null - example: - - "/path/to/reference" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--outFileNamePrefix" - description: "Path to output directory. Corresponds to the --outFileNamePrefix\ - \ argument in the STAR command." - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Run Parameters" - arguments: - - type: "integer" - name: "--runRNGseed" - description: "random number generator seed." - info: null - example: - - 777 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome Parameters" - arguments: - - type: "string" - name: "--genomeLoad" - description: "mode of shared memory usage for the genome files. Only used with\ - \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ - \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ - \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ - \ and exit, keeping the genome in memory for future runs\n- Remove \ - \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ - \ ... 
do not use shared memory, each job will have its own private copy of\ - \ the genome" - info: null - example: - - "NoSharedMemory" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--genomeFastaFiles" - description: "path(s) to the fasta files with the genome sequences, separated\ - \ by spaces. These files should be plain text FASTA files, they *cannot* be\ - \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ - \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ - \ sequences to the genome (e.g. spike-ins)." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--genomeFileSizes" - description: "genome files exact sizes in bytes. Typically, this should not\ - \ be defined by the user." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeTransformOutput" - description: "which output to transform back to original genome\n\n- SAM \ - \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ - \ None ... no transformation of the output" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeChrSetMitochondrial" - description: "names of the mitochondrial chromosomes. Presently only used for\ - \ STARsolo statistics output/" - info: null - example: - - "chrM" - - "M" - - "MT" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Splice Junctions Database" - arguments: - - type: "string" - name: "--sjdbFileChrStartEnd" - description: "path to the files with genomic coordinates (chr start \ - \ end strand) for the splice junction introns. 
Multiple files can be\ - \ supplied and will be concatenated." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--sjdbGTFfile" - description: "path to the GTF file with annotations" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFchrPrefix" - description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ - \ ENSMEBL annotations with UCSC genomes)" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFfeatureExon" - description: "feature type in GTF file to be used as exons for building transcripts" - info: null - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentTranscript" - description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ - \ works for GTF files)" - info: null - example: - - "transcript_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGene" - description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ - \ for GTF files)" - info: null - example: - - "gene_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneName" - description: "GTF attribute name for parent gene name" - info: null - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneType" - description: "GTF attribute name for parent gene type" - info: null - example: - - "gene_type" - - "gene_biotype" - required: false - direction: 
"input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--sjdbOverhang" - description: "length of the donor/acceptor sequence on each side of the junctions,\ - \ ideally = (mate_length - 1)" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--sjdbScore" - description: "extra alignment score for alignments that cross database junctions" - info: null - example: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbInsertSave" - description: "which files to save when sjdb junctions are inserted on the fly\ - \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ - - All ... all files including big Genome, SA and SAindex - this will create\ - \ a complete genome directory" - info: null - example: - - "Basic" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variation parameters" - arguments: - - type: "string" - name: "--varVCFfile" - description: "path to the VCF file that contains variation data. The 10th column\ - \ should contain the genotype information, e.g. 0/1" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Parameters" - arguments: - - type: "string" - name: "--readFilesType" - description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ - - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ - \ samtools view\n- SAM PE ... 
SAM or BAM paired-end reads; for BAM use\ - \ --readFilesCommand samtools view" - info: null - example: - - "Fastx" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesSAMattrKeep" - description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ - \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ - - None ... do not keep any tags" - info: null - example: - - "All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--readFilesManifest" - description: "path to the \"manifest\" file with the names of read files. The\ - \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ - \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ - \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ - \ but not tabs are allowed in file names.\nIf read_group_line does not start\ - \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ - If read_group_line starts with ID:, it can contain several fields separated\ - \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ - \ line." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesPrefix" - description: "prefix for the read files names, i.e. it will be added in front\ - \ of the strings in --readFilesIn" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesCommand" - description: "command line to execute for each of the input file. This command\ - \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ - \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." 
- info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readMapNumber" - description: "number of reads to map from the beginning of the file\n\n-1: map\ - \ all reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readMatesLengthsIn" - description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ - \ mates are the same / not the same. NotEqual is safe in all situations." - info: null - example: - - "NotEqual" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readNameSeparator" - description: "character(s) separating the part of the read names that will be\ - \ trimmed in output (read name after space is always trimmed)" - info: null - example: - - "/" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readQualityScoreBase" - description: "number to be subtracted from the ASCII code to get Phred quality\ - \ score" - info: null - example: - - 33 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Clipping" - arguments: - - type: "string" - name: "--clipAdapterType" - description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ - \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ - - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ - \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ - \ ... 
no adapter clipping, all other clip* parameters are disregarded" - info: null - example: - - "Hamming" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--clip3pNbases" - description: "number(s) of bases to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--clip3pAdapterSeq" - description: "adapter sequences to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ - \ sequence with the length equal to read length" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "double" - name: "--clip3pAdapterMMp" - description: "max proportion of mismatches for 3p adapter clipping for each\ - \ mate. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0.1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip3pAfterAdapterNbases" - description: "number of bases to clip from 3p of each mate after the adapter\ - \ clipping. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip5pNbases" - description: "number(s) of bases to clip from 5p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." 
- info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Limits" - arguments: - - type: "long" - name: "--limitGenomeGenerateRAM" - description: "maximum available RAM (bytes) for genome generation" - info: null - example: - - 31000000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitIObufferSize" - description: "max available buffers size (bytes) for input/output, per thread" - info: null - example: - - 30000000 - - 50000000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "long" - name: "--limitOutSAMoneReadBytes" - description: "max size of the SAM record (bytes) for one read. Recommended value:\ - \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - info: null - example: - - 100000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJoneRead" - description: "max number of junctions for one read (including all multi-mappers)" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJcollapsed" - description: "max number of collapsed junctions" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitBAMsortRAM" - description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ - \ be set to the genome index size. 0 value can only be used with --genomeLoad\ - \ NoSharedMemory option." 
- info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitSjdbInsertNsj" - description: "maximum number of junctions to be inserted to the genome on the\ - \ fly at the mapping stage, including those from annotations and those detected\ - \ in the 1st step of the 2-pass run" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitNreadsSoft" - description: "soft limit on the number of reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: general" - arguments: - - type: "string" - name: "--outTmpKeep" - description: "whether to keep the temporary files after STAR runs is finished\n\ - \n- None ... remove all temporary files\n- All ... keep all files" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outStd" - description: "which output will be directed to stdout (standard out)\n\n- Log\ - \ ... log messages\n- SAM ... alignments\ - \ in SAM format (which normally are output to Aligned.out.sam file), normal\ - \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ - \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ - \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ - \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ - \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" - info: null - example: - - "Log" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outReadsUnmapped" - description: "output of unmapped and partially mapped (i.e. 
mapped only one\ - \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ - \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outQSconversionAdd" - description: "add this number to the quality score (e.g. to convert from Illumina\ - \ to Sanger, use -31)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outMultimapperOrder" - description: "order of multimapping alignments in the output files\n\n- Old_2.4\ - \ ... quasi-random order used before 2.5.0\n- Random \ - \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ - \ are always adjacent, all alignment for each read stay together. This option\ - \ will become default in the future releases." - info: null - example: - - "Old_2.4" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: SAM and BAM" - arguments: - - type: "string" - name: "--outSAMtype" - description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ - \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ - 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ - \ ... sorted by coordinate. This option will allocate extra memory for sorting\ - \ which can be specified by --limitBAMsortRAM." - info: null - example: - - "SAM" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMmode" - description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ - \ SAM output\n- NoQS ... 
full SAM but without quality scores" - info: null - example: - - "Full" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMstrandField" - description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ - - intronMotif ... strand derived from the intron motif. This option changes\ - \ the output alignments: reads with inconsistent and/or non-canonical introns\ - \ are filtered out." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattributes" - description: "a string of desired SAM attributes, in the order desired for the\ - \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ - - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ - \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ - \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ - \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ - \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ - \ local alignment score, +1/-1 for matches/mismateches, score* penalties for\ - \ indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n\ - - nM ... number of mismatches. For PE reads, sum over two mates.\n\ - - NM ... edit distance to the reference (number of mismatched + inserted\ - \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ - \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ - \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ - \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ - \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ - \ annotated, 20 is added to its motif value.\n- jI ... start and\ - \ end of introns for all junctions (1-based).\n- XS ... 
alignment\ - \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ - \ string. Standard SAM tag.\n- ch ... marks all segment of all chimeric\ - \ alingments for --chimOutType WithinBAM output.\n- cN ... number\ - \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ - \ ... variant allele\n- vG ... genomic coordinate of the variant\ - \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ - \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ - \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ - \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ - \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ - \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ - \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ - \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ - \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ - \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ - \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ - \ genome generated with --genomeTransformType Diploid .\n- rB ...\ - \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ - \ of the variant." - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMattrIHstart" - description: "start value for the IH attribute. 0 may be required by some downstream\ - \ software, such as Cufflinks or StringTie." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMunmapped" - description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ - \ ... no output\n- Within ... 
output unmapped reads within the main SAM\ - \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ - \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ - \ to its mapped mate. Only affects multi-mapping reads." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMorder" - description: "type of sorting for the SAM output\n\nPaired: one mate after the\ - \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ - \ other for all paired alignments, the order is kept the same as in the input\ - \ FASTQ files" - info: null - example: - - "Paired" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMprimaryFlag" - description: "which alignments are considered primary - all others will be marked\ - \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ - \ the best score is primary\n- AllBestScore ... all alignments with the best\ - \ score are primary" - info: null - example: - - "OneBestScore" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMreadID" - description: "read ID record type\n\n- Standard ... 
first word (until space)\ - \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ - \ read number (index) in the FASTx file" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMmapqUnique" - description: "0 to 255: the MAPQ value for unique mappers" - info: null - example: - - 255 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagOR" - description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ - \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ - \ are not set otherwise." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagAND" - description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ - \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ - \ are not set otherwise." - info: null - example: - - 65535 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattrRGline" - description: "SAM/BAM read group line. The first word contains the read group\ - \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ - \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ - \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ - \ correspons to different (comma separated) input files in --readFilesIn.\ - \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ - \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderHD" - description: "@HD (header) line of the SAM header" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderPG" - description: "extra @PG (software) line of the SAM header (in addition to STAR)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderCommentFile" - description: "path to the file with @CO (comment) lines of the SAM header" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMfilter" - description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ - \ ... only keep the reads for which all alignments are to the extra reference\ - \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ - \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ - \ at the mapping stage." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMmultNmax" - description: "max number of multiple alignments for a read that will be output\ - \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ - \ scoring alignment will be output first\n\n- -1 ... 
all alignments (up to\ - \ --outFilterMultimapNmax) will be output" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMtlen" - description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ - - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ - \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ - \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ - \ from 1 for overlapping mates with protruding ends" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMcompression" - description: "-1 to 10 BAM compression level, -1=default compression (6?),\ - \ 0=no compression, 10=maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingThreadN" - description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingBinsN" - description: ">0: number of genome bins for coordinate-sorting" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "BAM processing" - arguments: - - type: "string" - name: "--bamRemoveDuplicatesType" - description: "mark duplicates in the BAM file, for now only works with (i) sorted\ - \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ - \ - ... no duplicate removal/marking\n- UniqueIdentical\ - \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ - \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ - \ unique mappers but not multimappers." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--bamRemoveDuplicatesMate2basesN" - description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ - \ for RAMPAGE)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Wiggle" - arguments: - - type: "string" - name: "--outWigType" - description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ - . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ - - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ - \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ - \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ - \ only 2nd read" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outWigStrand" - description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ - \ strands, str1 and str2\n- Unstranded ... collapsed strands" - info: null - example: - - "Stranded" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigReferencesPrefix" - description: "prefix matching reference names to include in the output wiggle\ - \ file, e.g. \"chr\", default \"-\" - include all references" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigNorm" - description: "type of normalization for the signal\n\n- RPM ... reads per\ - \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" - info: null - example: - - "RPM" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering" - arguments: - - type: "string" - name: "--outFilterType" - description: "type of filtering\n\n- Normal ... standard filtering using only\ - \ current alignment\n- BySJout ... keep only those reads that contain junctions\ - \ that passed filtering into SJ.out.tab" - info: null - example: - - "Normal" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapScoreRange" - description: "the score range below the maximum score for multimapping alignments" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapNmax" - description: "maximum number of loci the read is allowed to map to. Alignments\ - \ (all of them) will be output only if the read maps to no more loci than\ - \ this value.\n\nOtherwise no alignments will be output, and the read will\ - \ be counted as \"mapped to too many loci\" in the Log.final.out ." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMismatchNmax" - description: "alignment will be output only if it has no more mismatches than\ - \ this value." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverLmax" - description: "alignment will be output only if its ratio of mismatches to *mapped*\ - \ length is less than or equal to this value." 
- info: null - example: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverReadLmax" - description: "alignment will be output only if its ratio of mismatches to *read*\ - \ length is less than or equal to this value." - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterScoreMin" - description: "alignment will be output only if its score is higher than or equal\ - \ to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterScoreMinOverLread" - description: "same as outFilterScoreMin, but normalized to read length (sum\ - \ of mates' lengths for paired-end reads)" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMatchNmin" - description: "alignment will be output only if the number of matched bases is\ - \ higher than or equal to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMatchNminOverLread" - description: "sam as outFilterMatchNmin, but normalized to the read length (sum\ - \ of mates' lengths for paired-end reads)." - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronMotifs" - description: "filter alignment using their motifs\n\n- None \ - \ ... no filtering\n- RemoveNoncanonical ... filter\ - \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ - \ ... 
filter out alignments that contain non-canonical unannotated junctions\ - \ when using annotated splice junctions database. The annotated non-canonical\ - \ junctions will be kept." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronStrands" - description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ - \ alignments that have junctions with inconsistent strands\n- None \ - \ ... no filtering" - info: null - example: - - "RemoveInconsistentStrands" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output splice junctions (SJ.out.tab)" - arguments: - - type: "string" - name: "--outSJtype" - description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ - \ output\n- None ... no splice junction output" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering: Splice Junctions" - arguments: - - type: "string" - name: "--outSJfilterReads" - description: "which reads to consider for collapsed splice junctions output\n\ - \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ - \ mapping reads only" - info: null - example: - - "All" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSJfilterOverhangMin" - description: "minimum overhang length for splice junctions on both sides for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - does not apply to annotated junctions" - info: null - example: - - 30 - - 12 - - 12 - - 12 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountUniqueMin" - description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ - \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ - \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ - \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ - \ are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountTotalMin" - description: "minimum total (multi-mapping+unique) read count per junction for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ - Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ - \ conditions are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterDistToOtherSJmin" - description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ - does not apply to annotated junctions" - info: null - example: - - 10 - - 0 - - 5 - - 10 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterIntronMaxVsReadN" - description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ - \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ - \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ - does not apply to annotated junctions" - info: null - example: - - 50000 - - 100000 - - 200000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Scoring" - arguments: - - type: "integer" - name: "--scoreGap" - description: "splice junction penalty (independent on intron motif)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapNoncan" - description: "non-canonical junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapGCAG" - description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" - info: null - example: - - -4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapATAC" - description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGenomicLengthLog2scale" - description: "extra score logarithmically scaled with genomic length of the\ - \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelOpen" - description: "deletion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelBase" - description: "deletion extension penalty per base (in addition to scoreDelOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: 
"par" - - type: "integer" - name: "--scoreInsOpen" - description: "insertion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsBase" - description: "insertion extension penalty per base (in addition to scoreInsOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreStitchSJshift" - description: "maximum score reduction while searching for SJ boundaries in the\ - \ stitching step" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Alignments and Seeding" - arguments: - - type: "integer" - name: "--seedSearchStartLmax" - description: "defines the search start point through the read - the read is\ - \ split into pieces no longer than this value" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--seedSearchStartLmaxOverLread" - description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ - \ for paired-end reads)" - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSearchLmax" - description: "defines the maximum length of the seeds, if =0 seed length is\ - \ not limited" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMultimapNmax" - description: "only pieces that map fewer than this value are utilized in the\ - \ stitching procedure" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerReadNmax" - description: "max 
number of seeds per read" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerWindowNmax" - description: "max number of seeds per window" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedNoneLociPerWindow" - description: "max number of one seed loci per window" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSplitMin" - description: "min length of the seed sequences split by Ns or mate gap" - info: null - example: - - 12 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMapMin" - description: "min length of seeds to be mapped" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMin" - description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ - \ otherwise it is considered Deletion" - info: null - example: - - 21 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMax" - description: "maximum intron size, if 0, max intron size will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignMatesGapMax" - description: "maximum gap between two mates, if 0, max intron gap will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: 
"--alignSJoverhangMin" - description: "minimum overhang (i.e. block size) for spliced alignments" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJstitchMismatchNmax" - description: "maximum number of mismatches for stitching of the splice junctions\ - \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ - \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." - info: null - example: - - 0 - - -1 - - 0 - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--alignSJDBoverhangMin" - description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ - \ alignments" - info: null - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSplicedMateMapLmin" - description: "minimum mapped length for a read mate that is spliced" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alignSplicedMateMapLminOverLmate" - description: "alignSplicedMateMapLmin normalized to mate length" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignWindowsPerReadNmax" - description: "max number of windows per read" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerWindowNmax" - description: "max number of transcripts per window" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerReadNmax" - description: "max number of different alignments per read 
to consider" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsType" - description: "type of read ends alignment\n\n- Local ... standard\ - \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ - \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ - \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ - \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ - \ local alignment" - info: null - example: - - "Local" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsProtrude" - description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ - \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ - \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ - \ ConcordantPair ... report alignments with non-zero protrusion\ - \ as concordant pairs\n- DiscordantPair ... report alignments\ - \ with non-zero protrusion as discordant pairs" - info: null - example: - - "0 ConcordantPair" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignSoftClipAtReferenceEnds" - description: "allow the soft-clipping of the alignments past the end of the\ - \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ - \ with Cufflinks" - info: null - example: - - "Yes" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignInsertionFlush" - description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ - \ are not flushed\n- Right ... 
insertions are flushed to the right" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Paired-End reads" - arguments: - - type: "integer" - name: "--peOverlapNbasesMin" - description: "minimum number of overlapping bases to trigger mates merging and\ - \ realignment. Specify >0 value to switch on the \"merginf of overlapping\ - \ mates\" algorithm." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--peOverlapMMp" - description: "maximum proportion of mismatched bases in the overlap area" - info: null - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Windows, Anchors, Binning" - arguments: - - type: "integer" - name: "--winAnchorMultimapNmax" - description: "max number of loci anchors are allowed to map to" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winBinNbits" - description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ - \ each window will occupy an integer number of bins." 
- info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winAnchorDistNbins" - description: "max number of bins between two anchors that allows aggregation\ - \ of anchors into one window" - info: null - example: - - 9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winFlankNbins" - description: "log2(winFlank), where win Flank is the size of the left and right\ - \ flanking regions for each window" - info: null - example: - - 4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--winReadCoverageRelativeMin" - description: "minimum relative coverage of the read sequence by the seeds in\ - \ a window, for STARlong algorithm only." - info: null - example: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winReadCoverageBasesMin" - description: "minimum number of bases covered by the seeds in a window , for\ - \ STARlong algorithm only." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Chimeric Alignments" - arguments: - - type: "string" - name: "--chimOutType" - description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ - - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ - - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ - - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ - \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ - \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" - info: null - example: - - "Junctions" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimSegmentMin" - description: "minimum length of chimeric segment length, if ==0, no chimeric\ - \ output" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreMin" - description: "minimum total (summed) score of the chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreDropMax" - description: "max drop (difference) of chimeric score (the sum of scores of\ - \ all chimeric segments) from the read length" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreSeparation" - description: "minimum difference (separation) between the best chimeric score\ - \ and the next one" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreJunctionNonGTAG" - description: "penalty for a non-GT/AG chimeric junction" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimJunctionOverhangMin" - description: "minimum overhang for a chimeric junction" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimSegmentReadGapMax" - description: "maximum gap in the read sequence between chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - 
type: "string" - name: "--chimFilter" - description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ - - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ - \ junction" - info: null - example: - - "banGenomicN" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimMainSegmentMultNmax" - description: "maximum number of multi-alignments for the main chimeric segment.\ - \ =1 will prohibit multimapping main segments." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapNmax" - description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ - \ old scheme for chimeric detection which only considered unique alignments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapScoreRange" - description: "the score range for multi-mapping chimeras below the best chimeric\ - \ score. Only works with --chimMultimapNmax > 1" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimNonchimScoreDropMin" - description: "to trigger chimeric detection, the drop in the best non-chimeric\ - \ alignment score with respect to the read length has to be greater than this\ - \ value" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimOutJunctionFormat" - description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ - \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ - \ command line and Nreads: total, unique/multi-mapping" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Quantification of Annotations" - arguments: - - type: "string" - name: "--quantMode" - description: "types of quantification requested\n\n- - ... none\n\ - - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ - \ file\n- GeneCounts ... count reads per gene" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--quantTranscriptomeBAMcompression" - description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ - \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ - - 10 ... maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--quantTranscriptomeBan" - description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ - \ prohibit indels, soft clipping and single-end alignments - compatible with\ - \ RSEM\n- Singleend ... prohibit single-end alignments" - info: null - example: - - "IndelSoftclipSingleend" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "2-pass Mapping" - arguments: - - type: "string" - name: "--twopassMode" - description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ - \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ - \ the genome indices on the fly" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--twopass1readsN" - description: "number of reads to process for the 1st step. Use very large number\ - \ (or default -1) to map all reads in the first step." 
- info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "WASP parameters" - arguments: - - type: "string" - name: "--waspOutputMode" - description: "WASP allele-specific output type. This is re-implementation of\ - \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ - \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ - \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ - \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "STARsolo (single cell RNA-seq) parameters" - arguments: - - type: "string" - name: "--soloType" - description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ - \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ - \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ - \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ - \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ - \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ - \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ - \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ - \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ - \ UMI sequences, alignments deduplicated according to alignment start and\ - \ end (after extending soft-clipped bases)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCBwhitelist" - description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ - \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ - \ all cell barcodes are allowed" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--soloCBstart" - description: "cell barcode start base" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloCBlen" - description: "cell barcode length" - info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIstart" - description: "UMI start base" - info: null - example: - - 17 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIlen" - description: "UMI length" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeReadLength" - description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ - - 0 ... not defined, do not check" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeMate" - description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ - \n- 0 ... barcode sequence is on separate read, which should always be the\ - \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ - \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBposition" - description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ - \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ - \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ - start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ - \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ - \ position with of the CB start(end) with respect to the Anchor Base\nString\ - \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ - \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIposition" - description: "position of the UMI on the barcode read, same as soloCBposition\n\ - \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ - \ 3_9_3_14" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloAdapterSequence" - description: "adapter sequence to anchor barcodes. Only one adapter sequence\ - \ is allowed." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloAdapterMismatchesNmax" - description: "maximum number of mismatches allowed in adapter sequence." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBmatchWLtype" - description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ - \ ... only exact matches allowed\n- 1MM \ - \ ... 
only one match in whitelist with 1 mismatched base allowed.\ - \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ - \ ... multiple matches in whitelist with 1 mismatched\ - \ base allowed, posterior probability calculation is used choose one of the\ - \ matches.\nAllowed CBs have to have at least one read with exact match. This\ - \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ - \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ - \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ - \ multimatching to WL is allowed for CBs with N-bases. This option matches\ - \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ - \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ - \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ - \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - info: null - example: - - "1MM_multi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeSeq" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ - \ CR UR .\nThis parameter is required when running STARsolo with input from\ - \ SAM." 
- info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeQual" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ - \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ - \ to all bases." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloStrand" - description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ - \ information\n- Forward ... read strand same as the original RNA molecule\n\ - - Reverse ... read strand opposite to the original RNA molecule" - info: null - example: - - "Forward" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloFeatures" - description: "genomic features for which the UMI counts per Cell Barcode are\ - \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ - - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ - \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ - \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ - \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ - \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ - \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ - \ Do not count reads with 100% exonic overlap in the antisense direction." - info: null - example: - - "Gene" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloMultiMappers" - description: "counting method for reads mapping to multiple genes\n\n- Unique\ - \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ - \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ - \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ - \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ - \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ - \ Maximization algorithm" - info: null - example: - - "Unique" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIdedup" - description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ - \ ... all UMIs with 1 mismatch distance to each other\ - \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ - \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ - \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ - \ but with more stringent criteria for duplicate UMIs\n- Exact \ - \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ - \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ - \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." - info: null - example: - - "1MM_All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIfiltering" - description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ - - - ... basic filtering: remove UMIs with N and homopolymers\ - \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ - \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ - \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ - \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ - \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFileNames" - description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ - \ barcode_sequences cell_feature_count_matrix" - info: null - example: - - "Solo.out/" - - "features.tsv" - - "barcodes.tsv" - - "matrix.mtx" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellFilter" - description: "cell filtering type and parameters\n\n- None ... do\ - \ not output filtered cells\n- TopCells ... only report top cells by\ - \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ - \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ - \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ - \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ - \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ - \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ - \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ - Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ - \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ - \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ - \ 0.99 10 45000 90000 500 0.01\ - \ 20000 0.01 10000" - info: null - example: - - "CellRanger2.2" - - "3000" - - "0.99" - - "10" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFormatFeaturesGeneField3" - description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ - \ is output." 
- info: null - example: - - "Gene Expression" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellReadStats" - description: "Output reads statistics for each CB\n\n- Standard ... standard\ - \ output" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Align fastq files using STAR." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "docker" - env: - - "STAR_VERSION 2.7.10b" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - 
label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align/star_align" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/star_align/main.nf b/target/nextflow/mapping/star_align/main.nf deleted file mode 100644 index a01860e8fa0..00000000000 --- a/target/nextflow/mapping/star_align/main.nf +++ /dev/null @@ -1,5287 +0,0 @@ -// star_align 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "star_align", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input/Output", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "--readFilesIn" - ], - "description" : "The FASTQ files to be analyzed. 
Corresponds to the --readFilesIn argument in the STAR command.", - "example" : [ - "mysample_S1_L001_R1_001.fastq.gz", - "mysample_S1_L001_R2_001.fastq.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "alternatives" : [ - "--genomeDir" - ], - "description" : "Path to the reference built by star_build_reference. Corresponds to the --genomeDir argument in the STAR command.", - "example" : [ - "/path/to/reference" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "--outFileNamePrefix" - ], - "description" : "Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command.", - "example" : [ - "/path/to/foo" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Run Parameters", - "arguments" : [ - { - "type" : "integer", - "name" : "--runRNGseed", - "description" : "random number generator seed.", - "example" : [ - 777 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Genome Parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--genomeLoad", - "description" : "mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... 
do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome", - "example" : [ - "NoSharedMemory" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--genomeFastaFiles", - "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--genomeFileSizes", - "description" : "genome files exact sizes in bytes. Typically, this should not be defined by the user.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--genomeTransformOutput", - "description" : "which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- None ... no transformation of the output", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--genomeChrSetMitochondrial", - "description" : "names of the mitochondrial chromosomes. 
Presently only used for STARsolo statistics output/", - "example" : [ - "chrM", - "M", - "MT" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Splice Junctions Database", - "arguments" : [ - { - "type" : "string", - "name" : "--sjdbFileChrStartEnd", - "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--sjdbGTFfile", - "description" : "path to the GTF file with annotations", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFchrPrefix", - "description" : "prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes)", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFfeatureExon", - "description" : "feature type in GTF file to be used as exons for building transcripts", - "example" : [ - "exon" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentTranscript", - "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", - "example" : [ - "transcript_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGene", - "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", - "example" : [ - "gene_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGeneName", - "description" : "GTF attribute name for parent gene name", - "example" : [ - "gene_name" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGeneType", - "description" : "GTF attribute name for parent gene type", - "example" : [ - "gene_type", - "gene_biotype" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--sjdbOverhang", - "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", - "example" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - 
"dest" : "par" - }, - { - "type" : "integer", - "name" : "--sjdbScore", - "description" : "extra alignment score for alignments that cross database junctions", - "example" : [ - 2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbInsertSave", - "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", - "example" : [ - "Basic" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Variation parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--varVCFfile", - "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Read Parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--readFilesType", - "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view", - "example" : [ - "Fastx" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesSAMattrKeep", - "description" : "for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... 
do not keep any tags", - "example" : [ - "All" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--readFilesManifest", - "description" : "path to the \\"manifest\\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line.", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesPrefix", - "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesCommand", - "description" : "command line to execute for each of the input file. 
This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--readMapNumber", - "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readMatesLengthsIn", - "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", - "example" : [ - "NotEqual" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readNameSeparator", - "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", - "example" : [ - "/" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--readQualityScoreBase", - "description" : "number to be subtracted from the ASCII code to get Phred quality score", - "example" : [ - 33 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Read Clipping", - "arguments" : [ - { - "type" : "string", - "name" : "--clipAdapterType", - "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. 
Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", - "example" : [ - "Hamming" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip3pNbases", - "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--clip3pAdapterSeq", - "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--clip3pAdapterMMp", - "description" : "max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0.1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip3pAfterAdapterNbases", - "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip5pNbases", - "description" : "number(s) of bases to clip from 5p of each mate. 
If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Limits", - "arguments" : [ - { - "type" : "long", - "name" : "--limitGenomeGenerateRAM", - "description" : "maximum available RAM (bytes) for genome generation", - "example" : [ - 31000000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitIObufferSize", - "description" : "max available buffers size (bytes) for input/output, per thread", - "example" : [ - 30000000, - 50000000 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitOutSAMoneReadBytes", - "description" : "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", - "example" : [ - 100000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitOutSJoneRead", - "description" : "max number of junctions for one read (including all multi-mappers)", - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitOutSJcollapsed", - "description" : "max number of collapsed junctions", - "example" : [ - 1000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitBAMsortRAM", - "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 
0 value can only be used with --genomeLoad NoSharedMemory option.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitSjdbInsertNsj", - "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", - "example" : [ - 1000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitNreadsSoft", - "description" : "soft limit on the number of reads", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output: general", - "arguments" : [ - { - "type" : "string", - "name" : "--outTmpKeep", - "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outStd", - "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM", - "example" : [ - "Log" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outReadsUnmapped", - "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outQSconversionAdd", - "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outMultimapperOrder", - "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases.", - "example" : [ - "Old_2.4" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output: SAM and BAM", - "arguments" : [ - { - "type" : "string", - "name" : "--outSAMtype", - "description" : "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. 
This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM.", - "example" : [ - "SAM" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMmode", - "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", - "example" : [ - "Full" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMstrandField", - "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMattributes", - "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). 
Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --outSAMstrandField.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n***Unsupported/undocumented:\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant.", - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMattrIHstart", - "description" : "start value for the IH attribute. 
0 may be required by some downstream software, such as Cufflinks or StringTie.", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMunmapped", - "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMorder", - "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", - "example" : [ - "Paired" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMprimaryFlag", - "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", - "example" : [ - "OneBestScore" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMreadID", - "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... 
read number (index) in the FASTx file", - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMmapqUnique", - "description" : "0 to 255: the MAPQ value for unique mappers", - "example" : [ - 255 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMflagOR", - "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMflagAND", - "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.", - "example" : [ - 65535 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMattrRGline", - "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --outSAMattrRGline ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. 
Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderHD", - "description" : "@HD (header) line of the SAM header", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderPG", - "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderCommentFile", - "description" : "path to the file with @CO (comment) lines of the SAM header", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMfilter", - "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMmultNmax", - "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... 
all alignments (up to --outFilterMultimapNmax) will be output", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMtlen", - "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMcompression", - "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMsortingThreadN", - "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMsortingBinsN", - "description" : ">0: number of genome bins for coordinate-sorting", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "BAM processing", - "arguments" : [ - { - "type" : "string", - "name" : "--bamRemoveDuplicatesType", - "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... 
no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--bamRemoveDuplicatesMate2basesN", - "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Wiggle", - "arguments" : [ - { - "type" : "string", - "name" : "--outWigType", - "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigStrand", - "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", - "example" : [ - "Stranded" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigReferencesPrefix", - "description" : "prefix matching reference names to include in the output wiggle file, e.g. 
\\"chr\\", default \\"-\\" - include all references", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigNorm", - "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", - "example" : [ - "RPM" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Filtering", - "arguments" : [ - { - "type" : "string", - "name" : "--outFilterType", - "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", - "example" : [ - "Normal" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMultimapScoreRange", - "description" : "the score range below the maximum score for multimapping alignments", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMultimapNmax", - "description" : "maximum number of loci the read is allowed to map to. 
Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMismatchNmax", - "description" : "alignment will be output only if it has no more mismatches than this value.", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMismatchNoverLmax", - "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", - "example" : [ - 0.3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMismatchNoverReadLmax", - "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", - "example" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterScoreMin", - "description" : "alignment will be output only if its score is higher than or equal to this value.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterScoreMinOverLread", - "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { 
- "type" : "integer", - "name" : "--outFilterMatchNmin", - "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMatchNminOverLread", - "description" : "sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outFilterIntronMotifs", - "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outFilterIntronStrands", - "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", - "example" : [ - "RemoveInconsistentStrands" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output splice junctions (SJ.out.tab)", - "arguments" : [ - { - "type" : "string", - "name" : "--outSJtype", - "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... 
no splice junction output", - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Filtering: Splice Junctions", - "arguments" : [ - { - "type" : "string", - "name" : "--outSJfilterReads", - "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", - "example" : [ - "All" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterOverhangMin", - "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions", - "example" : [ - 30, - 12, - 12, - 12 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterCountUniqueMin", - "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", - "example" : [ - 3, - 1, - 1, - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterCountTotalMin", - "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", - "example" : [ - 3, - 1, - 1, - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterDistToOtherSJmin", - "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", - "example" : [ - 10, - 0, - 5, - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterIntronMaxVsReadN", - "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", - "example" : [ - 50000, - 100000, - 200000 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Scoring", - "arguments" : [ - { - "type" : "integer", - "name" : "--scoreGap", - "description" : "splice junction penalty (independent on intron motif)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapNoncan", - "description" : "non-canonical junction penalty (in addition to scoreGap)", - "example" : [ - -8 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapGCAG", - "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", - "example" : [ - -4 - ], - "required" : false, - "direction" : 
"input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapATAC", - "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", - "example" : [ - -8 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGenomicLengthLog2scale", - "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreDelOpen", - "description" : "deletion open penalty", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreDelBase", - "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreInsOpen", - "description" : "insertion open penalty", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreInsBase", - "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreStitchSJshift", - "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : 
false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Alignments and Seeding", - "arguments" : [ - { - "type" : "integer", - "name" : "--seedSearchStartLmax", - "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--seedSearchStartLmaxOverLread", - "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", - "example" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedSearchLmax", - "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedMultimapNmax", - "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedPerReadNmax", - "description" : "max number of seeds per read", - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedPerWindowNmax", - "description" : "max number of seeds per window", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedNoneLociPerWindow", - "description" : "max number of one seed loci per 
window", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedSplitMin", - "description" : "min length of the seed sequences split by Ns or mate gap", - "example" : [ - 12 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedMapMin", - "description" : "min length of seeds to be mapped", - "example" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignIntronMin", - "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", - "example" : [ - 21 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignIntronMax", - "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignMatesGapMax", - "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJoverhangMin", - "description" : "minimum overhang (i.e. 
block size) for spliced alignments", - "exampl''' + '''e" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJstitchMismatchNmax", - "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", - "example" : [ - 0, - -1, - 0, - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJDBoverhangMin", - "description" : "minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments", - "example" : [ - 3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSplicedMateMapLmin", - "description" : "minimum mapped length for a read mate that is spliced", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--alignSplicedMateMapLminOverLmate", - "description" : "alignSplicedMateMapLmin normalized to mate length", - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignWindowsPerReadNmax", - "description" : "max number of windows per read", - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignTranscriptsPerWindowNmax", - "description" : "max number of transcripts per window", - "example" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : 
":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignTranscriptsPerReadNmax", - "description" : "max number of different alignments per read to consider", - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignEndsType", - "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", - "example" : [ - "Local" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignEndsProtrude", - "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", - "example" : [ - "0 ConcordantPair" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignSoftClipAtReferenceEnds", - "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... 
prohibit, useful for compatibility with Cufflinks", - "example" : [ - "Yes" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignInsertionFlush", - "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Paired-End reads", - "arguments" : [ - { - "type" : "integer", - "name" : "--peOverlapNbasesMin", - "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merginf of overlapping mates\\" algorithm.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--peOverlapMMp", - "description" : "maximum proportion of mismatched bases in the overlap area", - "example" : [ - 0.01 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Windows, Anchors, Binning", - "arguments" : [ - { - "type" : "integer", - "name" : "--winAnchorMultimapNmax", - "description" : "max number of loci anchors are allowed to map to", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winBinNbits", - "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", - "example" : [ - 16 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winAnchorDistNbins", - 
"description" : "max number of bins between two anchors that allows aggregation of anchors into one window", - "example" : [ - 9 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winFlankNbins", - "description" : "log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", - "example" : [ - 4 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--winReadCoverageRelativeMin", - "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", - "example" : [ - 0.5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winReadCoverageBasesMin", - "description" : "minimum number of bases covered by the seeds in a window , for STARlong algorithm only.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Chimeric Alignments", - "arguments" : [ - { - "type" : "string", - "name" : "--chimOutType", - "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental chimeric alignments", - "example" : [ - "Junctions" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimSegmentMin", - "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreMin", - "description" : "minimum total (summed) score of the chimeric segments", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreDropMax", - "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreSeparation", - "description" : "minimum difference (separation) between the best chimeric score and the next one", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreJunctionNonGTAG", - "description" : "penalty for a non-GT/AG chimeric junction", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimJunctionOverhangMin", - "description" : "minimum overhang for a chimeric junction", - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : 
"--chimSegmentReadGapMax", - "description" : "maximum gap in the read sequence between chimeric segments", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--chimFilter", - "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", - "example" : [ - "banGenomicN" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMainSegmentMultNmax", - "description" : "maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMultimapNmax", - "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMultimapScoreRange", - "description" : "the score range for multi-mapping chimeras below the best chimeric score. 
Only works with --chimMultimapNmax > 1", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimNonchimScoreDropMin", - "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimOutJunctionFormat", - "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Quantification of Annotations", - "arguments" : [ - { - "type" : "string", - "name" : "--quantMode", - "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--quantTranscriptomeBAMcompression", - "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--quantTranscriptomeBan", - "description" : "prohibit various alignment type\n\n- IndelSoftclipSingleend ... 
prohibit indels, soft clipping and single-end alignments - compatible with RSEM\n- Singleend ... prohibit single-end alignments", - "example" : [ - "IndelSoftclipSingleend" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "2-pass Mapping", - "arguments" : [ - { - "type" : "string", - "name" : "--twopassMode", - "description" : "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--twopass1readsN", - "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "WASP parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--waspOutputMode", - "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "STARsolo (single cell RNA-seq) parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--soloType", - "description" : "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... 
multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBwhitelist", - "description" : "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloCBstart", - "description" : "cell barcode start base", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloCBlen", - "description" : "cell barcode length", - "example" : [ - 16 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloUMIstart", - "description" : "UMI start base", - "example" : [ - 17 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloUMIlen", - "description" : "UMI length", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - 
"dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloBarcodeReadLength", - "description" : "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloBarcodeMate", - "description" : "identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBposition", - "description" : "position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIposition", - "description" : "position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. 
Protocols, 2017):\n--soloCBposition 3_9_3_14", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloAdapterSequence", - "description" : "adapter sequence to anchor barcodes. Only one adapter sequence is allowed.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloAdapterMismatchesNmax", - "description" : "maximum number of mismatches allowed in adapter sequence.", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBmatchWLtype", - "description" : "matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. 
Similar to ParseBio Split-seq pipeline.", - "example" : [ - "1MM_multi" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloInputSAMattrBarcodeSeq", - "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR .\nThis parameter is required when running STARsolo with input from SAM.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloInputSAMattrBarcodeQual", - "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned to all bases.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloStrand", - "description" : "strandedness of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule", - "example" : [ - "Forward" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloFeatures", - "description" : "genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... 
full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.", - "example" : [ - "Gene" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloMultiMappers", - "description" : "counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm", - "example" : [ - "Unique" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIdedup", - "description" : "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \\"directional\\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... 
CellRanger2-4 algorithm for 1MM UMI collapsing.", - "example" : [ - "1MM_All" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIfiltering", - "description" : "type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloOutFileNames", - "description" : "file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", - "example" : [ - "Solo.out/", - "features.tsv", - "barcodes.tsv", - "matrix.mtx" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCellFilter", - "description" : "cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000", - "example" : [ - "CellRanger2.2", - "3000", - "0.99", - "10" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloOutFormatFeaturesGeneField3", - "description" : "field 3 in the Gene features.tsv file. If \\"-\\", then no 3rd field is output.", - "example" : [ - "Gene Expression" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCellReadStats", - "description" : "Output reads statistics for each CB\n\n- Standard ... 
standard output", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Align fastq files using STAR.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "docker", - "env" : [ - "STAR_VERSION 2.7.10b", - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - ] - }, - { - "type" : "docker", - "run" : [ - "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic 
CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" - ] - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN 
-import re -import tempfile -import subprocess -from pathlib import Path -import tarfile -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'genomeFileSizes': $( if [ ! -z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'genomeTransformOutput': $( if [ ! -z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFchrPrefix': $( if [ ! 
-z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesSAMattrKeep': $( if [ ! 
-z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'clip3pAdapterMMp': $( if [ ! 
-z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outTmpKeep': $( if [ ! 
-z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMprimaryFlag': $( if [ ! 
-z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMcompression': $( if [ ! 
-z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterIntronStrands': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJfilterReads': $( if [ ! 
-z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapATAC': $( if [ ! -z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGenomicLengthLog2scale': $( if [ ! 
-z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedPerWindowNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSplicedMateMapLmin': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSplicedMateMapLminOverLmate': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winAnchorMultimapNmax': $( if [ ! -z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winBinNbits': $( if [ ! 
-z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimJunctionOverhangMin': $( if [ ! 
-z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'twopassMode': $( if [ ! 
-z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIposition': $( if [ ! 
-z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloOutFileNames': $( if [ ! 
-z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# regex for matching R[12] fastq(gz) files -# examples: -# - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz -# - tinygex_S1_L001_I1_001.fastq.gz -fastqgz_regex = r'(.+)_(R\\\\d+)(_\\\\d+)?\\\\.fastq(\\\\.gz)?' - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\\\x1f\\\\x8b' - -# look for fastq files in a directory -def search_fastqs(path: Path) -> list[Path]: - if path.is_dir(): - print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True) - value_paths = [file for file in path.iterdir() if re.match(fastqgz_regex, file.name) ] - return value_paths - else: - return [path] - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if 
member.isdir() and member.name != '.' and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -######################## -### Main code ### -######################## - -# rename keys and convert path strings to Path -# note: only list file arguments here. 
if non-file arguments also need to be renamed, -# the \\`processPar()\\` generator needs to be adapted -to_rename = {'input': 'readFilesIn', 'reference': 'genomeDir', 'output': 'outFileNamePrefix'} - -def process_par(orig_par, to_rename): - for key, value in orig_par.items(): - # rename the key in par based on the \\`to_rename\\` dict - if key in to_rename.keys(): - new_key = to_rename[key] - - # also turn value into a Path - if isinstance(value, list): - new_value = [Path(val) for val in value] - else: - new_value = Path(value) - else: - new_key = key - new_value = value - yield new_key, new_value -par = dict(process_par(par, to_rename)) - -# create output dir if need be -par["outFileNamePrefix"].mkdir(parents=True, exist_ok=True) - -with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: - print(">> Check whether input files are directories", flush=True) - new_read_files_in = [] - for path in par["readFilesIn"]: - new_read_files_in.extend(search_fastqs(path)) - par["readFilesIn"] = new_read_files_in - print("", flush=True) - - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - for par_name in ["genomeDir", "readFilesIn"]: - par_values = par[par_name] - if par_values: - # turn value into list - is_multiple = isinstance(par_values, list) - if not is_multiple: - par_values = [ par_values ] - - # output list - new_values = [] - for par_value in par_values: - print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) - new_value = extract_if_need_be(par_value, temp_dir_path) - new_values.append(new_value) - - # unlist if need be - if not is_multiple: - new_values = new_values[0] - - # replace value - par[par_name] = new_values - # end ungzipping - print("", flush=True) - - print("Grouping R1/R2 input files into pairs", flush=True) - input_grouped = {} - for path in par['readFilesIn']: - key = re.search(fastqgz_regex, path.name).group(2) - if key not 
in input_grouped: - input_grouped[key] = [] - input_grouped[key].append(str(path)) - par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ] - print("", flush=True) - - print(">> Constructing command", flush=True) - par["runMode"] = "alignReads" - par["outTmpDir"] = temp_dir_path / "run" - if 'cpus' in meta and meta['cpus']: - par["runThreadN"] = meta["cpus"] - # make sure there is a trailing / - par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/" - - cmd_args = [ "STAR" ] - for name, value in par.items(): - if value is not None: - if isinstance(value, list): - cmd_args.extend(["--" + name] + [str(x) for x in value]) - else: - cmd_args.extend(["--" + name, str(value)]) - print("", flush=True) - - print(">> Running STAR with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - print("", flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_star_align", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. 
the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/star_align/nextflow.config b/target/nextflow/mapping/star_align/nextflow.config deleted file mode 100644 index 
3709eda85c7..00000000000 --- a/target/nextflow/mapping/star_align/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'star_align' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Align fastq files using STAR.' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB 
} - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/star_align/nextflow_params.yaml b/target/nextflow/mapping/star_align/nextflow_params.yaml deleted file mode 100644 index 7c77e19eb4a..00000000000 --- a/target/nextflow/mapping/star_align/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Input/Output -input: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L001_R2_001.fastq.gz"] -reference: # please fill in - example: "/path/to/reference" -# output: "$id.$key.output.output" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/star_align/nextflow_schema.json b/target/nextflow/mapping/star_align/nextflow_schema.json deleted file mode 100644 index 161b59159b8..00000000000 --- a/target/nextflow/mapping/star_align/nextflow_schema.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "star_align", - "description": "Align fastq files using STAR.", - "type": "object", - "definitions": { - "input/output" : { - "title": "Input/Output", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": 
"string", - "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed", - "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed. Corresponds to the --readFilesIn argument in the STAR command." - }, - - "reference": { - "type": "string", - "description": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference", - "help_text": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference. Corresponds to the --genomeDir argument in the STAR command." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command.", - "default": "$id.$key.output.output" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input/output" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/star_align/setup_logger.py b/target/nextflow/mapping/star_align/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/mapping/star_align/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/mapping/star_align_v273a/.config.vsh.yaml b/target/nextflow/mapping/star_align_v273a/.config.vsh.yaml deleted file mode 100644 index dcb92f67bbf..00000000000 --- a/target/nextflow/mapping/star_align_v273a/.config.vsh.yaml +++ /dev/null @@ -1,2535 +0,0 @@ -functionality: - name: "star_align_v273a" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science 
Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - argument_groups: - - name: "Input/Output" - arguments: - - type: "file" - name: "--input" - alternatives: - - "--readFilesIn" - description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ - \ in the STAR command." - info: null - example: - - "mysample_S1_L001_R1_001.fastq.gz" - - "mysample_S1_L001_R2_001.fastq.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--reference" - alternatives: - - "--genomeDir" - description: "Path to the reference built by star_build_reference. Corresponds\ - \ to the --genomeDir in the STAR command." - info: null - example: - - "/path/to/reference" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--outFileNamePrefix" - description: "Path to output directory. Corresponds to the --outFileNamePrefix\ - \ in the STAR command." - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Run Parameters" - arguments: - - type: "integer" - name: "--runRNGseed" - description: "random number generator seed." - info: null - example: - - 777 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome Parameters" - arguments: - - type: "string" - name: "--genomeLoad" - description: "mode of shared memory usage for the genome files. Only used with\ - \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ - \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ - \ but remove it after run\n- LoadAndExit ... 
load genome into shared memory\ - \ and exit, keeping the genome in memory for future runs\n- Remove \ - \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ - \ ... do not use shared memory, each job will have its own private copy of\ - \ the genome" - info: null - example: - - "NoSharedMemory" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--genomeFastaFiles" - description: "path(s) to the fasta files with the genome sequences, separated\ - \ by spaces. These files should be plain text FASTA files, they *cannot* be\ - \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ - \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ - \ sequences to the genome (e.g. spike-ins)." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--genomeFileSizes" - description: "genome files exact sizes in bytes. Typically, this should not\ - \ be defined by the user." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeTransformOutput" - description: "which output to transform back to original genome\n\n- SAM \ - \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ - \ None ... no transformation of the output" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--genomeChrSetMitochondrial" - description: "names of the mitochondrial chromosomes. 
Presently only used for\ - \ STARsolo statistics output/" - info: null - example: - - "chrM" - - "M" - - "MT" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Splice Junctions Database" - arguments: - - type: "string" - name: "--sjdbFileChrStartEnd" - description: "path to the files with genomic coordinates (chr start \ - \ end strand) for the splice junction introns. Multiple files can be\ - \ supplied and will be concatenated." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--sjdbGTFfile" - description: "path to the GTF file with annotations" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFchrPrefix" - description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ - \ ENSMEBL annotations with UCSC genomes)" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFfeatureExon" - description: "feature type in GTF file to be used as exons for building transcripts" - info: null - example: - - "exon" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentTranscript" - description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ - \ works for GTF files)" - info: null - example: - - "transcript_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGene" - description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ - \ for GTF files)" - info: null - example: - - "gene_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: 
"--sjdbGTFtagExonParentGeneName" - description: "GTF attribute name for parent gene name" - info: null - example: - - "gene_name" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--sjdbGTFtagExonParentGeneType" - description: "GTF attribute name for parent gene type" - info: null - example: - - "gene_type" - - "gene_biotype" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--sjdbOverhang" - description: "length of the donor/acceptor sequence on each side of the junctions,\ - \ ideally = (mate_length - 1)" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--sjdbScore" - description: "extra alignment score for alignments that cross database junctions" - info: null - example: - - 2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--sjdbInsertSave" - description: "which files to save when sjdb junctions are inserted on the fly\ - \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ - - All ... all files including big Genome, SA and SAindex - this will create\ - \ a complete genome directory" - info: null - example: - - "Basic" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Variation parameters" - arguments: - - type: "string" - name: "--varVCFfile" - description: "path to the VCF file that contains variation data. The 10th column\ - \ should contain the genotype information, e.g. 0/1" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Parameters" - arguments: - - type: "string" - name: "--readFilesType" - description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ - - SAM SE ... 
SAM or BAM single-end reads; for BAM use --readFilesCommand\ - \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ - \ --readFilesCommand samtools view" - info: null - example: - - "Fastx" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesSAMattrKeep" - description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ - \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ - - None ... do not keep any tags" - info: null - example: - - "All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "file" - name: "--readFilesManifest" - description: "path to the \"manifest\" file with the names of read files. The\ - \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ - \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ - \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ - \ but not tabs are allowed in file names.\nIf read_group_line does not start\ - \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ - If read_group_line starts with ID:, it can contain several fields separated\ - \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ - \ line." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesPrefix" - description: "prefix for the read files names, i.e. it will be added in front\ - \ of the strings in --readFilesIn" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readFilesCommand" - description: "command line to execute for each of the input file. 
This command\ - \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ - \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readMapNumber" - description: "number of reads to map from the beginning of the file\n\n-1: map\ - \ all reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readMatesLengthsIn" - description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ - \ mates are the same / not the same. NotEqual is safe in all situations." - info: null - example: - - "NotEqual" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--readNameSeparator" - description: "character(s) separating the part of the read names that will be\ - \ trimmed in output (read name after space is always trimmed)" - info: null - example: - - "/" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--readQualityScoreBase" - description: "number to be subtracted from the ASCII code to get Phred quality\ - \ score" - info: null - example: - - 33 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Read Clipping" - arguments: - - type: "string" - name: "--clipAdapterType" - description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ - \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ - - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ - \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ - \ ... 
no adapter clipping, all other clip* parameters are disregarded" - info: null - example: - - "Hamming" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--clip3pNbases" - description: "number(s) of bases to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--clip3pAdapterSeq" - description: "adapter sequences to clip from 3p of each mate. If one value\ - \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ - \ sequence with the length equal to read length" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "double" - name: "--clip3pAdapterMMp" - description: "max proportion of mismatches for 3p adapter clipping for each\ - \ mate. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0.1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip3pAfterAdapterNbases" - description: "number of bases to clip from 3p of each mate after the adapter\ - \ clipping. If one value is given, it will be assumed the same for both mates." - info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--clip5pNbases" - description: "number(s) of bases to clip from 5p of each mate. If one value\ - \ is given, it will be assumed the same for both mates." 
- info: null - example: - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Limits" - arguments: - - type: "long" - name: "--limitGenomeGenerateRAM" - description: "maximum available RAM (bytes) for genome generation" - info: null - example: - - 31000000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitIObufferSize" - description: "max available buffers size (bytes) for input/output, per thread" - info: null - example: - - 30000000 - - 50000000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "long" - name: "--limitOutSAMoneReadBytes" - description: "max size of the SAM record (bytes) for one read. Recommended value:\ - \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" - info: null - example: - - 100000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJoneRead" - description: "max number of junctions for one read (including all multi-mappers)" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitOutSJcollapsed" - description: "max number of collapsed junctions" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "long" - name: "--limitBAMsortRAM" - description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ - \ be set to the genome index size. 0 value can only be used with --genomeLoad\ - \ NoSharedMemory option." 
- info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitSjdbInsertNsj" - description: "maximum number of junctions to be inserted to the genome on the\ - \ fly at the mapping stage, including those from annotations and those detected\ - \ in the 1st step of the 2-pass run" - info: null - example: - - 1000000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--limitNreadsSoft" - description: "soft limit on the number of reads" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: general" - arguments: - - type: "string" - name: "--outTmpKeep" - description: "whether to keep the temporary files after STAR runs is finished\n\ - \n- None ... remove all temporary files\n- All ... keep all files" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outStd" - description: "which output will be directed to stdout (standard out)\n\n- Log\ - \ ... log messages\n- SAM ... alignments\ - \ in SAM format (which normally are output to Aligned.out.sam file), normal\ - \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ - \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ - \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ - \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ - \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" - info: null - example: - - "Log" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outReadsUnmapped" - description: "output of unmapped and partially mapped (i.e. 
mapped only one\ - \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ - \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outQSconversionAdd" - description: "add this number to the quality score (e.g. to convert from Illumina\ - \ to Sanger, use -31)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outMultimapperOrder" - description: "order of multimapping alignments in the output files\n\n- Old_2.4\ - \ ... quasi-random order used before 2.5.0\n- Random \ - \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ - \ are always adjacent, all alignment for each read stay together. This option\ - \ will become default in the future releases." - info: null - example: - - "Old_2.4" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output: SAM and BAM" - arguments: - - type: "string" - name: "--outSAMtype" - description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ - \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ - 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ - \ ... sorted by coordinate. This option will allocate extra memory for sorting\ - \ which can be specified by --limitBAMsortRAM." - info: null - example: - - "SAM" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMmode" - description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ - \ SAM output\n- NoQS ... 
full SAM but without quality scores" - info: null - example: - - "Full" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMstrandField" - description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ - - intronMotif ... strand derived from the intron motif. This option changes\ - \ the output alignments: reads with inconsistent and/or non-canonical introns\ - \ are filtered out." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattributes" - description: "a string of desired SAM attributes, in the order desired for the\ - \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ - - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ - \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ - \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ - \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ - \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ - \ local alignment score, +1/-1 for matches/mismateches, score* penalties for\ - \ indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n\ - - nM ... number of mismatches. For PE reads, sum over two mates.\n\ - - NM ... edit distance to the reference (number of mismatched + inserted\ - \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ - \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ - \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ - \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ - \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ - \ annotated, 20 is added to its motif value.\n- jI ... start and\ - \ end of introns for all junctions (1-based).\n- XS ... 
alignment\ - \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ - \ string. Standard SAM tag.\n- ch ... marks all segment of all chimeric\ - \ alingments for --chimOutType WithinBAM output.\n- cN ... number\ - \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ - \ ... variant allele\n- vG ... genomic coordinate of the variant\ - \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ - \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ - \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ - \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ - \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ - \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ - \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ - \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ - \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ - \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ - \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ - \ genome generated with --genomeTransformType Diploid .\n- rB ...\ - \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ - \ of the variant." - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMattrIHstart" - description: "start value for the IH attribute. 0 may be required by some downstream\ - \ software, such as Cufflinks or StringTie." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMunmapped" - description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ - \ ... no output\n- Within ... 
output unmapped reads within the main SAM\ - \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ - \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ - \ to its mapped mate. Only affects multi-mapping reads." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMorder" - description: "type of sorting for the SAM output\n\nPaired: one mate after the\ - \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ - \ other for all paired alignments, the order is kept the same as in the input\ - \ FASTQ files" - info: null - example: - - "Paired" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMprimaryFlag" - description: "which alignments are considered primary - all others will be marked\ - \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ - \ the best score is primary\n- AllBestScore ... all alignments with the best\ - \ score are primary" - info: null - example: - - "OneBestScore" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMreadID" - description: "read ID record type\n\n- Standard ... 
first word (until space)\ - \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ - \ read number (index) in the FASTx file" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMmapqUnique" - description: "0 to 255: the MAPQ value for unique mappers" - info: null - example: - - 255 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagOR" - description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ - \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ - \ are not set otherwise." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMflagAND" - description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ - \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set\ - \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ - \ are not set otherwise." - info: null - example: - - 65535 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMattrRGline" - description: "SAM/BAM read group line. The first word contains the read group\ - \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ - \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ - \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ - \ correspond to different (comma separated) input files in --readFilesIn.\ - \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ - \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderHD" - description: "@HD (header) line of the SAM header" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderPG" - description: "extra @PG (software) line of the SAM header (in addition to STAR)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outSAMheaderCommentFile" - description: "path to the file with @CO (comment) lines of the SAM header" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outSAMfilter" - description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ - \ ... only keep the reads for which all alignments are to the extra reference\ - \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ - \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ - \ at the mapping stage." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSAMmultNmax" - description: "max number of multiple alignments for a read that will be output\ - \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ - \ scoring alignment will be output first\n\n- -1 ... 
all alignments (up to\ - \ --outFilterMultimapNmax) will be output" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSAMtlen" - description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ - - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ - \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ - \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ - \ from 1 for overlapping mates with protruding ends" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMcompression" - description: "-1 to 10 BAM compression level, -1=default compression (6?),\ - \ 0=no compression, 10=maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingThreadN" - description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outBAMsortingBinsN" - description: ">0: number of genome bins for coordinate-sorting" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "BAM processing" - arguments: - - type: "string" - name: "--bamRemoveDuplicatesType" - description: "mark duplicates in the BAM file, for now only works with (i) sorted\ - \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ - \ - ... no duplicate removal/marking\n- UniqueIdentical\ - \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ - \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ - \ unique mappers but not multimappers." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--bamRemoveDuplicatesMate2basesN" - description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ - \ for RAMPAGE)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Wiggle" - arguments: - - type: "string" - name: "--outWigType" - description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ - . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ - - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ - \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ - \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ - \ only 2nd read" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--outWigStrand" - description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ - \ strands, str1 and str2\n- Unstranded ... collapsed strands" - info: null - example: - - "Stranded" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigReferencesPrefix" - description: "prefix matching reference names to include in the output wiggle\ - \ file, e.g. \"chr\", default \"-\" - include all references" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outWigNorm" - description: "type of normalization for the signal\n\n- RPM ... reads per\ - \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" - info: null - example: - - "RPM" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering" - arguments: - - type: "string" - name: "--outFilterType" - description: "type of filtering\n\n- Normal ... standard filtering using only\ - \ current alignment\n- BySJout ... keep only those reads that contain junctions\ - \ that passed filtering into SJ.out.tab" - info: null - example: - - "Normal" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapScoreRange" - description: "the score range below the maximum score for multimapping alignments" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMultimapNmax" - description: "maximum number of loci the read is allowed to map to. Alignments\ - \ (all of them) will be output only if the read maps to no more loci than\ - \ this value.\n\nOtherwise no alignments will be output, and the read will\ - \ be counted as \"mapped to too many loci\" in the Log.final.out ." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMismatchNmax" - description: "alignment will be output only if it has no more mismatches than\ - \ this value." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverLmax" - description: "alignment will be output only if its ratio of mismatches to *mapped*\ - \ length is less than or equal to this value." 
- info: null - example: - - 0.3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMismatchNoverReadLmax" - description: "alignment will be output only if its ratio of mismatches to *read*\ - \ length is less than or equal to this value." - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterScoreMin" - description: "alignment will be output only if its score is higher than or equal\ - \ to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterScoreMinOverLread" - description: "same as outFilterScoreMin, but normalized to read length (sum\ - \ of mates' lengths for paired-end reads)" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outFilterMatchNmin" - description: "alignment will be output only if the number of matched bases is\ - \ higher than or equal to this value." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--outFilterMatchNminOverLread" - description: "same as outFilterMatchNmin, but normalized to the read length (sum\ - \ of mates' lengths for paired-end reads)." - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronMotifs" - description: "filter alignment using their motifs\n\n- None \ - \ ... no filtering\n- RemoveNoncanonical ... filter\ - \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ - \ ... 
filter out alignments that contain non-canonical unannotated junctions\ - \ when using annotated splice junctions database. The annotated non-canonical\ - \ junctions will be kept." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--outFilterIntronStrands" - description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ - \ alignments that have junctions with inconsistent strands\n- None \ - \ ... no filtering" - info: null - example: - - "RemoveInconsistentStrands" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output splice junctions (SJ.out.tab)" - arguments: - - type: "string" - name: "--outSJtype" - description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ - \ output\n- None ... no splice junction output" - info: null - example: - - "Standard" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Output Filtering: Splice Junctions" - arguments: - - type: "string" - name: "--outSJfilterReads" - description: "which reads to consider for collapsed splice junctions output\n\ - \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ - \ mapping reads only" - info: null - example: - - "All" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--outSJfilterOverhangMin" - description: "minimum overhang length for splice junctions on both sides for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ - does not apply to annotated junctions" - info: null - example: - - 30 - - 12 - - 12 - - 12 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountUniqueMin" - description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ - \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ - \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ - \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ - \ are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterCountTotalMin" - description: "minimum total (multi-mapping+unique) read count per junction for:\ - \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ - \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ - Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ - \ conditions are satisfied\ndoes not apply to annotated junctions" - info: null - example: - - 3 - - 1 - - 1 - - 1 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterDistToOtherSJmin" - description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ - does not apply to annotated junctions" - info: null - example: - - 10 - - 0 - - 5 - - 10 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--outSJfilterIntronMaxVsReadN" - description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ - \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ - \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ - does not apply to annotated junctions" - info: null - example: - - 50000 - - 100000 - - 200000 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - name: "Scoring" - arguments: - - type: "integer" - name: "--scoreGap" - description: "splice junction penalty (independent on intron motif)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapNoncan" - description: "non-canonical junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapGCAG" - description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" - info: null - example: - - -4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGapATAC" - description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" - info: null - example: - - -8 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreGenomicLengthLog2scale" - description: "extra score logarithmically scaled with genomic length of the\ - \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelOpen" - description: "deletion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreDelBase" - description: "deletion extension penalty per base (in addition to scoreDelOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: 
"par" - - type: "integer" - name: "--scoreInsOpen" - description: "insertion open penalty" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreInsBase" - description: "insertion extension penalty per base (in addition to scoreInsOpen)" - info: null - example: - - -2 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--scoreStitchSJshift" - description: "maximum score reduction while searching for SJ boundaries in the\ - \ stitching step" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Alignments and Seeding" - arguments: - - type: "integer" - name: "--seedSearchStartLmax" - description: "defines the search start point through the read - the read is\ - \ split into pieces no longer than this value" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--seedSearchStartLmaxOverLread" - description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ - \ for paired-end reads)" - info: null - example: - - 1.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSearchLmax" - description: "defines the maximum length of the seeds, if =0 seed length is\ - \ not limited" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMultimapNmax" - description: "only pieces that map fewer than this value are utilized in the\ - \ stitching procedure" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerReadNmax" - description: "max 
number of seeds per read" - info: null - example: - - 1000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedPerWindowNmax" - description: "max number of seeds per window" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedNoneLociPerWindow" - description: "max number of one seed loci per window" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedSplitMin" - description: "min length of the seed sequences split by Ns or mate gap" - info: null - example: - - 12 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seedMapMin" - description: "min length of seeds to be mapped" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMin" - description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ - \ otherwise it is considered Deletion" - info: null - example: - - 21 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignIntronMax" - description: "maximum intron size, if 0, max intron size will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignMatesGapMax" - description: "maximum gap between two mates, if 0, max intron gap will be determined\ - \ by (2^winBinNbits)*winAnchorDistNbins" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: 
"--alignSJoverhangMin" - description: "minimum overhang (i.e. block size) for spliced alignments" - info: null - example: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSJstitchMismatchNmax" - description: "maximum number of mismatches for stitching of the splice junctions\ - \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ - \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." - info: null - example: - - 0 - - -1 - - 0 - - 0 - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--alignSJDBoverhangMin" - description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ - \ alignments" - info: null - example: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignSplicedMateMapLmin" - description: "minimum mapped length for a read mate that is spliced" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--alignSplicedMateMapLminOverLmate" - description: "alignSplicedMateMapLmin normalized to mate length" - info: null - example: - - 0.66 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignWindowsPerReadNmax" - description: "max number of windows per read" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerWindowNmax" - description: "max number of transcripts per window" - info: null - example: - - 100 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--alignTranscriptsPerReadNmax" - description: "max number of different alignments per read 
to consider" - info: null - example: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsType" - description: "type of read ends alignment\n\n- Local ... standard\ - \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ - \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ - \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ - \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ - \ local alignment" - info: null - example: - - "Local" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignEndsProtrude" - description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ - \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ - \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ - \ ConcordantPair ... report alignments with non-zero protrusion\ - \ as concordant pairs\n- DiscordantPair ... report alignments\ - \ with non-zero protrusion as discordant pairs" - info: null - example: - - "0 ConcordantPair" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignSoftClipAtReferenceEnds" - description: "allow the soft-clipping of the alignments past the end of the\ - \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ - \ with Cufflinks" - info: null - example: - - "Yes" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--alignInsertionFlush" - description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ - \ are not flushed\n- Right ... 
insertions are flushed to the right" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Paired-End reads" - arguments: - - type: "integer" - name: "--peOverlapNbasesMin" - description: "minimum number of overlapping bases to trigger mates merging and\ - \ realignment. Specify >0 value to switch on the \"merging of overlapping\ - \ mates\" algorithm." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--peOverlapMMp" - description: "maximum proportion of mismatched bases in the overlap area" - info: null - example: - - 0.01 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Windows, Anchors, Binning" - arguments: - - type: "integer" - name: "--winAnchorMultimapNmax" - description: "max number of loci anchors are allowed to map to" - info: null - example: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winBinNbits" - description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ - \ each window will occupy an integer number of bins." 
- info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winAnchorDistNbins" - description: "max number of bins between two anchors that allows aggregation\ - \ of anchors into one window" - info: null - example: - - 9 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winFlankNbins" - description: "log2(winFlank), where win Flank is the size of the left and right\ - \ flanking regions for each window" - info: null - example: - - 4 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--winReadCoverageRelativeMin" - description: "minimum relative coverage of the read sequence by the seeds in\ - \ a window, for STARlong algorithm only." - info: null - example: - - 0.5 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--winReadCoverageBasesMin" - description: "minimum number of bases covered by the seeds in a window , for\ - \ STARlong algorithm only." - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Chimeric Alignments" - arguments: - - type: "string" - name: "--chimOutType" - description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ - - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ - - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ - - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ - \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ - \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" - info: null - example: - - "Junctions" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimSegmentMin" - description: "minimum length of chimeric segment length, if ==0, no chimeric\ - \ output" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreMin" - description: "minimum total (summed) score of the chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreDropMax" - description: "max drop (difference) of chimeric score (the sum of scores of\ - \ all chimeric segments) from the read length" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreSeparation" - description: "minimum difference (separation) between the best chimeric score\ - \ and the next one" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimScoreJunctionNonGTAG" - description: "penalty for a non-GT/AG chimeric junction" - info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimJunctionOverhangMin" - description: "minimum overhang for a chimeric junction" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimSegmentReadGapMax" - description: "maximum gap in the read sequence between chimeric segments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - 
type: "string" - name: "--chimFilter" - description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ - - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ - \ junction" - info: null - example: - - "banGenomicN" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--chimMainSegmentMultNmax" - description: "maximum number of multi-alignments for the main chimeric segment.\ - \ =1 will prohibit multimapping main segments." - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapNmax" - description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ - \ old scheme for chimeric detection which only considered unique alignments" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimMultimapScoreRange" - description: "the score range for multi-mapping chimeras below the best chimeric\ - \ score. Only works with --chimMultimapNmax > 1" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimNonchimScoreDropMin" - description: "to trigger chimeric detection, the drop in the best non-chimeric\ - \ alignment score with respect to the read length has to be greater than this\ - \ value" - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--chimOutJunctionFormat" - description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ - \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ - \ command line and Nreads: total, unique/multi-mapping" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Quantification of Annotations" - arguments: - - type: "string" - name: "--quantMode" - description: "types of quantification requested\n\n- - ... none\n\ - - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ - \ file\n- GeneCounts ... count reads per gene" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--quantTranscriptomeBAMcompression" - description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ - \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ - - 10 ... maximum compression" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--quantTranscriptomeBan" - description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ - \ prohibit indels, soft clipping and single-end alignments - compatible with\ - \ RSEM\n- Singleend ... prohibit single-end alignments" - info: null - example: - - "IndelSoftclipSingleend" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "2-pass Mapping" - arguments: - - type: "string" - name: "--twopassMode" - description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ - \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ - \ the genome indices on the fly" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--twopass1readsN" - description: "number of reads to process for the 1st step. Use very large number\ - \ (or default -1) to map all reads in the first step." 
- info: null - example: - - -1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "WASP parameters" - arguments: - - type: "string" - name: "--waspOutputMode" - description: "WASP allele-specific output type. This is a re-implementation of\ - \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ - \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ - \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ - \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "STARsolo (single cell RNA-seq) parameters" - arguments: - - type: "string" - name: "--soloType" - description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ - \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ - \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ - \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ - \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ - \ as CR and/or CB SAM tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ - \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ - \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ - \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ - \ UMI sequences, alignments deduplicated according to alignment start and\ - \ end (after extending soft-clipped bases)" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCBwhitelist" - description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ - \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ - \ all cell barcodes are allowed" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "integer" - name: "--soloCBstart" - description: "cell barcode start base" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloCBlen" - description: "cell barcode length" - info: null - example: - - 16 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIstart" - description: "UMI start base" - info: null - example: - - 17 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloUMIlen" - description: "UMI length" - info: null - example: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeReadLength" - description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ - - 0 ... not defined, do not check" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloBarcodeMate" - description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ - \n- 0 ... barcode sequence is on separate read, which should always be the\ - \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ - \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" - info: null - example: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBposition" - description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ - \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ - \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ - start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ - \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ - \ position with of the CB start(end) with respect to the Anchor Base\nString\ - \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ - \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIposition" - description: "position of the UMI on the barcode read, same as soloCBposition\n\ - \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ - \ 3_9_3_14" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloAdapterSequence" - description: "adapter sequence to anchor barcodes. Only one adapter sequence\ - \ is allowed." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--soloAdapterMismatchesNmax" - description: "maximum number of mismatches allowed in adapter sequence." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloCBmatchWLtype" - description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ - \ ... only exact matches allowed\n- 1MM \ - \ ... 
only one match in whitelist with 1 mismatched base allowed.\ - \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ - \ ... multiple matches in whitelist with 1 mismatched\ - \ base allowed, posterior probability calculation is used choose one of the\ - \ matches.\nAllowed CBs have to have at least one read with exact match. This\ - \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ - \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ - \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ - \ multimatching to WL is allowed for CBs with N-bases. This option matches\ - \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ - \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ - \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ - \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." - info: null - example: - - "1MM_multi" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeSeq" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ - \ CR UR .\nThis parameter is required when running STARsolo with input from\ - \ SAM." 
- info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloInputSAMattrBarcodeQual" - description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ - \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ - \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ - \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ - \ to all bases." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloStrand" - description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ - \ information\n- Forward ... read strand same as the original RNA molecule\n\ - - Reverse ... read strand opposite to the original RNA molecule" - info: null - example: - - "Forward" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--soloFeatures" - description: "genomic features for which the UMI counts per Cell Barcode are\ - \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ - - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ - \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ - \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ - \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ - \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ - \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ - \ Do not count reads with 100% exonic overlap in the antisense direction." - info: null - example: - - "Gene" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloMultiMappers" - description: "counting method for reads mapping to multiple genes\n\n- Unique\ - \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ - \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ - \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ - \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ - \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ - \ Maximization algorithm" - info: null - example: - - "Unique" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIdedup" - description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ - \ ... all UMIs with 1 mismatch distance to each other\ - \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ - \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ - \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ - \ but with more stringent criteria for duplicate UMIs\n- Exact \ - \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ - \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ - \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." - info: null - example: - - "1MM_All" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloUMIfiltering" - description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ - - - ... basic filtering: remove UMIs with N and homopolymers\ - \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ - \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ - \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ - \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ - \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFileNames" - description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ - \ barcode_sequences cell_feature_count_matrix" - info: null - example: - - "Solo.out/" - - "features.tsv" - - "barcodes.tsv" - - "matrix.mtx" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellFilter" - description: "cell filtering type and parameters\n\n- None ... do\ - \ not output filtered cells\n- TopCells ... only report top cells by\ - \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ - \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ - \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ - \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ - \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ - \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ - \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ - Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ - \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ - \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ - \ 0.99 10 45000 90000 500 0.01\ - \ 20000 0.01 10000" - info: null - example: - - "CellRanger2.2" - - "3000" - - "0.99" - - "10" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloOutFormatFeaturesGeneField3" - description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ - \ is output." 
- info: null - example: - - "Gene Expression" - required: false - direction: "input" - multiple: true - multiple_sep: ";" - dest: "par" - - type: "string" - name: "--soloCellReadStats" - description: "Output reads statistics for each CB\n\n- Standard ... standard\ - \ output" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "../star_align/script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Align fastq files using STAR." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "docker" - env: - - "STAR_VERSION 2.7.3a" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - 
directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align_v273a" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align_v273a/star_align_v273a" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/star_align_v273a/main.nf b/target/nextflow/mapping/star_align_v273a/main.nf deleted file mode 100644 index 03682104bb8..00000000000 --- a/target/nextflow/mapping/star_align_v273a/main.nf +++ /dev/null @@ -1,5287 +0,0 @@ -// star_align_v273a 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. 
-// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "star_align_v273a", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input/Output", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "--readFilesIn" - ], - "description" : "The FASTQ files to be analyzed. 
Corresponds to the --readFilesIn in the STAR command.", - "example" : [ - "mysample_S1_L001_R1_001.fastq.gz", - "mysample_S1_L001_R2_001.fastq.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--reference", - "alternatives" : [ - "--genomeDir" - ], - "description" : "Path to the reference built by star_build_reference. Corresponds to the --genomeDir in the STAR command.", - "example" : [ - "/path/to/reference" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "--outFileNamePrefix" - ], - "description" : "Path to output directory. Corresponds to the --outFileNamePrefix in the STAR command.", - "example" : [ - "/path/to/foo" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Run Parameters", - "arguments" : [ - { - "type" : "integer", - "name" : "--runRNGseed", - "description" : "random number generator seed.", - "example" : [ - 777 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Genome Parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--genomeLoad", - "description" : "mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... 
do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome", - "example" : [ - "NoSharedMemory" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--genomeFastaFiles", - "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--genomeFileSizes", - "description" : "genome files exact sizes in bytes. Typically, this should not be defined by the user.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--genomeTransformOutput", - "description" : "which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- None ... no transformation of the output", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--genomeChrSetMitochondrial", - "description" : "names of the mitochondrial chromosomes. 
Presently only used for STARsolo statistics output/", - "example" : [ - "chrM", - "M", - "MT" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Splice Junctions Database", - "arguments" : [ - { - "type" : "string", - "name" : "--sjdbFileChrStartEnd", - "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--sjdbGTFfile", - "description" : "path to the GTF file with annotations", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFchrPrefix", - "description" : "prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes)", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFfeatureExon", - "description" : "feature type in GTF file to be used as exons for building transcripts", - "example" : [ - "exon" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentTranscript", - "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", - "example" : [ - "transcript_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGene", - "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", - "example" : [ - "gene_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGeneName", - "description" : "GTF attribute name for parent gene name", - "example" : [ - "gene_name" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbGTFtagExonParentGeneType", - "description" : "GTF attribute name for parent gene type", - "example" : [ - "gene_type", - "gene_biotype" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--sjdbOverhang", - "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", - "example" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - 
"dest" : "par" - }, - { - "type" : "integer", - "name" : "--sjdbScore", - "description" : "extra alignment score for alignments that cross database junctions", - "example" : [ - 2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--sjdbInsertSave", - "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", - "example" : [ - "Basic" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Variation parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--varVCFfile", - "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Read Parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--readFilesType", - "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view", - "example" : [ - "Fastx" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesSAMattrKeep", - "description" : "for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... 
do not keep any tags", - "example" : [ - "All" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--readFilesManifest", - "description" : "path to the \\"manifest\\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line.", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesPrefix", - "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readFilesCommand", - "description" : "command line to execute for each of the input file. 
This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--readMapNumber", - "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readMatesLengthsIn", - "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", - "example" : [ - "NotEqual" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--readNameSeparator", - "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", - "example" : [ - "/" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--readQualityScoreBase", - "description" : "number to be subtracted from the ASCII code to get Phred quality score", - "example" : [ - 33 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Read Clipping", - "arguments" : [ - { - "type" : "string", - "name" : "--clipAdapterType", - "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. 
Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", - "example" : [ - "Hamming" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip3pNbases", - "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--clip3pAdapterSeq", - "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--clip3pAdapterMMp", - "description" : "max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0.1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip3pAfterAdapterNbases", - "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--clip5pNbases", - "description" : "number(s) of bases to clip from 5p of each mate. 
If one value is given, it will be assumed the same for both mates.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Limits", - "arguments" : [ - { - "type" : "long", - "name" : "--limitGenomeGenerateRAM", - "description" : "maximum available RAM (bytes) for genome generation", - "example" : [ - 31000000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitIObufferSize", - "description" : "max available buffers size (bytes) for input/output, per thread", - "example" : [ - 30000000, - 50000000 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitOutSAMoneReadBytes", - "description" : "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", - "example" : [ - 100000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitOutSJoneRead", - "description" : "max number of junctions for one read (including all multi-mappers)", - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitOutSJcollapsed", - "description" : "max number of collapsed junctions", - "example" : [ - 1000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "long", - "name" : "--limitBAMsortRAM", - "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 
0 value can only be used with --genomeLoad NoSharedMemory option.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitSjdbInsertNsj", - "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", - "example" : [ - 1000000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--limitNreadsSoft", - "description" : "soft limit on the number of reads", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output: general", - "arguments" : [ - { - "type" : "string", - "name" : "--outTmpKeep", - "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outStd", - "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM", - "example" : [ - "Log" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outReadsUnmapped", - "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outQSconversionAdd", - "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outMultimapperOrder", - "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases.", - "example" : [ - "Old_2.4" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output: SAM and BAM", - "arguments" : [ - { - "type" : "string", - "name" : "--outSAMtype", - "description" : "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. 
This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM.", - "example" : [ - "SAM" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMmode", - "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", - "example" : [ - "Full" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMstrandField", - "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMattributes", - "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). 
Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --outSAMstrandField.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n***Unsupported/undocumented:\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant.", - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMattrIHstart", - "description" : "start value for the IH attribute. 
0 may be required by some downstream software, such as Cufflinks or StringTie.", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMunmapped", - "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMorder", - "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", - "example" : [ - "Paired" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMprimaryFlag", - "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", - "example" : [ - "OneBestScore" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMreadID", - "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... 
read number (index) in the FASTx file", - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMmapqUnique", - "description" : "0 to 255: the MAPQ value for unique mappers", - "example" : [ - 255 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMflagOR", - "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMflagAND", - "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.", - "example" : [ - 65535 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMattrRGline", - "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --outSAMattrRGline ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. 
Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderHD", - "description" : "@HD (header) line of the SAM header", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderPG", - "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMheaderCommentFile", - "description" : "path to the file with @CO (comment) lines of the SAM header", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outSAMfilter", - "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMmultNmax", - "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... 
all alignments (up to --outFilterMultimapNmax) will be output", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSAMtlen", - "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMcompression", - "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMsortingThreadN", - "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outBAMsortingBinsN", - "description" : ">0: number of genome bins for coordinate-sorting", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "BAM processing", - "arguments" : [ - { - "type" : "string", - "name" : "--bamRemoveDuplicatesType", - "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... 
no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--bamRemoveDuplicatesMate2basesN", - "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Wiggle", - "arguments" : [ - { - "type" : "string", - "name" : "--outWigType", - "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigStrand", - "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", - "example" : [ - "Stranded" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigReferencesPrefix", - "description" : "prefix matching reference names to include in the output wiggle file, e.g. 
\\"chr\\", default \\"-\\" - include all references", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outWigNorm", - "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", - "example" : [ - "RPM" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Filtering", - "arguments" : [ - { - "type" : "string", - "name" : "--outFilterType", - "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", - "example" : [ - "Normal" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMultimapScoreRange", - "description" : "the score range below the maximum score for multimapping alignments", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMultimapNmax", - "description" : "maximum number of loci the read is allowed to map to. 
Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterMismatchNmax", - "description" : "alignment will be output only if it has no more mismatches than this value.", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMismatchNoverLmax", - "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", - "example" : [ - 0.3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMismatchNoverReadLmax", - "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", - "example" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outFilterScoreMin", - "description" : "alignment will be output only if its score is higher than or equal to this value.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterScoreMinOverLread", - "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { 
- "type" : "integer", - "name" : "--outFilterMatchNmin", - "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--outFilterMatchNminOverLread", - "description" : "sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outFilterIntronMotifs", - "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--outFilterIntronStrands", - "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", - "example" : [ - "RemoveInconsistentStrands" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output splice junctions (SJ.out.tab)", - "arguments" : [ - { - "type" : "string", - "name" : "--outSJtype", - "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... 
no splice junction output", - "example" : [ - "Standard" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Output Filtering: Splice Junctions", - "arguments" : [ - { - "type" : "string", - "name" : "--outSJfilterReads", - "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", - "example" : [ - "All" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterOverhangMin", - "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions", - "example" : [ - 30, - 12, - 12, - 12 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterCountUniqueMin", - "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", - "example" : [ - 3, - 1, - 1, - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterCountTotalMin", - "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", - "example" : [ - 3, - 1, - 1, - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterDistToOtherSJmin", - "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", - "example" : [ - 10, - 0, - 5, - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--outSJfilterIntronMaxVsReadN", - "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", - "example" : [ - 50000, - 100000, - 200000 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - } - ] - }, - { - "name" : "Scoring", - "arguments" : [ - { - "type" : "integer", - "name" : "--scoreGap", - "description" : "splice junction penalty (independent on intron motif)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapNoncan", - "description" : "non-canonical junction penalty (in addition to scoreGap)", - "example" : [ - -8 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapGCAG", - "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", - "example" : [ - -4 - ], - "required" : false, - "direction" : 
"input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGapATAC", - "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", - "example" : [ - -8 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreGenomicLengthLog2scale", - "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreDelOpen", - "description" : "deletion open penalty", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreDelBase", - "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreInsOpen", - "description" : "insertion open penalty", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreInsBase", - "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", - "example" : [ - -2 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--scoreStitchSJshift", - "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : 
false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Alignments and Seeding", - "arguments" : [ - { - "type" : "integer", - "name" : "--seedSearchStartLmax", - "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--seedSearchStartLmaxOverLread", - "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", - "example" : [ - 1.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedSearchLmax", - "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedMultimapNmax", - "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedPerReadNmax", - "description" : "max number of seeds per read", - "example" : [ - 1000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedPerWindowNmax", - "description" : "max number of seeds per window", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedNoneLociPerWindow", - "description" : "max number of one seed loci per 
window", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedSplitMin", - "description" : "min length of the seed sequences split by Ns or mate gap", - "example" : [ - 12 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seedMapMin", - "description" : "min length of seeds to be mapped", - "example" : [ - 5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignIntronMin", - "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", - "example" : [ - 21 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignIntronMax", - "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignMatesGapMax", - "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJoverhangMin", - "description" : "minimum overhang (i.e. 
block size) for spliced alignments", - "example" : [ - ''' + '''5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJstitchMismatchNmax", - "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", - "example" : [ - 0, - -1, - 0, - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSJDBoverhangMin", - "description" : "minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments", - "example" : [ - 3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignSplicedMateMapLmin", - "description" : "minimum mapped length for a read mate that is spliced", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--alignSplicedMateMapLminOverLmate", - "description" : "alignSplicedMateMapLmin normalized to mate length", - "example" : [ - 0.66 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignWindowsPerReadNmax", - "description" : "max number of windows per read", - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignTranscriptsPerWindowNmax", - "description" : "max number of transcripts per window", - "example" : [ - 100 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : 
":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--alignTranscriptsPerReadNmax", - "description" : "max number of different alignments per read to consider", - "example" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignEndsType", - "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", - "example" : [ - "Local" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignEndsProtrude", - "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", - "example" : [ - "0 ConcordantPair" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignSoftClipAtReferenceEnds", - "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... 
prohibit, useful for compatibility with Cufflinks", - "example" : [ - "Yes" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--alignInsertionFlush", - "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Paired-End reads", - "arguments" : [ - { - "type" : "integer", - "name" : "--peOverlapNbasesMin", - "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merginf of overlapping mates\\" algorithm.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--peOverlapMMp", - "description" : "maximum proportion of mismatched bases in the overlap area", - "example" : [ - 0.01 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Windows, Anchors, Binning", - "arguments" : [ - { - "type" : "integer", - "name" : "--winAnchorMultimapNmax", - "description" : "max number of loci anchors are allowed to map to", - "example" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winBinNbits", - "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", - "example" : [ - 16 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winAnchorDistNbins", - 
"description" : "max number of bins between two anchors that allows aggregation of anchors into one window", - "example" : [ - 9 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winFlankNbins", - "description" : "log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", - "example" : [ - 4 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--winReadCoverageRelativeMin", - "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", - "example" : [ - 0.5 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--winReadCoverageBasesMin", - "description" : "minimum number of bases covered by the seeds in a window , for STARlong algorithm only.", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Chimeric Alignments", - "arguments" : [ - { - "type" : "string", - "name" : "--chimOutType", - "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental chimeric alignments", - "example" : [ - "Junctions" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimSegmentMin", - "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreMin", - "description" : "minimum total (summed) score of the chimeric segments", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreDropMax", - "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreSeparation", - "description" : "minimum difference (separation) between the best chimeric score and the next one", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimScoreJunctionNonGTAG", - "description" : "penalty for a non-GT/AG chimeric junction", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimJunctionOverhangMin", - "description" : "minimum overhang for a chimeric junction", - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : 
"--chimSegmentReadGapMax", - "description" : "maximum gap in the read sequence between chimeric segments", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--chimFilter", - "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", - "example" : [ - "banGenomicN" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMainSegmentMultNmax", - "description" : "maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMultimapNmax", - "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimMultimapScoreRange", - "description" : "the score range for multi-mapping chimeras below the best chimeric score. 
Only works with --chimMultimapNmax > 1", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimNonchimScoreDropMin", - "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", - "example" : [ - 20 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--chimOutJunctionFormat", - "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Quantification of Annotations", - "arguments" : [ - { - "type" : "string", - "name" : "--quantMode", - "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--quantTranscriptomeBAMcompression", - "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--quantTranscriptomeBan", - "description" : "prohibit various alignment type\n\n- IndelSoftclipSingleend ... 
prohibit indels, soft clipping and single-end alignments - compatible with RSEM\n- Singleend ... prohibit single-end alignments", - "example" : [ - "IndelSoftclipSingleend" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "2-pass Mapping", - "arguments" : [ - { - "type" : "string", - "name" : "--twopassMode", - "description" : "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--twopass1readsN", - "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", - "example" : [ - -1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "WASP parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--waspOutputMode", - "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "STARsolo (single cell RNA-seq) parameters", - "arguments" : [ - { - "type" : "string", - "name" : "--soloType", - "description" : "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... 
multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBwhitelist", - "description" : "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloCBstart", - "description" : "cell barcode start base", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloCBlen", - "description" : "cell barcode length", - "example" : [ - 16 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloUMIstart", - "description" : "UMI start base", - "example" : [ - 17 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloUMIlen", - "description" : "UMI length", - "example" : [ - 10 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - 
"dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloBarcodeReadLength", - "description" : "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloBarcodeMate", - "description" : "identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2", - "example" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBposition", - "description" : "position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIposition", - "description" : "position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. 
Protocols, 2017):\n--soloCBposition 3_9_3_14", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloAdapterSequence", - "description" : "adapter sequence to anchor barcodes. Only one adapter sequence is allowed.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--soloAdapterMismatchesNmax", - "description" : "maximum number of mismatches allowed in adapter sequence.", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCBmatchWLtype", - "description" : "matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. 
Similar to ParseBio Split-seq pipeline.", - "example" : [ - "1MM_multi" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloInputSAMattrBarcodeSeq", - "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR .\nThis parameter is required when running STARsolo with input from SAM.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloInputSAMattrBarcodeQual", - "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned to all bases.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloStrand", - "description" : "strandedness of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule", - "example" : [ - "Forward" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloFeatures", - "description" : "genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... 
full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.", - "example" : [ - "Gene" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloMultiMappers", - "description" : "counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm", - "example" : [ - "Unique" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIdedup", - "description" : "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \\"directional\\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... 
CellRanger2-4 algorithm for 1MM UMI collapsing.", - "example" : [ - "1MM_All" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloUMIfiltering", - "description" : "type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloOutFileNames", - "description" : "file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", - "example" : [ - "Solo.out/", - "features.tsv", - "barcodes.tsv", - "matrix.mtx" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCellFilter", - "description" : "cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000", - "example" : [ - "CellRanger2.2", - "3000", - "0.99", - "10" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloOutFormatFeaturesGeneField3", - "description" : "field 3 in the Gene features.tsv file. If \\"-\\", then no 3rd field is output.", - "example" : [ - "Gene Expression" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--soloCellReadStats", - "description" : "Output reads statistics for each CB\n\n- Standard ... 
standard output", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "../star_align/script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Align fastq files using STAR.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "docker", - "env" : [ - "STAR_VERSION 2.7.3a", - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - ] - }, - { - "type" : "docker", - "run" : [ - "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make 
STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" - ] - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align_v273a", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > 
"$tempscript" << VIASHMAIN -import re -import tempfile -import subprocess -from pathlib import Path -import tarfile -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'genomeFileSizes': $( if [ ! -z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'genomeTransformOutput': $( if [ ! -z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFfile': $( if [ ! 
-z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesType': $( if [ ! 
-z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip3pAdapterSeq': $( if [ ! 
-z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'limitNreadsSoft': $( if [ ! 
-z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMorder': $( if [ ! 
-z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outSAMtlen': $( if [ ! 
-z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterMultimapScoreRange': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outFilterIntronStrands': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGapATAC': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedPerReadNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), - 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSplicedMateMapLmin': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winAnchorMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimScoreJunctionNonGTAG': $( if [ ! 
-z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'quantTranscriptomeBan': $( if [ ! 
-z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBposition': $( if [ ! 
-z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloUMIfiltering': $( if [ ! 
-z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# regex for matching R[12] fastq(gz) files -# examples: -# - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz -# - tinygex_S1_L001_I1_001.fastq.gz -fastqgz_regex = r'(.+)_(R\\\\d+)(_\\\\d+)?\\\\.fastq(\\\\.gz)?' 
- -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\\\x1f\\\\x8b' - -# look for fastq files in a directory -def search_fastqs(path: Path) -> list[Path]: - if path.is_dir(): - print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True) - value_paths = [file for file in path.iterdir() if re.match(fastqgz_regex, file.name) ] - return value_paths - else: - return [path] - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -######################## -### Main code ### -######################## - -# rename keys and convert path strings to Path -# note: only list file arguments here. 
if non-file arguments also need to be renamed, -# the \\`processPar()\\` generator needs to be adapted -to_rename = {'input': 'readFilesIn', 'reference': 'genomeDir', 'output': 'outFileNamePrefix'} - -def process_par(orig_par, to_rename): - for key, value in orig_par.items(): - # rename the key in par based on the \\`to_rename\\` dict - if key in to_rename.keys(): - new_key = to_rename[key] - - # also turn value into a Path - if isinstance(value, list): - new_value = [Path(val) for val in value] - else: - new_value = Path(value) - else: - new_key = key - new_value = value - yield new_key, new_value -par = dict(process_par(par, to_rename)) - -# create output dir if need be -par["outFileNamePrefix"].mkdir(parents=True, exist_ok=True) - -with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: - print(">> Check whether input files are directories", flush=True) - new_read_files_in = [] - for path in par["readFilesIn"]: - new_read_files_in.extend(search_fastqs(path)) - par["readFilesIn"] = new_read_files_in - print("", flush=True) - - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - for par_name in ["genomeDir", "readFilesIn"]: - par_values = par[par_name] - if par_values: - # turn value into list - is_multiple = isinstance(par_values, list) - if not is_multiple: - par_values = [ par_values ] - - # output list - new_values = [] - for par_value in par_values: - print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) - new_value = extract_if_need_be(par_value, temp_dir_path) - new_values.append(new_value) - - # unlist if need be - if not is_multiple: - new_values = new_values[0] - - # replace value - par[par_name] = new_values - # end ungzipping - print("", flush=True) - - print("Grouping R1/R2 input files into pairs", flush=True) - input_grouped = {} - for path in par['readFilesIn']: - key = re.search(fastqgz_regex, path.name).group(2) - if key not 
in input_grouped: - input_grouped[key] = [] - input_grouped[key].append(str(path)) - par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ] - print("", flush=True) - - print(">> Constructing command", flush=True) - par["runMode"] = "alignReads" - par["outTmpDir"] = temp_dir_path / "run" - if 'cpus' in meta and meta['cpus']: - par["runThreadN"] = meta["cpus"] - # make sure there is a trailing / - par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/" - - cmd_args = [ "STAR" ] - for name, value in par.items(): - if value is not None: - if isinstance(value, list): - cmd_args.extend(["--" + name] + [str(x) for x in value]) - else: - cmd_args.extend(["--" + name, str(value)]) - print("", flush=True) - - print(">> Running STAR with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - print("", flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_star_align_v273a", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. 
the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/star_align_v273a/nextflow.config b/target/nextflow/mapping/star_align_v273a/nextflow.config deleted file mode 100644 index 
12713a49708..00000000000 --- a/target/nextflow/mapping/star_align_v273a/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'star_align_v273a' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Align fastq files using STAR.' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { 
memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/star_align_v273a/nextflow_params.yaml b/target/nextflow/mapping/star_align_v273a/nextflow_params.yaml deleted file mode 100644 index 7c77e19eb4a..00000000000 --- a/target/nextflow/mapping/star_align_v273a/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Input/Output -input: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L001_R2_001.fastq.gz"] -reference: # please fill in - example: "/path/to/reference" -# output: "$id.$key.output.output" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/star_align_v273a/nextflow_schema.json b/target/nextflow/mapping/star_align_v273a/nextflow_schema.json deleted file mode 100644 index 93a4b284c3b..00000000000 --- a/target/nextflow/mapping/star_align_v273a/nextflow_schema.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "star_align_v273a", - "description": "Align fastq files using STAR.", - "type": "object", - "definitions": { - "input/output" : { - "title": "Input/Output", - "type": "object", - "description": "No 
description", - "properties": { - - "input": { - "type": "string", - "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed", - "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed. Corresponds to the --readFilesIn in the STAR command." - }, - - "reference": { - "type": "string", - "description": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference", - "help_text": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference. Corresponds to the --genomeDir in the STAR command." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --outFileNamePrefix in the STAR command.", - "default": "$id.$key.output.output" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input/output" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/mapping/star_align_v273a/setup_logger.py b/target/nextflow/mapping/star_align_v273a/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/mapping/star_align_v273a/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/mapping/star_build_reference/.config.vsh.yaml b/target/nextflow/mapping/star_build_reference/.config.vsh.yaml deleted file mode 100644 index 39f24970175..00000000000 --- a/target/nextflow/mapping/star_build_reference/.config.vsh.yaml +++ /dev/null @@ -1,190 +0,0 @@ -functionality: - name: "star_build_reference" - namespace: "mapping" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Input/Output" - arguments: - - type: "file" - name: "--genome_fasta" - alternatives: - - "--genomeFastaFiles" - description: "The fasta files to be included in the reference. Corresponds to\ - \ the --genomeFastaFiles argument in the STAR command." 
- info: null - example: - - "chr1.fasta" - - "chr2.fasta" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: " " - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - alternatives: - - "--sjdbGTFfile" - description: "Specifies the path to the file with annotated transcripts in the\ - \ standard GTF\nformat. STAR will extract splice junctions from this file\ - \ and use them to greatly improve\naccuracy of the mapping. Corresponds to\ - \ the --sjdbGTFfile argument in the STAR command.\n" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "--genomeDir" - description: "Path to output directory. Corresponds to the --genomeDir argument\ - \ in the STAR command." - info: null - example: - - "/path/to/foo" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Genome indexing arguments" - arguments: - - type: "integer" - name: "--genomeSAindexNbases" - description: "Length (bases) of the SA pre-indexing string. Typically between\ - \ 10 and 15.\nLonger strings will use much more memory, but allow faster searches.\ - \ For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down\ - \ to\nmin(14, log2(GenomeLength)/2 - 1).\n" - info: null - default: - - 14 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Create a reference for STAR from a set of fasta files." 
- test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "../../../resources_test/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "docker" - env: - - "STAR_VERSION 2.7.10b" - - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - - type: "docker" - run: - - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ - \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ - \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ - \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: 
"memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/config.vsh.yml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_build_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_build_reference/star_build_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/mapping/star_build_reference/main.nf b/target/nextflow/mapping/star_build_reference/main.nf deleted file mode 100644 index 9d59580527a..00000000000 --- a/target/nextflow/mapping/star_build_reference/main.nf +++ /dev/null @@ -1,2686 +0,0 @@ -// star_build_reference 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "star_build_reference", - "namespace" : "mapping", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Input/Output", - "arguments" : [ - { - "type" : "file", - "name" : "--genome_fasta", - "alternatives" : [ - "--genomeFastaFiles" - ], - "description" : "The fasta files to be included in the reference. Corresponds to the --genomeFastaFiles argument in the STAR command.", - "example" : [ - "chr1.fasta", - "chr2.fasta" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : " ", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--transcriptome_gtf", - "alternatives" : [ - "--sjdbGTFfile" - ], - "description" : "Specifies the path to the file with annotated transcripts in the standard GTF\nformat. STAR will extract splice junctions from this file and use them to greatly improve\naccuracy of the mapping. 
Corresponds to the --sjdbGTFfile argument in the STAR command.\n", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "--genomeDir" - ], - "description" : "Path to output directory. Corresponds to the --genomeDir argument in the STAR command.", - "example" : [ - "/path/to/foo" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Genome indexing arguments", - "arguments" : [ - { - "type" : "integer", - "name" : "--genomeSAindexNbases", - "description" : "Length (bases) of the SA pre-indexing string. Typically between 10 and 15.\nLonger strings will use much more memory, but allow faster searches. For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down to\nmin(14, log2(GenomeLength)/2 - 1).\n", - "default" : [ - 14 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/" - } - ], - "description" : "Create a reference for STAR from a set of fasta files.", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/" - }, - { - "type" : "file", - "path" : "../../../resources_test/cellranger_tiny_fastq", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - 
"platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "docker", - "env" : [ - "STAR_VERSION 2.7.10b", - "PACKAGES gcc g++ make wget zlib1g-dev unzip" - ] - }, - { - "type" : "docker", - "run" : [ - "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" - ] - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", 
- "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/config.vsh.yml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_build_reference", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import re -import tempfile -import subprocess -from pathlib import Path -import tarfile -import gzip -import shutil - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'genome_fasta': $( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "r'${VIASH_PAR_GENOME_FASTA//\\'/\\'\\"\\'\\"r\\'}'.split(' ')"; else echo None; fi ), - 'transcriptome_gtf': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_GTF//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'genomeSAindexNbases': $( if [ ! -z ${VIASH_PAR_GENOMESAINDEXNBASES+x} ]; then echo "int(r'${VIASH_PAR_GENOMESAINDEXNBASES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -######################## -### Helper functions ### -######################## - -# helper function for cheching whether something is a gzip -def is_gz_file(path: Path) -> bool: - with open(path, 'rb') as file: - return file.read(2) == b'\\\\x1f\\\\x8b' - -# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path -def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: - if par_value.is_file() and tarfile.is_tarfile(par_value): - # Remove two extensions (if they exist) - extaction_dir_name = Path(par_value.stem).stem - unpacked_path = temp_dir_path / extaction_dir_name - print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) - - with tarfile.open(par_value, 'r') as open_tar: - members = open_tar.getmembers() - root_dirs = [member - for member in members - if member.isdir() and member.name != '.' 
and '/' not in member.name] - # if there is only one root_dir (and there are files in that directory) - # strip that directory name from the destination folder - if len(root_dirs) == 1: - for mem in members: - mem.path = Path(*Path(mem.path).parts[1:]) - members_to_move = [mem for mem in members if mem.path != Path('.')] - open_tar.extractall(unpacked_path, members=members_to_move) - return unpacked_path - - elif par_value.is_file() and is_gz_file(par_value): - # Remove extension (if it exists) - extaction_file_name = Path(par_value.stem) - unpacked_path = temp_dir_path / extaction_file_name - print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) - - with gzip.open(par_value, 'rb') as f_in: - with open(unpacked_path, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return unpacked_path - - else: - return par_value - -######################## -### Main code ### -######################## - -# rename keys and convert path strings to Path -# note: only list file arguments here. 
if non-file arguments also need to be renamed, -# the \\`processPar()\\` generator needs to be adapted -to_rename = {'genome_fasta': 'genomeFastaFiles', 'output': 'genomeDir', 'transcriptome_gtf': 'sjdbGTFfile'} - -def process_par(orig_par, to_rename): - for key, value in orig_par.items(): - # rename the key in par based on the \\`to_rename\\` dict - if key in to_rename.keys(): - new_key = to_rename[key] - - # also turn value into a Path - if isinstance(value, list): - new_value = [Path(val) for val in value] - else: - new_value = Path(value) - else: - new_key = key - new_value = value - yield new_key, new_value -par = dict(process_par(par, to_rename)) - -# create output dir if need be -par["genomeDir"].mkdir(parents=True, exist_ok=True) - -with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"]) as temp_dir: - - # checking for compressed files, ungzip files if need be - temp_dir_path = Path(temp_dir) - for par_name in ["genomeFastaFiles", "sjdbGTFfile"]: - par_values = par[par_name] - if par_values: - # turn value into list - is_multiple = isinstance(par_values, list) - if not is_multiple: - par_values = [ par_values ] - - # output list - new_values = [] - for par_value in par_values: - print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) - new_value = extract_if_need_be(par_value, temp_dir_path) - new_values.append(new_value) - - # unlist if need be - if not is_multiple: - new_values = new_values[0] - - # replace value - par[par_name] = new_values - # end ungzipping - print("", flush=True) - - print(">> Constructing command", flush=True) - par["runMode"] = "genomeGenerate" - par["outTmpDir"] = temp_dir_path / "run" - if 'cpus' in meta and meta['cpus']: - par["runThreadN"] = meta["cpus"] - - - cmd_args = [ "STAR" ] - for name, value in par.items(): - if value is not None: - if isinstance(value, list): - cmd_args.extend(["--" + name] + [str(x) for x in value]) - else: - cmd_args.extend(["--" + name, str(value)]) - 
print("", flush=True) - - print(">> Running STAR with command:", flush=True) - print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) - print("", flush=True) - - subprocess.run( - cmd_args, - check=True - ) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/mapping_star_build_reference", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. 
- // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/mapping/star_build_reference/nextflow.config b/target/nextflow/mapping/star_build_reference/nextflow.config deleted file mode 100644 index 
54bf431f934..00000000000 --- a/target/nextflow/mapping/star_build_reference/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'star_build_reference' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Create a reference for STAR from a set of fasta files.' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: 
mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/mapping/star_build_reference/nextflow_params.yaml b/target/nextflow/mapping/star_build_reference/nextflow_params.yaml deleted file mode 100644 index 616c69ec6f2..00000000000 --- a/target/nextflow/mapping/star_build_reference/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Input/Output -genome_fasta: # please fill in - example: ["chr1.fasta", "chr2.fasta"] -# transcriptome_gtf: "path/to/file" -# output: "$id.$key.output.output" - -# Genome indexing arguments -genomeSAindexNbases: 14 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/star_build_reference/nextflow_schema.json b/target/nextflow/mapping/star_build_reference/nextflow_schema.json deleted file mode 100644 index 221ec30f588..00000000000 --- a/target/nextflow/mapping/star_build_reference/nextflow_schema.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "star_build_reference", - "description": "Create a reference for STAR from a set of fasta files.", - "type": "object", - "definitions": { - "input/output" : { - "title": 
"Input/Output", - "type": "object", - "description": "No description", - "properties": { - - "genome_fasta": { - "type": "string", - "description": "Type: List of `file`, required, example: `chr1.fasta chr2.fasta`, multiple_sep: `\" \"`. The fasta files to be included in the reference", - "help_text": "Type: List of `file`, required, example: `chr1.fasta chr2.fasta`, multiple_sep: `\" \"`. The fasta files to be included in the reference. Corresponds to the --genomeFastaFiles argument in the STAR command." - }, - - "transcriptome_gtf": { - "type": "string", - "description": "Type: `file`. Specifies the path to the file with annotated transcripts in the standard GTF\nformat", - "help_text": "Type: `file`. Specifies the path to the file with annotated transcripts in the standard GTF\nformat. STAR will extract splice junctions from this file and use them to greatly improve\naccuracy of the mapping. Corresponds to the --sjdbGTFfile argument in the STAR command.\n" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --genomeDir argument in the STAR command.", - "default": "$id.$key.output.output" - } - - } - }, - "genome indexing arguments" : { - "title": "Genome indexing arguments", - "type": "object", - "description": "No description", - "properties": { - - "genomeSAindexNbases": { - "type": "integer", - "description": "Type: `integer`, default: `14`. Length (bases) of the SA pre-indexing string", - "help_text": "Type: `integer`, default: `14`. Length (bases) of the SA pre-indexing string. Typically between 10 and 15.\nLonger strings will use much more memory, but allow faster searches. 
For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down to\nmin(14, log2(GenomeLength)/2 - 1).\n", - "default": "14" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input/output" - }, - { - "$ref": "#/definitions/genome indexing arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/metadata/add_id/.config.vsh.yaml b/target/nextflow/metadata/add_id/.config.vsh.yaml deleted file mode 100644 index be9a9a8efa7..00000000000 --- a/target/nextflow/metadata/add_id/.config.vsh.yaml +++ /dev/null @@ -1,197 +0,0 @@ -functionality: - name: "add_id" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." - info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_id" - description: "The input id." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_output" - description: "Name of the .obs column where to store the id." 
- info: null - default: - - "sample_id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--make_observation_keys_unique" - description: "Join the id to the .obs index (.obs_names)." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Add id of .obs. Also allows to make .obs_names (the .obs index) unique\ - \ \nby prefixing the values with an unique id per .h5mu file.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: 
"python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/add_id" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/add_id/add_id" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/metadata/add_id/main.nf b/target/nextflow/metadata/add_id/main.nf deleted file mode 100644 index b298c27a847..00000000000 --- a/target/nextflow/metadata/add_id/main.nf +++ /dev/null @@ -1,2631 +0,0 @@ -// add_id 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "add_id", - "namespace" : "metadata", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Path to the input .h5mu.", - "example" : [ - "sample_path" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_id", - "description" : "The input id.", - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_output", - "description" : "Name of the .obs column where to store the id.", - "default" : [ - "sample_id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : 
"file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--make_observation_keys_unique", - "description" : "Join the id to the .obs index (.obs_names).", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Add id of .obs. 
Also allows to make .obs_names (the .obs index) unique \nby prefixing the values with an unique id per .h5mu file.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/" - }, - { - "type" : "file", - "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 
128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/add_id", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from __future__ import annotations -import sys -from mudata import read_h5mu, MuData - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_output': $( if [ ! -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'make_observation_keys_unique': $( if [ ! -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then echo "r'${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: - """ - Make the observation keys unique across all samples. At input, - the observation keys are unique within a sample. By adding the sample name - (unique for a sample) to each observation key, the observation key is made - unique across all samples as well. - """ - logger.info('Making observation keys unique across all samples.') - sample.obs.index = f"{sample_id}_" + sample.obs.index - make_observation_keys_unique_per_mod(sample_id, sample) - - -def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: - """ - Updating MuData.obs_names is not allowed (it is read-only). - So the observation keys for each modality has to be updated manually. 
- """ - for mod in sample.mod.values(): - mod.obs_names = f"{sample_id}_" + mod.obs_names - -def main(): - input_data = read_h5mu(par["input"]) - input_data.obs[par["obs_output"]] = par["input_id"] - for mod_data in input_data.mod.values(): - mod_data.obs[par["obs_output"]] = par["input_id"] - if par["make_observation_keys_unique"]: - make_observation_keys_unique(par["input_id"], input_data) - logger.info("Writing out data to '%s'.", par["output"]) - input_data.write_h5mu(par["output"], compression=par["output_compression"]) - -if __name__ == '__main__': - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/metadata_add_id", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. 
the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/metadata/add_id/nextflow.config b/target/nextflow/metadata/add_id/nextflow.config deleted file mode 100644 index 88b79808515..00000000000 
--- a/target/nextflow/metadata/add_id/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'add_id' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Add id of .obs. Also allows to make .obs_names (the .obs index) unique \nby prefixing the values with an unique id per .h5mu file.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 
128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/metadata/add_id/nextflow_params.yaml b/target/nextflow/metadata/add_id/nextflow_params.yaml deleted file mode 100644 index 9be66e50b43..00000000000 --- a/target/nextflow/metadata/add_id/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: "sample_path" -input_id: # please fill in - example: "foo" -obs_output: "sample_id" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -make_observation_keys_unique: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/add_id/nextflow_schema.json b/target/nextflow/metadata/add_id/nextflow_schema.json deleted file mode 100644 index e95aa967ada..00000000000 --- a/target/nextflow/metadata/add_id/nextflow_schema.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "add_id", - "description": "Add id of .obs. 
Also allows to make .obs_names (the .obs index) unique \nby prefixing the values with an unique id per .h5mu file.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `sample_path`. Path to the input ", - "help_text": "Type: `file`, required, example: `sample_path`. Path to the input .h5mu." - }, - - "input_id": { - "type": "string", - "description": "Type: `string`, required. The input id", - "help_text": "Type: `string`, required. The input id." - }, - - "obs_output": { - "type": "string", - "description": "Type: `string`, default: `sample_id`. Name of the ", - "help_text": "Type: `string`, default: `sample_id`. Name of the .obs column where to store the id.", - "default": "sample_id" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. ", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. ", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "make_observation_keys_unique": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Join the id to the ", - "help_text": "Type: `boolean_true`, default: `false`. Join the id to the .obs index (.obs_names).", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/metadata/add_id/setup_logger.py b/target/nextflow/metadata/add_id/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/metadata/add_id/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml b/target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml deleted file mode 100644 index 3a6f571dc37..00000000000 --- a/target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml +++ /dev/null @@ -1,244 +0,0 @@ -functionality: - name: "grep_annotation_column" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - description: "Arguments related to the input dataset." - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to the input .h5mu." 
- info: null - example: - - "sample_path" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_column" - description: "Column to query. If not specified, use .var_names or .obs_names,\ - \ depending on the value of --matrix" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to get the annotation matrix from.\n" - info: null - example: - - "rna" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--matrix" - description: "Matrix to fetch the column from that will be searched." - info: null - example: - - "var" - required: false - choices: - - "var" - - "obs" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - description: "Arguments related to how the output will be written." - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_match_column" - description: "Name of the column to write the result to." 
- info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_fraction_column" - description: "For the opposite axis, name of the column to write the fraction\ - \ of \nobservations that matches to the pattern.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Query options" - description: "Options related to the query" - arguments: - - type: "string" - name: "--regex_pattern" - description: "Regex to use to match with the input column." - info: null - example: - - "^[mM][tT]-" - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Perform a regex lookup on a column from the annotation matrices .obs\ - \ or .var.\nThe annotation matrix can originate from either a modality, or all\ - \ modalities (global .var or .obs).\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: 
"native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/grep_annotation_column" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/grep_annotation_column/grep_annotation_column" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/metadata/grep_annotation_column/main.nf b/target/nextflow/metadata/grep_annotation_column/main.nf deleted file mode 100644 index 060de1169c2..00000000000 --- a/target/nextflow/metadata/grep_annotation_column/main.nf +++ /dev/null @@ -1,2700 +0,0 @@ -// grep_annotation_column 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative 
-// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "grep_annotation_column", - "namespace" : "metadata", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "description" : "Arguments related to the input dataset.", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Path to the input .h5mu.", - "example" : [ - "sample_path" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_column", - "description" : "Column to query. 
If not specified, use .var_names or .obs_names, depending on the value of --matrix", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "Which modality to get the annotation matrix from.\n", - "example" : [ - "rna" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--matrix", - "description" : "Matrix to fetch the column from that will be searched.", - "example" : [ - "var" - ], - "required" : false, - "choices" : [ - "var", - "obs" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "description" : "Arguments related to how the output will be written.", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_match_column", - "description" : "Name of the column to write the result to.", - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_fraction_column", - "description" : "For the opposite axis, name of the column to write the fraction of \nobservations that matches to the pattern.\n", - "required" : false, - "direction" : "input", - 
"multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Query options", - "description" : "Options related to the query", - "arguments" : [ - { - "type" : "string", - "name" : "--regex_pattern", - "description" : "Regex to use to match with the input column.", - "example" : [ - "^[mM][tT]-" - ], - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/" - } - ], - "description" : "Perform a regex lookup on a column from the annotation matrices .obs or .var.\nThe annotation matrix can originate from either a modality, or all modalities (global .var or .obs).\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/" - }, - { - "type" : "file", - "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - 
"mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/grep_annotation_column", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e 
-tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -from pathlib import Path -from operator import attrgetter -import re -import numpy as np - - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_column': $( if [ ! -z ${VIASH_PAR_INPUT_COLUMN+x} ]; then echo "r'${VIASH_PAR_INPUT_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'matrix': $( if [ ! -z ${VIASH_PAR_MATRIX+x} ]; then echo "r'${VIASH_PAR_MATRIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_match_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MATCH_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_fraction_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FRACTION_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'regex_pattern': $( if [ ! -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then echo "r'${VIASH_PAR_REGEX_PATTERN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(par): - input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] - try: - compiled_regex = re.compile(par["regex_pattern"]) - except (TypeError, re.error) as e: - raise ValueError(f"{par['regex_pattern']} is not a valid regular expression pattern.") from e - else: - if compiled_regex.groups: - raise NotImplementedError("Using match groups is not supported by this component.") - logger.info('Reading input file %s, modality %s.', input_file, mod_name) - - mudata = mu.read_h5mu(input_file) - modality_data = mudata[mod_name] - annotation_matrix = getattr(modality_data, par['matrix']) - default_column = { - "var": attrgetter("var_names"), - "obs": attrgetter("obs_names") - } - if par["input_column"]: - try: - annotation_column = annotation_matrix[par["input_column"]] - except KeyError as e: - raise ValueError(f"Column {par['input_column']} could not be found for modality " - f"{par['modality']}. 
Available columns: {','.join(annotation_matrix.columns.to_list())}") from e - else: - annotation_column = default_column[par['matrix']](modality_data) - grep_result = annotation_column.str.contains(par["regex_pattern"], regex=True) - - other_axis_attribute = { - "var": "obs", - "obs": "var" - } - if par['output_fraction_column']: - pct_matching = np.ravel(np.sum(modality_data[:, grep_result].X, axis=1) / np.sum(modality_data.X, axis=1)) - getattr(modality_data, other_axis_attribute[par['matrix']])[par['output_fraction_column']] = pct_matching - getattr(modality_data, par['matrix'])[par["output_match_column"]] = grep_result - mudata.write(output_file, compression=par["output_compression"]) - -if __name__ == "__main__": - main(par) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/metadata_grep_annotation_column", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/metadata/grep_annotation_column/nextflow.config b/target/nextflow/metadata/grep_annotation_column/nextflow.config deleted file mode 100644 
index 3a7a5d0b395..00000000000 --- a/target/nextflow/metadata/grep_annotation_column/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'grep_annotation_column' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Perform a regex lookup on a column from the annotation matrices .obs or .var.\nThe annotation matrix can originate from either a modality, or all modalities (global .var or .obs).\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - 
withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml b/target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml deleted file mode 100644 index 7244549f726..00000000000 --- a/target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Inputs -input: # please fill in - example: "sample_path" -# input_column: "foo" -modality: # please fill in - example: "rna" -# matrix: "var" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -output_match_column: # please fill in - example: "foo" -# output_fraction_column: "foo" - -# Query options -regex_pattern: # please fill in - example: "^[mM][tT]-" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/grep_annotation_column/nextflow_schema.json b/target/nextflow/metadata/grep_annotation_column/nextflow_schema.json deleted file mode 100644 index ceae9c3ec04..00000000000 --- 
a/target/nextflow/metadata/grep_annotation_column/nextflow_schema.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "grep_annotation_column", - "description": "Perform a regex lookup on a column from the annotation matrices .obs or .var.\nThe annotation matrix can originate from either a modality, or all modalities (global .var or .obs).\n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "Arguments related to the input dataset.", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `sample_path`. Path to the input ", - "help_text": "Type: `file`, required, example: `sample_path`. Path to the input .h5mu." - }, - - "input_column": { - "type": "string", - "description": "Type: `string`. Column to query", - "help_text": "Type: `string`. Column to query. If not specified, use .var_names or .obs_names, depending on the value of --matrix" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, required, example: `rna`. Which modality to get the annotation matrix from", - "help_text": "Type: `string`, required, example: `rna`. Which modality to get the annotation matrix from.\n" - }, - - "matrix": { - "type": "string", - "description": "Type: `string`, example: `var`, choices: ``var`, `obs``. Matrix to fetch the column from that will be searched", - "help_text": "Type: `string`, example: `var`, choices: ``var`, `obs``. Matrix to fetch the column from that will be searched.", - "enum": ["var", "obs"] - - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "Arguments related to how the output will be written.", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. ", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. 
", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "output_match_column": { - "type": "string", - "description": "Type: `string`, required. Name of the column to write the result to", - "help_text": "Type: `string`, required. Name of the column to write the result to." - }, - - "output_fraction_column": { - "type": "string", - "description": "Type: `string`. For the opposite axis, name of the column to write the fraction of \nobservations that matches to the pattern", - "help_text": "Type: `string`. For the opposite axis, name of the column to write the fraction of \nobservations that matches to the pattern.\n" - } - - } - }, - "query options" : { - "title": "Query options", - "type": "object", - "description": "Options related to the query", - "properties": { - - "regex_pattern": { - "type": "string", - "description": "Type: `string`, required, example: `^[mM][tT]-`. Regex to use to match with the input column", - "help_text": "Type: `string`, required, example: `^[mM][tT]-`. Regex to use to match with the input column." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
- }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/query options" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/metadata/join_csv/.config.vsh.yaml b/target/nextflow/metadata/join_csv/.config.vsh.yaml deleted file mode 100644 index 3de4208c579..00000000000 --- a/target/nextflow/metadata/join_csv/.config.vsh.yaml +++ /dev/null @@ -1,229 +0,0 @@ -functionality: - name: "join_csv" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "MuData Input" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_key" - description: "Obs column name where the sample id can be found for each observation\ - \ to join on.\nUseful when adding metadata to concatenated samples.\nMutually\ - \ exclusive with `--var_key`.\"\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_key" - description: "Var column name where the sample id can be found for each variable\ - \ to join 
on.\nMutually exclusive with `--obs_key`.\"\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "MuData Output" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Metadata Input" - arguments: - - type: "file" - name: "--input_csv" - description: ".csv file containing metadata" - info: null - example: - - "metadata.csv" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--csv_key" - description: "column of the the csv that corresponds to the sample id." - info: null - default: - - "id" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Join a csv containing metadata to the .obs or .var field of a mudata\ - \ file." 
- test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/config.vsh.yml" - platform: "nextflow" - output: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_csv" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_csv/join_csv" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/metadata/join_csv/main.nf b/target/nextflow/metadata/join_csv/main.nf deleted file mode 100644 index 93d41508581..00000000000 --- a/target/nextflow/metadata/join_csv/main.nf +++ /dev/null @@ -1,2670 +0,0 @@ -// join_csv 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "join_csv", - "namespace" : "metadata", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "MuData Input", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_key", - "description" : "Obs column name where the sample id can be found for each observation to join on.\nUseful when adding metadata to concatenated samples.\nMutually exclusive with `--var_key`.\\"\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_key", - "description" : "Var column name where the sample id can be found for each variable to join on.\nMutually exclusive with `--obs_key`.\\"\n", - "required" : false, - 
"direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "MuData Output", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Metadata Input", - "arguments" : [ - { - "type" : "file", - "name" : "--input_csv", - "description" : ".csv file containing metadata", - "example" : [ - "metadata.csv" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--csv_key", - "description" : "column of the the csv that corresponds to the sample id.", - "default" : [ - "id" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Join a csv containing metadata to the .obs or .var field of a mudata file.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : 
"file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" 
: "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/config.vsh.yml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_csv", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import pandas as pd -from mudata import read_h5mu - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_key': $( if [ ! -z ${VIASH_PAR_OBS_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_key': $( if [ ! -z ${VIASH_PAR_VAR_KEY+x} ]; then echo "r'${VIASH_PAR_VAR_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_csv': $( if [ ! -z ${VIASH_PAR_INPUT_CSV+x} ]; then echo "r'${VIASH_PAR_INPUT_CSV//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'csv_key': $( if [ ! 
-z ${VIASH_PAR_CSV_KEY+x} ]; then echo "r'${VIASH_PAR_CSV_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -if par["obs_key"] and par["var_key"]: - raise ValueError("--obs_key can not be used in conjuction with --var_key.") -if not (par["obs_key"] or par["var_key"]): - raise ValueError("Must define either --obs_key or --var_key") - -logger.info("Read metadata csv from file") -metadata = pd.read_csv(par['input_csv'], sep=",", header=0, index_col=par["csv_key"]) -metadata.fillna('', inplace=True) - -logger.info("Read mudata from file") -mdata = read_h5mu(par['input']) -mod_data = mdata.mod[par['modality']] - -logger.info("Joining csv to mudata") -matrix = 'var' if par["var_key"] else 'obs' -matrix_sample_column_name = par["var_key"] if par["var_key"] else par["obs_key"] -original_matrix = getattr(mod_data, matrix) -sample_ids = original_matrix[matrix_sample_column_name] - -try: - new_columns = metadata.loc[sample_ids.tolist()] -except KeyError as e: - raise KeyError(f"Not all sample IDs selected from {matrix} " - "(using the column selected with --var_key or --obs_key) were found in " - "the csv file.") from e -new_matrix = pd.concat([original_matrix.reset_index(drop=True), - new_columns.reset_index(drop=True)], axis=1)\\\\ - .set_axis(original_matrix.index) -setattr(mod_data, matrix, new_matrix) - -logger.info("Write output to 
mudata file") -mdata.write_h5mu(par['output'], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/metadata_join_csv", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
- // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/metadata/join_csv/nextflow.config b/target/nextflow/metadata/join_csv/nextflow.config deleted file mode 100644 index 
900d214369f..00000000000 --- a/target/nextflow/metadata/join_csv/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'join_csv' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Join a csv containing metadata to the .obs or .var field of a mudata file.' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb 
{ memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/metadata/join_csv/nextflow_params.yaml b/target/nextflow/metadata/join_csv/nextflow_params.yaml deleted file mode 100644 index 4d1cdd4de31..00000000000 --- a/target/nextflow/metadata/join_csv/nextflow_params.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# MuData Input -input: # please fill in - example: "input.h5mu" -modality: "rna" -# obs_key: "foo" -# var_key: "foo" - -# MuData Output -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Metadata Input -input_csv: # please fill in - example: "metadata.csv" -csv_key: "id" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/join_csv/nextflow_schema.json b/target/nextflow/metadata/join_csv/nextflow_schema.json deleted file mode 100644 index 2fa3592ef1b..00000000000 --- a/target/nextflow/metadata/join_csv/nextflow_schema.json +++ /dev/null @@ -1,120 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "join_csv", - "description": "Join a csv containing metadata to the .obs or .var field of a mudata file.", - "type": "object", - "definitions": { - "mudata input" : { - 
"title": "MuData Input", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "obs_key": { - "type": "string", - "description": "Type: `string`. Obs column name where the sample id can be found for each observation to join on", - "help_text": "Type: `string`. Obs column name where the sample id can be found for each observation to join on.\nUseful when adding metadata to concatenated samples.\nMutually exclusive with `--var_key`.\"\n" - }, - - "var_key": { - "type": "string", - "description": "Type: `string`. Var column name where the sample id can be found for each variable to join on", - "help_text": "Type: `string`. Var column name where the sample id can be found for each variable to join on.\nMutually exclusive with `--obs_key`.\"\n" - } - - } - }, - "mudata output" : { - "title": "MuData Output", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "metadata input" : { - "title": "Metadata Input", - "type": "object", - "description": "No description", - "properties": { - - "input_csv": { - "type": "string", - "description": "Type: `file`, required, example: `metadata.csv`. ", - "help_text": "Type: `file`, required, example: `metadata.csv`. .csv file containing metadata" - }, - - "csv_key": { - "type": "string", - "description": "Type: `string`, default: `id`. column of the the csv that corresponds to the sample id", - "help_text": "Type: `string`, default: `id`. column of the the csv that corresponds to the sample id.", - "default": "id" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/mudata input" - }, - { - "$ref": "#/definitions/mudata output" - }, - { - "$ref": "#/definitions/metadata input" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/metadata/join_csv/setup_logger.py b/target/nextflow/metadata/join_csv/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/metadata/join_csv/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml b/target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml deleted file mode 100644 index cc71bc00f6f..00000000000 --- a/target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml +++ /dev/null @@ -1,171 +0,0 @@ -functionality: - name: "join_uns_to_obs" - namespace: "metadata" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_key" - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - 
alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Join a data frame of length 1 (1 row index value) in .uns containing\ - \ metadata to the .obs of a mudata file." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 
32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/config.vsh.yml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_uns_to_obs" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_uns_to_obs/join_uns_to_obs" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/metadata/join_uns_to_obs/main.nf b/target/nextflow/metadata/join_uns_to_obs/main.nf deleted file mode 100644 index 001f201741f..00000000000 --- a/target/nextflow/metadata/join_uns_to_obs/main.nf +++ /dev/null @@ -1,2577 +0,0 @@ -// join_uns_to_obs 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
- -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "join_uns_to_obs", - "namespace" : "metadata", - "version" : "0.12.3", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--uns_key", - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Join a data frame of length 1 (1 row index value) in .uns containing metadata to the .obs of a mudata file.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : 
"memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/config.vsh.yml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_uns_to_obs", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import pandas as pd -from mudata import read_h5mu - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'uns_key': $( if [ ! -z ${VIASH_PAR_UNS_KEY+x} ]; then echo "r'${VIASH_PAR_UNS_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Read mudata from file") -mdata = read_h5mu(par['input']) -mod_data = mdata.mod[par['modality']] - -logger.info("Joining uns to obs") -# get data frame -uns_df = mod_data.uns[par['uns_key']] - -# check for overlapping colnames -intersect_keys = uns_df.keys().intersection(mod_data.obs.keys()) -obs_drop = mod_data.obs.drop(intersect_keys, axis=1) - -# create data frame to join -uns_df_rep = uns_df.loc[uns_df.index.repeat(mod_data.n_obs)] -uns_df_rep.index = mod_data.obs_names - -# create new obs -mod_data.obs = pd.concat([obs_drop, uns_df_rep], axis=1) - -logger.info("Write output to mudata file") -mdata.write_h5mu(par['output'], compression=par["output_compression"]) - - -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/metadata_join_uns_to_obs", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - 
"simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. 
- // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/metadata/join_uns_to_obs/nextflow.config b/target/nextflow/metadata/join_uns_to_obs/nextflow.config deleted file mode 100644 index 
18335f85f2a..00000000000 --- a/target/nextflow/metadata/join_uns_to_obs/nextflow.config +++ /dev/null @@ -1,107 +0,0 @@ -manifest { - name = 'join_uns_to_obs' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Join a data frame of length 1 (1 row index value) in .uns containing metadata to the .obs of a mudata file.' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - 
withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml b/target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml deleted file mode 100644 index c3420e64d1b..00000000000 --- a/target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -uns_key: # please fill in - example: "foo" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json b/target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json deleted file mode 100644 index 87ee9413166..00000000000 --- a/target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "join_uns_to_obs", - "description": "Join a data frame of length 1 (1 row index value) in .uns containing metadata to the .obs of a mudata file.", - "type": "object", - "definitions": { - "arguments" : { - 
"title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "uns_key": { - "type": "string", - "description": "Type: `string`, required. ", - "help_text": "Type: `string`, required. " - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/metadata/join_uns_to_obs/setup_logger.py b/target/nextflow/metadata/join_uns_to_obs/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/metadata/join_uns_to_obs/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml b/target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml deleted file mode 100644 index 99515d2e742..00000000000 --- a/target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml +++ /dev/null @@ -1,192 +0,0 @@ -functionality: - name: "move_obsm_to_obs" - namespace: "metadata" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "MuData Input" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - 
info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_key" - description: "Key of a data structure to move from `.obsm` to `.obs`." - info: null - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "MuData Output" - arguments: - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Move a matrix from .obsm to .obs. 
Newly created columns in .obs will\ - \ \nbe created from the .obsm key suffixed with an underscore and the name of\ - \ the columns\nof the specified .obsm matrix.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - 
debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/move_obsm_to_obs" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/move_obsm_to_obs/move_obsm_to_obs" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/metadata/move_obsm_to_obs/main.nf b/target/nextflow/metadata/move_obsm_to_obs/main.nf deleted file mode 100644 index aebb7c34820..00000000000 --- a/target/nextflow/metadata/move_obsm_to_obs/main.nf +++ /dev/null @@ -1,2626 +0,0 @@ -// move_obsm_to_obs 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "move_obsm_to_obs", - "namespace" : "metadata", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "MuData Input", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_key", - "description" : "Key of a data structure to move from `.obsm` to `.obs`.", - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "MuData Output", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - 
"multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Move a matrix from .obsm to .obs. Newly created columns in .obs will \nbe created from the .obsm key suffixed with an underscore and the name of the columns\nof the specified .obsm matrix.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - 
"viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/move_obsm_to_obs", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -from functools import partial -from pandas.errors import MergeError -from mudata import read_h5mu - -## VIASH START -# The following code has been 
auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_key': $( if [ ! -z ${VIASH_PAR_OBSM_KEY+x} ]; then echo "r'${VIASH_PAR_OBSM_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Read mudata from file") -mdata = read_h5mu(par['input']) -try: - mod_data = mdata.mod[par['modality']] -except KeyError: - raise ValueError(f"Modality {par['modality']} does not exist.") - -logger.info("Moving .obm key %s", par["obsm_key"]) -try: - obsm_matrix = mod_data.obsm[par["obsm_key"]].copy() -except KeyError: - raise ValueError(f".obsm key {par['obsm_key']} was not found in " - f".obsm slot for modality {par['modality']}.") - - -obsm_matrix.rename(partial("{key}_{}".format, key=par["obsm_key"]), - axis="columns", copy=False, inplace=True) - -original_n_obs = len(mod_data.obs) -try: - logger.info(f".obs names: {mod_data.obs_names}") - logger.info(f".obsm index: {obsm_matrix.index}") - mod_data.obs = mod_data.obs.merge(obsm_matrix, how="left", - validate="one_to_one", - 
left_index=True, right_index=True) -except MergeError as e: - raise ValueError(f"Could not join .obsm matrix at {par['obsm_key']} to .obs because there " - "are some observation that are not overlapping between the two matrices " - "(indexes should overlap). This is either a bug or your mudata file is corrupt.") -del mod_data.obsm[par["obsm_key"]] - -logger.info("Write output to mudata file") -mdata.write_h5mu(par['output'], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/metadata_move_obsm_to_obs", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/metadata/move_obsm_to_obs/nextflow.config b/target/nextflow/metadata/move_obsm_to_obs/nextflow.config deleted file mode 100644 index 
54d2a441792..00000000000 --- a/target/nextflow/metadata/move_obsm_to_obs/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'move_obsm_to_obs' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Move a matrix from .obsm to .obs. Newly created columns in .obs will \nbe created from the .obsm key suffixed with an underscore and the name of the columns\nof the specified .obsm matrix.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: 
mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml b/target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml deleted file mode 100644 index 9d15103b6a6..00000000000 --- a/target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# MuData Input -input: # please fill in - example: "input.h5mu" -modality: "rna" -obsm_key: # please fill in - example: "foo" - -# MuData Output -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json b/target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json deleted file mode 100644 index 2378dbaca3d..00000000000 --- a/target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json +++ /dev/null @@ -1,90 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "move_obsm_to_obs", - "description": "Move a matrix from .obsm to .obs. 
Newly created columns in .obs will \nbe created from the .obsm key suffixed with an underscore and the name of the columns\nof the specified .obsm matrix.\n", - "type": "object", - "definitions": { - "mudata input" : { - "title": "MuData Input", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "obsm_key": { - "type": "string", - "description": "Type: `string`, required. Key of a data structure to move from `", - "help_text": "Type: `string`, required. Key of a data structure to move from `.obsm` to `.obs`." - } - - } - }, - "mudata output" : { - "title": "MuData Output", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/mudata input" - }, - { - "$ref": "#/definitions/mudata output" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/metadata/move_obsm_to_obs/setup_logger.py b/target/nextflow/metadata/move_obsm_to_obs/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/metadata/move_obsm_to_obs/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/neighbors/bbknn/.config.vsh.yaml b/target/nextflow/neighbors/bbknn/.config.vsh.yaml deleted file mode 100644 index f77d5ae8390..00000000000 --- a/target/nextflow/neighbors/bbknn/.config.vsh.yaml +++ /dev/null @@ -1,289 +0,0 @@ -functionality: - name: "bbknn" - namespace: "neighbors" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: 
"--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "The dimensionality reduction in `.obsm` to use for neighbour detection.\ - \ Defaults to X_pca." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_batch" - description: ".obs column name discriminating between your batches." - info: null - default: - - "batch" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output .h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_output" - description: "Mandatory .uns slot to store various neighbor output objects." - info: null - default: - - "neighbors" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_distances" - description: "In which .obsp slot to store the distance matrix between the resulting\ - \ neighbors." 
- info: null - default: - - "distances" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_connectivities" - description: "In which .obsp slot to store the connectivities matrix between the\ - \ resulting neighbors." - info: null - default: - - "connectivities" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_neighbors_within_batch" - description: "How many top neighbours to report for each batch; total number of\ - \ neighbours in the initial k-nearest-neighbours computation will be this number\ - \ times the number of batches." - info: null - default: - - 3 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_pcs" - description: "How many dimensions (in case of PCA, principal components) to use\ - \ in the analysis." - info: null - default: - - 50 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_trim" - description: "Trim the neighbours of each cell to these many top connectivities.\ - \ May help with population independence and improve the tidiness of clustering.\ - \ The lower the value the more independent the individual populations, at the\ - \ cost of more conserved batch effect. If `None` (default), sets the parameter\ - \ value automatically to 10 times `neighbors_within_batch` times the number\ - \ of batches. Set to 0 to skip." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "BBKNN network generation\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "bbknn" - - "scikit-learn~=1.2.2" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowcpu" - - "highmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: 
"memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/bbknn" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/bbknn/bbknn" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/neighbors/bbknn/main.nf b/target/nextflow/neighbors/bbknn/main.nf deleted file mode 100644 index b93b848a879..00000000000 --- a/target/nextflow/neighbors/bbknn/main.nf +++ /dev/null @@ -1,2706 +0,0 @@ -// bbknn 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (author) -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "bbknn", - "namespace" : "neighbors", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_input", - "description" : "The dimensionality reduction in `.obsm` to use for neighbour detection. 
Defaults to X_pca.", - "default" : [ - "X_pca" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_batch", - "description" : ".obs column name discriminating between your batches.", - "default" : [ - "batch" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output .h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--uns_output", - "description" : "Mandatory .uns slot to store various neighbor output objects.", - "default" : [ - "neighbors" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsp_distances", - "description" : "In which .obsp slot to store the distance matrix between the resulting neighbors.", - "default" : [ - "distances" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsp_connectivities", - "description" : "In which .obsp slot to store the connectivities matrix between the resulting neighbors.", - "default" : [ - "connectivities" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" 
- }, - { - "type" : "integer", - "name" : "--n_neighbors_within_batch", - "description" : "How many top neighbours to report for each batch; total number of neighbours in the initial k-nearest-neighbours computation will be this number times the number of batches.", - "default" : [ - 3 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_pcs", - "description" : "How many dimensions (in case of PCA, principal components) to use in the analysis.", - "default" : [ - 50 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_trim", - "description" : "Trim the neighbours of each cell to these many top connectivities. May help with population independence and improve the tidiness of clustering. The lower the value the more independent the individual populations, at the cost of more conserved batch effect. If `None` (default), sets the parameter value automatically to 10 times `neighbors_within_batch` times the number of batches. 
Set to 0 to skip.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/" - } - ], - "description" : "BBKNN network generation\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "build-essential" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "bbknn", - "scikit-learn~=1.2.2" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowcpu", - "highmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { 
- "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/bbknn", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from mudata import read_h5mu -import bbknn - -### VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! 
-z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'n_neighbors_within_batch': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_pcs': $( if [ ! -z ${VIASH_PAR_N_PCS+x} ]; then echo "int(r'${VIASH_PAR_N_PCS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_trim': $( if [ ! -z ${VIASH_PAR_N_TRIM+x} ]; then echo "int(r'${VIASH_PAR_N_TRIM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -mudata = read_h5mu(par["input"]) -adata = mudata.mod[par["modality"]] - -# copy data -tmp_adata = adata.copy() -bbknn.bbknn( - tmp_adata, - use_rep=par["obsm_input"], - batch_key = par["obs_batch"], - neighbors_within_batch=par["n_neighbors_within_batch"], - n_pcs=par["n_pcs"], - trim=par["n_trim"] -) - -# store output -adata.obsp[par["obsp_connectivities"]] = tmp_adata.obsp["connectivities"] -adata.obsp[par["obsp_distances"]] = tmp_adata.obsp["distances"] -adata.uns[par["uns_output"]] = tmp_adata.uns["neighbors"] -adata.uns[par["uns_output"]]["distances_key"] = par["obsp_distances"] -adata.uns[par["uns_output"]]["connectivities_key"] = par["obsp_connectivities"] - -# write to file -mudata.write_h5mu(par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/neighbors_bbknn", - "tag" : "0.12.0" - }, - "label" : [ - "lowcpu", - "highmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. 
the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/neighbors/bbknn/nextflow.config b/target/nextflow/neighbors/bbknn/nextflow.config deleted file mode 100644 index fc6fd3a76e2..00000000000 
--- a/target/nextflow/neighbors/bbknn/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'bbknn' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'BBKNN network generation\n' - author = 'Dries De Maeyer, Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } 
- withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/neighbors/bbknn/nextflow_params.yaml b/target/nextflow/neighbors/bbknn/nextflow_params.yaml deleted file mode 100644 index f7d8955af44..00000000000 --- a/target/nextflow/neighbors/bbknn/nextflow_params.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Arguments -input: # please fill in - example: "path/to/file" -modality: "rna" -obsm_input: "X_pca" -obs_batch: "batch" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -uns_output: "neighbors" -obsp_distances: "distances" -obsp_connectivities: "connectivities" -n_neighbors_within_batch: 3 -n_pcs: 50 -# n_trim: 123 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/neighbors/bbknn/nextflow_schema.json b/target/nextflow/neighbors/bbknn/nextflow_schema.json deleted file mode 100644 index d0bf3dedb42..00000000000 --- a/target/nextflow/neighbors/bbknn/nextflow_schema.json +++ /dev/null @@ -1,128 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "bbknn", - "description": "BBKNN network generation\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - 
"properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input h5mu file", - "help_text": "Type: `file`, required. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "obsm_input": { - "type": "string", - "description": "Type: `string`, default: `X_pca`. The dimensionality reduction in `", - "help_text": "Type: `string`, default: `X_pca`. The dimensionality reduction in `.obsm` to use for neighbour detection. Defaults to X_pca.", - "default": "X_pca" - }, - - "obs_batch": { - "type": "string", - "description": "Type: `string`, default: `batch`. ", - "help_text": "Type: `string`, default: `batch`. .obs column name discriminating between your batches.", - "default": "batch" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output ", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output .h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "uns_output": { - "type": "string", - "description": "Type: `string`, default: `neighbors`. Mandatory ", - "help_text": "Type: `string`, default: `neighbors`. Mandatory .uns slot to store various neighbor output objects.", - "default": "neighbors" - }, - - "obsp_distances": { - "type": "string", - "description": "Type: `string`, default: `distances`. In which ", - "help_text": "Type: `string`, default: `distances`. 
In which .obsp slot to store the distance matrix between the resulting neighbors.", - "default": "distances" - }, - - "obsp_connectivities": { - "type": "string", - "description": "Type: `string`, default: `connectivities`. In which ", - "help_text": "Type: `string`, default: `connectivities`. In which .obsp slot to store the connectivities matrix between the resulting neighbors.", - "default": "connectivities" - }, - - "n_neighbors_within_batch": { - "type": "integer", - "description": "Type: `integer`, default: `3`. How many top neighbours to report for each batch; total number of neighbours in the initial k-nearest-neighbours computation will be this number times the number of batches", - "help_text": "Type: `integer`, default: `3`. How many top neighbours to report for each batch; total number of neighbours in the initial k-nearest-neighbours computation will be this number times the number of batches.", - "default": "3" - }, - - "n_pcs": { - "type": "integer", - "description": "Type: `integer`, default: `50`. How many dimensions (in case of PCA, principal components) to use in the analysis", - "help_text": "Type: `integer`, default: `50`. How many dimensions (in case of PCA, principal components) to use in the analysis.", - "default": "50" - }, - - "n_trim": { - "type": "integer", - "description": "Type: `integer`. Trim the neighbours of each cell to these many top connectivities", - "help_text": "Type: `integer`. Trim the neighbours of each cell to these many top connectivities. May help with population independence and improve the tidiness of clustering. The lower the value the more independent the individual populations, at the cost of more conserved batch effect. If `None` (default), sets the parameter value automatically to 10 times `neighbors_within_batch` times the number of batches. Set to 0 to skip." 
- } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/neighbors/find_neighbors/.config.vsh.yaml b/target/nextflow/neighbors/find_neighbors/.config.vsh.yaml deleted file mode 100644 index 5a5a678ec3a..00000000000 --- a/target/nextflow/neighbors/find_neighbors/.config.vsh.yaml +++ /dev/null @@ -1,309 +0,0 @@ -functionality: - name: "find_neighbors" - namespace: "neighbors" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - 
default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsm_input" - description: "Which .obsm slot to use as a starting PCA embedding." - info: null - default: - - "X_pca" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file containing the found neighbors." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--uns_output" - description: "Mandatory .uns slot to store various neighbor output objects." - info: null - default: - - "neighbors" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_distances" - description: "In which .obsp slot to store the distance matrix between the resulting\ - \ neighbors." - info: null - default: - - "distances" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obsp_connectivities" - description: "In which .obsp slot to store the connectivities matrix between the\ - \ resulting neighbors." - info: null - default: - - "connectivities" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--metric" - description: "The distance metric to be used in the generation of the nearest\ - \ neighborhood network." 
- info: null - default: - - "euclidean" - required: false - choices: - - "cityblock" - - "cosine" - - "euclidean" - - "l1" - - "l2" - - "manhattan" - - "braycurtis" - - "canberra" - - "chebyshev" - - "correlation" - - "dice" - - "hamming" - - "jaccard" - - "kulsinski" - - "mahalanobis" - - "minkowski" - - "rogerstanimoto" - - "russellrao" - - "seuclidean" - - "sokalmichener" - - "sokalsneath" - - "sqeuclidean" - - "yule" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--num_neighbors" - description: "The size of local neighborhood (in terms of number of neighboring\ - \ data points) used for manifold approximation. Larger values result in more\ - \ global views of the manifold, while smaller values result in more local data\ - \ being preserved. In general values should be in the range 2 to 100. If knn\ - \ is True, number of nearest neighbors to be searched. If knn is False, a Gaussian\ - \ kernel width is set to the distance of the n_neighbors neighbor." - info: null - default: - - 15 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--seed" - description: "A random seed." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor\ - \ search efficiency of this heavily relies on UMAP [McInnes18], which also provides\ - \ a method for estimating connectivities of data points - the connectivity of\ - \ the manifold (method=='umap'). 
If method=='gauss', connectivities are computed\ - \ according to [Coifman05], in the adaption of [Haghverdi16].\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" 
- cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/find_neighbors" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/find_neighbors/find_neighbors" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/neighbors/find_neighbors/main.nf b/target/nextflow/neighbors/find_neighbors/main.nf deleted file mode 100644 index 413261c9451..00000000000 --- a/target/nextflow/neighbors/find_neighbors/main.nf +++ /dev/null @@ -1,2759 +0,0 @@ -// find_neighbors 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (maintainer) -// * Robrecht Cannoodt (contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "find_neighbors", - "namespace" : "neighbors", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsm_input", - "description" : "Which .obsm slot 
to use as a starting PCA embedding.", - "default" : [ - "X_pca" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file containing the found neighbors.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--uns_output", - "description" : "Mandatory .uns slot to store various neighbor output objects.", - "default" : [ - "neighbors" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsp_distances", - "description" : "In which .obsp slot to store the distance matrix between the resulting neighbors.", - "default" : [ - "distances" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obsp_connectivities", - "description" : "In which .obsp slot to store the connectivities matrix between the resulting neighbors.", - "default" : [ - "connectivities" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--metric", - "description" : "The distance metric to be used in the generation of the nearest neighborhood network.", - "default" : [ - "euclidean" - ], - "required" : false, - 
"choices" : [ - "cityblock", - "cosine", - "euclidean", - "l1", - "l2", - "manhattan", - "braycurtis", - "canberra", - "chebyshev", - "correlation", - "dice", - "hamming", - "jaccard", - "kulsinski", - "mahalanobis", - "minkowski", - "rogerstanimoto", - "russellrao", - "seuclidean", - "sokalmichener", - "sokalsneath", - "sqeuclidean", - "yule" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--num_neighbors", - "description" : "The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If knn is True, number of nearest neighbors to be searched. If knn is False, a Gaussian kernel width is set to the distance of the n_neighbors neighbor.", - "default" : [ - 15 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--seed", - "description" : "A random seed.", - "default" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor search efficiency of this heavily relies on UMAP [McInnes18], which also provides a method for estimating connectivities of data points - the connectivity of the manifold (method=='umap'). 
If method=='gauss', connectivities are computed according to [Coifman05], in the adaption of [Haghverdi16].\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowcpu", - "midmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 
1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/find_neighbors", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import mudata as mu -import scanpy as sc -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'metric': $( if [ ! -z ${VIASH_PAR_METRIC+x} ]; then echo "r'${VIASH_PAR_METRIC//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'num_neighbors': $( if [ ! -z ${VIASH_PAR_NUM_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_NUM_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) - -mod = par["modality"] -logger.info("Computing a neighborhood graph on modality %s", mod) -adata = mdata.mod[mod] -neighbors = sc.Neighbors(adata) -neighbors.compute_neighbors( - n_neighbors=par["num_neighbors"], - use_rep=par["obsm_input"], - metric=par["metric"], - random_state=par["seed"], - method="umap" -) - -adata.uns[par["uns_output"]] = { - 'connectivities_key': par["obsp_connectivities"], - 'distances_key': par["obsp_distances"], 
- 'params': { - 'n_neighbors': neighbors.n_neighbors, - 'method': "umap", - 'random_state': par["seed"], - 'metric': par["metric"], - 'use_rep': par["obsm_input"] - } -} - -adata.obsp[par["obsp_distances"]] = neighbors.distances -adata.obsp[par["obsp_connectivities"]] = neighbors.connectivities - -logger.info("Writing to %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/neighbors_find_neighbors", - "tag" : "0.12.0" - }, - "label" : [ - "lowcpu", - "midmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/neighbors/find_neighbors/nextflow.config b/target/nextflow/neighbors/find_neighbors/nextflow.config deleted file mode 100644 index 
18042a2871e..00000000000 --- a/target/nextflow/neighbors/find_neighbors/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'find_neighbors' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor search efficiency of this heavily relies on UMAP [McInnes18], which also provides a method for estimating connectivities of data points - the connectivity of the manifold (method==\'umap\'). If method==\'gauss\', connectivities are computed according to [Coifman05], in the adaption of [Haghverdi16].\n' - author = 'Dries De Maeyer, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - 
withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/neighbors/find_neighbors/nextflow_params.yaml b/target/nextflow/neighbors/find_neighbors/nextflow_params.yaml deleted file mode 100644 index a709dbf4760..00000000000 --- a/target/nextflow/neighbors/find_neighbors/nextflow_params.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -obsm_input: "X_pca" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -uns_output: "neighbors" -obsp_distances: "distances" -obsp_connectivities: "connectivities" -metric: "euclidean" -num_neighbors: 15 -seed: 0 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/neighbors/find_neighbors/nextflow_schema.json b/target/nextflow/neighbors/find_neighbors/nextflow_schema.json 
deleted file mode 100644 index d88b61109ca..00000000000 --- a/target/nextflow/neighbors/find_neighbors/nextflow_schema.json +++ /dev/null @@ -1,124 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "find_neighbors", - "description": "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor search efficiency of this heavily relies on UMAP [McInnes18], which also provides a method for estimating connectivities of data points - the connectivity of the manifold (method==\u0027umap\u0027). If method==\u0027gauss\u0027, connectivities are computed according to [Coifman05], in the adaption of [Haghverdi16].\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "obsm_input": { - "type": "string", - "description": "Type: `string`, default: `X_pca`. Which ", - "help_text": "Type: `string`, default: `X_pca`. Which .obsm slot to use as a starting PCA embedding.", - "default": "X_pca" - }, - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file containing the found neighbors", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file containing the found neighbors.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "uns_output": { - "type": "string", - "description": "Type: `string`, default: `neighbors`. Mandatory ", - "help_text": "Type: `string`, default: `neighbors`. Mandatory .uns slot to store various neighbor output objects.", - "default": "neighbors" - }, - - "obsp_distances": { - "type": "string", - "description": "Type: `string`, default: `distances`. In which ", - "help_text": "Type: `string`, default: `distances`. In which .obsp slot to store the distance matrix between the resulting neighbors.", - "default": "distances" - }, - - "obsp_connectivities": { - "type": "string", - "description": "Type: `string`, default: `connectivities`. In which ", - "help_text": "Type: `string`, default: `connectivities`. In which .obsp slot to store the connectivities matrix between the resulting neighbors.", - "default": "connectivities" - }, - - "metric": { - "type": "string", - "description": "Type: `string`, default: `euclidean`, choices: ``cityblock`, `cosine`, `euclidean`, `l1`, `l2`, `manhattan`, `braycurtis`, `canberra`, `chebyshev`, `correlation`, `dice`, `hamming`, `jaccard`, `kulsinski`, `mahalanobis`, `minkowski`, `rogerstanimoto`, `russellrao`, `seuclidean`, `sokalmichener`, `sokalsneath`, `sqeuclidean`, `yule``. The distance metric to be used in the generation of the nearest neighborhood network", - "help_text": "Type: `string`, default: `euclidean`, choices: ``cityblock`, `cosine`, `euclidean`, `l1`, `l2`, `manhattan`, `braycurtis`, `canberra`, `chebyshev`, `correlation`, `dice`, `hamming`, `jaccard`, `kulsinski`, `mahalanobis`, `minkowski`, `rogerstanimoto`, `russellrao`, `seuclidean`, `sokalmichener`, `sokalsneath`, `sqeuclidean`, `yule``. 
The distance metric to be used in the generation of the nearest neighborhood network.", - "enum": ["cityblock", "cosine", "euclidean", "l1", "l2", "manhattan", "braycurtis", "canberra", "chebyshev", "correlation", "dice", "hamming", "jaccard", "kulsinski", "mahalanobis", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule"] - , - "default": "euclidean" - }, - - "num_neighbors": { - "type": "integer", - "description": "Type: `integer`, default: `15`. The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation", - "help_text": "Type: `integer`, default: `15`. The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If knn is True, number of nearest neighbors to be searched. If knn is False, a Gaussian kernel width is set to the distance of the n_neighbors neighbor.", - "default": "15" - }, - - "seed": { - "type": "integer", - "description": "Type: `integer`, default: `0`. A random seed", - "help_text": "Type: `integer`, default: `0`. A random seed.", - "default": "0" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/neighbors/find_neighbors/setup_logger.py b/target/nextflow/neighbors/find_neighbors/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/neighbors/find_neighbors/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/process_10xh5/filter_10xh5/.config.vsh.yaml b/target/nextflow/process_10xh5/filter_10xh5/.config.vsh.yaml deleted file mode 100644 index 5a0d1dbef6d..00000000000 --- a/target/nextflow/process_10xh5/filter_10xh5/.config.vsh.yaml +++ /dev/null @@ -1,195 +0,0 @@ -functionality: - name: "filter_10xh5" - namespace: "process_10xh5" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "An h5 file from the 10x genomics website." 
- info: null - example: - - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output h5 file." - info: null - example: - - "pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_library_size" - description: "Minimum library size." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells_per_gene" - description: "Minimum number of cells per gene." - info: null - default: - - 0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--keep_feature_types" - description: "Specify which feature types will never be filtered out" - info: null - example: - - "Antibody Capture" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--verbose" - description: "Increase verbosity" - info: null - direction: "input" - dest: "par" - resources: - - type: "r_script" - path: "script.R" - is_executable: true - description: "Filter a 10x h5 dataset.\n" - usage: "filter_10xh5 \\\n --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\ - \n --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\\n --min_library_size\ - \ 1000 --min_cells_per_gene 300\n" - test_resources: - - type: "r_script" - path: "run_test.R" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: 
"eddelbuettel/r2u:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libhdf5-dev python3-pip python3-dev" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - - type: "r" - cran: - - "testthat" - - "anndata" - - "hdf5r" - bioc_force_install: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/process_10xh5/filter_10xh5" - executable: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/process_10xh5/filter_10xh5/filter_10xh5" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/process_10xh5/filter_10xh5/main.nf b/target/nextflow/process_10xh5/filter_10xh5/main.nf deleted file mode 100644 index faff379d179..00000000000 --- a/target/nextflow/process_10xh5/filter_10xh5/main.nf +++ /dev/null @@ -1,2642 +0,0 @@ -// filter_10xh5 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "filter_10xh5", - "namespace" : "process_10xh5", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "An h5 file from the 10x genomics website.", - "example" : [ - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output h5 file.", - "example" : [ - "pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_library_size", - "description" : "Minimum library size.", - "default" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_cells_per_gene", - "description" : "Minimum number of cells per 
gene.", - "default" : [ - 0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--keep_feature_types", - "description" : "Specify which feature types will never be filtered out", - "example" : [ - "Antibody Capture" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--verbose", - "description" : "Increase verbosity", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "r_script", - "path" : "script.R", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/" - } - ], - "description" : "Filter a 10x h5 dataset.\n", - "usage" : "filter_10xh5 \\\\\n --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\\\n --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\\\\n --min_library_size 1000 --min_cells_per_gene 300\n", - "test_resources" : [ - { - "type" : "r_script", - "path" : "run_test.R", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "eddelbuettel/r2u:22.04", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ 
- { - "type" : "apt", - "packages" : [ - "libhdf5-dev python3-pip python3-dev" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - }, - { - "type" : "r", - "cran" : [ - "testthat", - "anndata", - "hdf5r" - ], - "bioc_force_install" : false - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/process_10xh5/filter_10xh5", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : 
"https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -# treat warnings as errors -.viash_orig_warn <- options(warn = 2) - -par <- list( - "input" = $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "min_library_size" = $( if [ ! -z ${VIASH_PAR_MIN_LIBRARY_SIZE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_LIBRARY_SIZE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "min_cells_per_gene" = $( if [ ! -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_CELLS_PER_GENE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "keep_feature_types" = $( if [ ! -z ${VIASH_PAR_KEEP_FEATURE_TYPES+x} ]; then echo -n "strsplit('"; echo -n "$VIASH_PAR_KEEP_FEATURE_TYPES" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "', split = ':')[[1]]"; else echo NULL; fi ), - "verbose" = $( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo -n "as.logical(toupper('"; echo -n "$VIASH_PAR_VERBOSE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'))"; else echo NULL; fi ) -) -meta <- list( - "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "executable" = $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), - "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), - "memory_pb" = $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) -) - - -# restore original warn setting -options(.viash_orig_warn) -rm(.viash_orig_warn) - -## VIASH END - -if (par\\$verbose) cat("Loading dependencies\\\\n") -requireNamespace("hdf5r", quietly = TRUE) - -if (par\\$verbose) cat("Opening h5 file\\\\n") -h5 <- hdf5r::H5File\\$new(par\\$input, mode = "r") - -if (par\\$verbose) cat("Reading data in memory\\\\n") -features__all_tag_keys <- h5[["matrix/features/_all_tag_keys"]][] - -features <- data.frame( - feature_type = h5[["matrix/features/feature_type"]][], - genome = h5[["matrix/features/genome"]][], - id = h5[["matrix/features/id"]][], - name = h5[["matrix/features/name"]][] -) - -mat <- Matrix::sparseMatrix( - i = h5[["matrix/indices"]][], - p = h5[["matrix/indptr"]][], - x = h5[["matrix/data"]][], - dims = h5[["matrix/shape"]][], - index1 = FALSE, - dimnames = list( - features\\$id, - h5[["matrix/barcodes"]][] - ) -) - -if (par\\$verbose) cat("Filtering out cells with library size < ", par\\$min_library_size, "\\\\n", sep = "") -library_size <- Matrix::colSums(mat) -mat2 <- mat[, library_size >= par\\$min_library_size, drop = FALSE] - -if (par\\$verbose) cat("Filtering genes with num cells < ", par\\$min_cells_per_gene, "\\\\n", sep = "") -num_cells <- Matrix::rowSums(mat2 > 0) -mat3 <- mat2[num_cells >= par\\$min_cells_per_gene | features\\$feature_type %in% par\\$keep_feature_types, , drop = FALSE] -features2 <- features[match(rownames(mat3), features\\$id), , drop = FALSE] - -# helper fun -set_with_type <- function(path, value) { - orig_dtype <- h5[[path]]\\$get_type() - orig_chunk <- h5[[path]]\\$chunk_dims - if (is.na(orig_chunk)) orig_chunk <- "auto" - h5new\\$create_dataset(path, value, dtype = orig_dtype, chunk_dims = orig_chunk) -} - -# create new file -if (par\\$verbose) cat("Saving h5 file at '", par\\$output, "'\\\\n", sep = "") -h5new 
<- hdf5r::H5File\\$new(par\\$output, mode = "w") -zz <- h5new\\$create_group("matrix") -zz <- h5new\\$create_group("matrix/features") - -set_with_type("matrix/features/feature_type", features2\\$feature_type) -set_with_type("matrix/features/genome", features2\\$genome) -set_with_type("matrix/features/id", features2\\$id) -set_with_type("matrix/features/name", features2\\$name) -set_with_type("matrix/features/_all_tag_keys", features__all_tag_keys) -set_with_type("matrix/indices", mat3@i) -set_with_type("matrix/indptr", mat3@p) -set_with_type("matrix/data", as.integer(mat3@x)) -set_with_type("matrix/shape", dim(mat3)) -set_with_type("matrix/barcodes", colnames(mat3)) - -for (attname in hdf5r::h5attr_names(h5)) { - h5new\\$create_attr(attname, hdf5r::h5attr(h5, attname)) -} -h5new\\$close_all() -h5\\$close_all() -VIASHMAIN -Rscript "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/process_10xh5_filter_10xh5", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/process_10xh5/filter_10xh5/nextflow.config b/target/nextflow/process_10xh5/filter_10xh5/nextflow.config deleted file mode 100644 index 
a1318b935b8..00000000000 --- a/target/nextflow/process_10xh5/filter_10xh5/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'filter_10xh5' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Filter a 10x h5 dataset.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: 
mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml b/target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml deleted file mode 100644 index 62f6a24296e..00000000000 --- a/target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" -# output: "$id.$key.output.h5" -min_library_size: 0 -min_cells_per_gene: 0 -# keep_feature_types: ["Antibody Capture"] -verbose: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json b/target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json deleted file mode 100644 index 30490b5589a..00000000000 --- a/target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json +++ /dev/null @@ -1,85 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "filter_10xh5", - "description": "Filter a 10x h5 dataset.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - 
"properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. An h5 file from the 10x genomics website", - "help_text": "Type: `file`, required, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. An h5 file from the 10x genomics website." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5`. Output h5 file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5`. Output h5 file.", - "default": "$id.$key.output.h5" - }, - - "min_library_size": { - "type": "integer", - "description": "Type: `integer`, default: `0`. Minimum library size", - "help_text": "Type: `integer`, default: `0`. Minimum library size.", - "default": "0" - }, - - "min_cells_per_gene": { - "type": "integer", - "description": "Type: `integer`, default: `0`. Minimum number of cells per gene", - "help_text": "Type: `integer`, default: `0`. Minimum number of cells per gene.", - "default": "0" - }, - - "keep_feature_types": { - "type": "string", - "description": "Type: List of `string`, example: `Antibody Capture`, multiple_sep: `\":\"`. Specify which feature types will never be filtered out", - "help_text": "Type: List of `string`, example: `Antibody Capture`, multiple_sep: `\":\"`. Specify which feature types will never be filtered out" - }, - - "verbose": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Increase verbosity", - "help_text": "Type: `boolean_true`, default: `false`. Increase verbosity", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml b/target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml deleted file mode 100644 index ff0ff847a5a..00000000000 --- a/target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml +++ /dev/null @@ -1,235 +0,0 @@ -functionality: - name: "calculate_qc_metrics" - namespace: "qc" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - info: null - example: - - "raw_counts" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--var_qc_metrics" - description: "Keys to select a boolean (containing only True or False) column\ - \ from .var.\nFor each cell, calculate the proportion of total values for\ - \ genes which are labeled 'True', \ncompared to the total sum of the values\ - \ for all genes.\n" - info: null - example: - - "ercc,highly_variable,mitochondrial" - required: false - direction: "input" - 
multiple: true - multiple_sep: "," - dest: "par" - - type: "boolean" - name: "--var_qc_metrics_fill_na_value" - description: "Fill any 'NA' values found in the columns specified with --var_qc_metrics\ - \ to 'True' or 'False'.\nas False.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--top_n_vars" - description: "Number of top vars to be used to calculate cumulative proportions.\n\ - If not specified, proportions are not calculated. `--top_n_vars 20,50` finds\n\ - cumulative proportion to the 20th and 50th most expressed vars.\n" - info: null - required: false - direction: "input" - multiple: true - multiple_sep: "," - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." - info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Add basic quality control metrics to an .h5mu file.\n\nThe metrics\ - \ are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they\ - \ have slightly different names:\n\nVar metrics (name in this component -> name\ - \ in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs\ - \ -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts\ - \ -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n\ - \ - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics}\ - \ -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars ->\ - \ pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n\ - \ \n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scikit-learn~=1.2.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - 
- "scanpy~=1.9.5" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/calculate_qc_metrics" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/calculate_qc_metrics/calculate_qc_metrics" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/qc/calculate_qc_metrics/main.nf b/target/nextflow/qc/calculate_qc_metrics/main.nf deleted file mode 100644 index b884cef0609..00000000000 --- a/target/nextflow/qc/calculate_qc_metrics/main.nf +++ /dev/null @@ -1,2739 +0,0 @@ -// calculate_qc_metrics 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work 
thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "calculate_qc_metrics", - "namespace" : "qc", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer", - "example" : [ - "raw_counts" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--var_qc_metrics", - "description" : 
"Keys to select a boolean (containing only True or False) column from .var.\nFor each cell, calculate the proportion of total values for genes which are labeled 'True', \ncompared to the total sum of the values for all genes.\n", - "example" : [ - "ercc,highly_variable,mitochondrial" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ",", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--var_qc_metrics_fill_na_value", - "description" : "Fill any 'NA' values found in the columns specified with --var_qc_metrics to 'True' or 'False'.\nas False.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--top_n_vars", - "description" : "Number of top vars to be used to calculate cumulative proportions.\nIf not specified, proportions are not calculated. `--top_n_vars 20,50` finds\ncumulative proportion to the 20th and 50th most expressed vars.\n", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ",", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/" - }, - { 
- "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Add basic quality control metrics to an .h5mu file.\n\nThe metrics are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they have slightly different names:\n\nVar metrics (name in this component -> name in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics} -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars -> pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n \n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - 
"scikit-learn~=1.2.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "midmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/calculate_qc_metrics", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys 
-from mudata import read_h5mu -from scipy.sparse import issparse, isspmatrix_coo, csr_matrix -from sklearn.utils.sparsefuncs import mean_variance_axis -import numpy as np - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'var_qc_metrics': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), - 'var_qc_metrics_fill_na_value': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'top_n_vars': $( if [ ! -z ${VIASH_PAR_TOP_N_VARS+x} ]; then echo "list(map(int, r'${VIASH_PAR_TOP_N_VARS//\\'/\\'\\"\\'\\"r\\'}'.split(',')))"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - input_data = read_h5mu(par["input"]) - modality_data = input_data.mod[par["modality"]] - var = modality_data.var - layer = modality_data.X if not par['layer'] else modality_data.layers[par['layer']] - if not issparse(layer): - raise NotImplementedError("Expected layer to be in sparse format.") - if isspmatrix_coo(layer): - layer = csr_matrix(layer) - layer.eliminate_zeros() - - # var statistics - num_nonzero_obs = layer.getnnz(axis=0) - obs_mean, _ = mean_variance_axis(layer, axis=0) - pct_dropout = (1 - num_nonzero_obs / layer.shape[0]) * 100 - total_counts_obs = np.ravel(layer.sum(axis=0)) - - # obs statistics - num_nonzero_vars = layer.getnnz(axis=1) - total_counts_var = np.ravel(layer.sum(axis=1)) - - top_metrics = {} - if par["top_n_vars"]: - par["top_n_vars"] = sorted(par["top_n_vars"]) - distributions = get_top_from_csr_matrix(layer, par["top_n_vars"]) - top_metrics = {distribution_size: distribution * 100 - for distribution_size, distribution - in zip(par["top_n_vars"], distributions.T)} - - total_expr_qc = {} - pct_expr_qc = {} - if par["var_qc_metrics"]: - for qc_metric in par["var_qc_metrics"]: - if not qc_metric in var: - raise ValueError(f"Value for --var_qc_metrics, {qc_metric} " - f"not found in 
.var for modality {par['modality']}") - qc_column = var[qc_metric] - if qc_column.isna().any(): - if par["var_qc_metrics_fill_na_value"] is None: - raise ValueError(f"The .var column '{qc_metric}', selected by '--var_qc_metrics', contains NA values. " - "It is ambiguous whether or not to include these values in the static calulation. " - "You can explicitly map the NA values to 'False' or 'True using '--var_qc_metrics_fill_na_value'") - else: - qc_column = qc_column.fillna(par['var_qc_metrics_fill_na_value'], inplace=False) - qc_column = qc_column.values - if set(np.unique(qc_column)) - {True, False}: - raise ValueError(f"Column {qc_metric} in .var for modality {par['modality']} " - f"must only contain boolean values") - - total_expr_qc[qc_metric] = np.ravel(layer[:, qc_column].sum(axis=1)) - pct_expr_qc[qc_metric] = total_expr_qc[qc_metric] / total_counts_var * 100 - - # Write all of the calculated statistics - modality_data.var = modality_data.var.assign( - **{"pct_dropout": pct_dropout, - "num_nonzero_obs": num_nonzero_obs, - "obs_mean": obs_mean, - "total_counts": total_counts_obs}) - - modality_data.obs = modality_data.obs.assign( - **({"num_nonzero_vars": num_nonzero_vars, - "total_counts": total_counts_var} | \\\\ - {f"pct_{qc_metric}": col for qc_metric, col in pct_expr_qc.items()} | \\\\ - {f"total_counts_{qc_metrix}": col for qc_metrix, col in total_expr_qc.items()}) | \\\\ - {f"pct_of_counts_in_top_{n_top}_vars": col for n_top, col in top_metrics.items()}) - - input_data.write(par["output"], compression=par["output_compression"]) - -def get_top_from_csr_matrix(matrix, top_n_genes): - # csr matrices stores a 3D matrix in a format such that data for individual cells - # are stored in 1 array. Another array (indptr) here stores the ranges of indices - # to select from the data-array (.e.g. data[indptr[0]:indptr[1]] for row 0) for each row. 
- # Another array 'indices' maps each element of data to a column - # (data and indices arrays have the same length) - top_n_genes = np.array(top_n_genes).astype(np.int64) - assert np.all(top_n_genes[:-1] <= top_n_genes[1:]), "top_n_genes must be sorted" - row_indices, data = matrix.indptr, matrix.data - number_of_rows, max_genes_to_parse = row_indices.size-1, top_n_genes[-1] - top_data = np.zeros((number_of_rows, max_genes_to_parse), - dtype=data.dtype) - # Loop over each row to create a dense matrix without the 0 counts, - # but not for the whole matrix, only store the genes up until - # the largest number of top n genes. - for row_number in range(number_of_rows): - row_start_index, row_end_index = row_indices[row_number], row_indices[row_number+1] - row_data = data[row_start_index:row_end_index] # all non-zero counts for an row - try: - # There are less genes with counts in the row than the - # maximum number of genes we would like to select - # all these genes are in the top genes, just store them - top_data[row_number, :row_end_index-row_start_index] = row_data - except ValueError: - # Store the counts for the top genes - top_data[row_number, :] = np.partition(row_data, -max_genes_to_parse)[-max_genes_to_parse:] - - # Partition works from smallest to largest, but we want largest - # so do smallest to largest first (but with reversed indices) - top_data = np.partition(top_data, max_genes_to_parse - top_n_genes) - # And then switch the order around - top_data = np.flip(top_data, axis=1) - - cumulative = top_data.cumsum(axis=1, dtype=np.float64)[:,top_n_genes-1] - return cumulative / np.array(matrix.sum(axis=1)) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - 
"registry" : "ghcr.io", - "image" : "openpipelines-bio/qc_calculate_qc_metrics", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "midmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. 
- // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def 
inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? 
arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/qc/calculate_qc_metrics/nextflow.config b/target/nextflow/qc/calculate_qc_metrics/nextflow.config deleted file mode 100644 index 
b81c428a5af..00000000000 --- a/target/nextflow/qc/calculate_qc_metrics/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'calculate_qc_metrics' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Add basic quality control metrics to an .h5mu file.\n\nThe metrics are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they have slightly different names:\n\nVar metrics (name in this component -> name in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics} -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars -> pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n \n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - 
shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml b/target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml deleted file mode 100644 index 90ae6c7000e..00000000000 --- a/target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# Inputs -input: # please fill in - example: "input.h5mu" -modality: "rna" -# layer: "raw_counts" -# var_qc_metrics: ["ercc", "highly_variable", "mitochondrial"] -# var_qc_metrics_fill_na_value: true -# top_n_vars: [123] - -# Outputs -# output: 
"$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json b/target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json deleted file mode 100644 index 4dd04b7ec2b..00000000000 --- a/target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "calculate_qc_metrics", - "description": "Add basic quality control metrics to an .h5mu file.\n\nThe metrics are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they have slightly different names:\n\nVar metrics (name in this component -\u003e name in scanpy):\n - pct_dropout -\u003e pct_dropout_by_{expr_type}\n - num_nonzero_obs -\u003e n_cells_by_{expr_type}\n - obs_mean -\u003e mean_{expr_type}\n - total_counts -\u003e total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -\u003e n_genes_by_{expr_type}\n - pct_{var_qc_metrics} -\u003e pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics} -\u003e total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars -\u003e pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -\u003e total_{expr_type}\n \n", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "layer": { - "type": "string", - "description": "Type: `string`, example: `raw_counts`. ", - "help_text": "Type: `string`, example: `raw_counts`. 
" - }, - - "var_qc_metrics": { - "type": "string", - "description": "Type: List of `string`, example: `ercc,highly_variable,mitochondrial`, multiple_sep: `\",\"`. Keys to select a boolean (containing only True or False) column from ", - "help_text": "Type: List of `string`, example: `ercc,highly_variable,mitochondrial`, multiple_sep: `\",\"`. Keys to select a boolean (containing only True or False) column from .var.\nFor each cell, calculate the proportion of total values for genes which are labeled \u0027True\u0027, \ncompared to the total sum of the values for all genes.\n" - }, - - "var_qc_metrics_fill_na_value": { - "type": "boolean", - "description": "Type: `boolean`. Fill any \u0027NA\u0027 values found in the columns specified with --var_qc_metrics to \u0027True\u0027 or \u0027False\u0027", - "help_text": "Type: `boolean`. Fill any \u0027NA\u0027 values found in the columns specified with --var_qc_metrics to \u0027True\u0027 or \u0027False\u0027.\nas False.\n" - }, - - "top_n_vars": { - "type": "string", - "description": "Type: List of `integer`, multiple_sep: `\",\"`. Number of top vars to be used to calculate cumulative proportions", - "help_text": "Type: List of `integer`, multiple_sep: `\",\"`. Number of top vars to be used to calculate cumulative proportions.\nIf not specified, proportions are not calculated. `--top_n_vars 20,50` finds\ncumulative proportion to the 20th and 50th most expressed vars.\n" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/qc/calculate_qc_metrics/setup_logger.py b/target/nextflow/qc/calculate_qc_metrics/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/qc/calculate_qc_metrics/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/qc/fastqc/.config.vsh.yaml b/target/nextflow/qc/fastqc/.config.vsh.yaml deleted file mode 100644 index cb068c0f230..00000000000 --- a/target/nextflow/qc/fastqc/.config.vsh.yaml +++ /dev/null @@ -1,156 +0,0 @@ -functionality: - name: "fastqc" - namespace: "qc" - version: "0.12.3" - arguments: - - type: "string" - name: "--mode" - alternatives: - - "-m" - description: "The mode in which the component works. Can be either files or dir." 
- info: null - default: - - "files" - required: false - choices: - - "files" - - "dir" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Directory containing input fastq files." - info: null - example: - - "fastq_dir" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output directory to write reports to." - info: null - example: - - "qc" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--threads" - alternatives: - - "-t" - description: "Specifies the number of files which can be processed simultaneously.\ - \ Each thread will be allocated 250MB of\nmemory.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.\ - \ This component can take one or more files (by means of shell globbing) or a\ - \ complete directory.\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq/cellranger_tiny_fastq" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - 
- "fastqc" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowcpu" - - "midmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/fastqc" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/fastqc/fastqc" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/qc/fastqc/main.nf b/target/nextflow/qc/fastqc/main.nf deleted file mode 100644 index 0902adc5c64..00000000000 --- a/target/nextflow/qc/fastqc/main.nf +++ /dev/null @@ -1,2512 +0,0 @@ -// fastqc 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. 
-// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "fastqc", - "namespace" : "qc", - "version" : "0.12.3", - "arguments" : [ - { - "type" : "string", - "name" : "--mode", - "alternatives" : [ - "-m" - ], - "description" : "The mode in which the component works. Can be either files or dir.", - "default" : [ - "files" - ], - "required" : false, - "choices" : [ - "files", - "dir" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Directory containing input fastq files.", - "example" : [ - "fastq_dir" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output directory to write reports to.", - "example" : [ - "qc" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--threads", - "alternatives" : [ - "-t" - ], - "description" : "Specifies the number of files which can be processed simultaneously. 
Each thread will be allocated 250MB of\nmemory.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/" - } - ], - "description" : "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:22.04", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "fastqc" - ], - "interactive" : false - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowcpu", - "midmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : 
"memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/fastqc", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "${VIASH_PAR_MODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mode='&'#" ; else echo "# par_mode="; fi ) -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) -$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -mkdir -p "\\$par_output" - -if [ "\\$par_mode" == "dir" ]; then - par_input="\\$par_input/*.fastq.gz" -fi - -eval fastqc \\${par_threads:+--threads \\$par_threads} -o "\\$par_output" "\\$par_input" -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/qc_fastqc", - "tag" : "0.12.0" - }, - "label" : [ - "lowcpu", - "midmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. 
- // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/qc/fastqc/nextflow.config b/target/nextflow/qc/fastqc/nextflow.config deleted file mode 100644 index a04bfa01236..00000000000 --- 
a/target/nextflow/qc/fastqc/nextflow.config +++ /dev/null @@ -1,107 +0,0 @@ -manifest { - name = 'fastqc' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: 
mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/qc/fastqc/nextflow_params.yaml b/target/nextflow/qc/fastqc/nextflow_params.yaml deleted file mode 100644 index 7492ca67288..00000000000 --- a/target/nextflow/qc/fastqc/nextflow_params.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Arguments -mode: "files" -input: # please fill in - example: "fastq_dir/" -# output: "$id.$key.output.output" -# threads: 123 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/qc/fastqc/nextflow_schema.json b/target/nextflow/qc/fastqc/nextflow_schema.json deleted file mode 100644 index dbec0923f26..00000000000 --- a/target/nextflow/qc/fastqc/nextflow_schema.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "fastqc", - "description": "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. 
This component can take one or more files (by means of shell globbing) or a complete directory.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "mode": { - "type": "string", - "description": "Type: `string`, default: `files`, choices: ``files`, `dir``. The mode in which the component works", - "help_text": "Type: `string`, default: `files`, choices: ``files`, `dir``. The mode in which the component works. Can be either files or dir.", - "enum": ["files", "dir"] - , - "default": "files" - }, - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `fastq_dir/`. Directory containing input fastq files", - "help_text": "Type: `file`, required, example: `fastq_dir/`. Directory containing input fastq files." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `qc/`. Output directory to write reports to", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `qc/`. Output directory to write reports to.", - "default": "$id.$key.output.output" - }, - - "threads": { - "type": "integer", - "description": "Type: `integer`. Specifies the number of files which can be processed simultaneously", - "help_text": "Type: `integer`. Specifies the number of files which can be processed simultaneously. Each thread will be allocated 250MB of\nmemory.\n" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/qc/multiqc/.config.vsh.yaml b/target/nextflow/qc/multiqc/.config.vsh.yaml deleted file mode 100644 index 99c382ce3a6..00000000000 --- a/target/nextflow/qc/multiqc/.config.vsh.yaml +++ /dev/null @@ -1,140 +0,0 @@ -functionality: - name: "multiqc" - namespace: "qc" - version: "0.12.3" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Inputs for MultiQC." - info: null - example: - - "input.txt" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Create report in the specified output directory." - info: null - example: - - "report" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "MultiQC aggregates results from bioinformatics analyses across many\ - \ samples into a single report.\nIt searches a given directory for analysis logs\ - \ and compiles a HTML report. 
It's a general use tool, perfect for summarising\ - \ the output from numerous bioinformatics tools.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/10x_5k_anticmv/fastqc/" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "multiqc" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "singlecpu" - - "lowmem" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: 
"docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/multiqc" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/multiqc/multiqc" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/qc/multiqc/main.nf b/target/nextflow/qc/multiqc/main.nf deleted file mode 100644 index e4b0858e85f..00000000000 --- a/target/nextflow/qc/multiqc/main.nf +++ /dev/null @@ -1,2493 +0,0 @@ -// multiqc 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
- -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "multiqc", - "namespace" : "qc", - "version" : "0.12.3", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Inputs for MultiQC.", - "example" : [ - "input.txt" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Create report in the specified output directory.", - "example" : [ - "report" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/" - } - ], - "description" : "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. 
It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/" - }, - { - "type" : "file", - "path" : "resources_test/10x_5k_anticmv/fastqc/", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "multiqc" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - 
"mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/multiqc", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import subprocess - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -# Run MultiQC -subprocess.run(["multiqc", "-o", par["output"]] + par["input"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/qc_multiqc", - "tag" : "0.12.0" - }, - "label" : [ - "singlecpu", - "lowmem" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. 
- // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/qc/multiqc/nextflow.config b/target/nextflow/qc/multiqc/nextflow.config deleted file mode 100644 index 92a640398ff..00000000000 --- 
a/target/nextflow/qc/multiqc/nextflow.config +++ /dev/null @@ -1,107 +0,0 @@ -manifest { - name = 'multiqc' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\'s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - 
withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/qc/multiqc/nextflow_params.yaml b/target/nextflow/qc/multiqc/nextflow_params.yaml deleted file mode 100644 index f9dd0ac813d..00000000000 --- a/target/nextflow/qc/multiqc/nextflow_params.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Arguments -input: # please fill in - example: ["input.txt"] -# output: "$id.$key.output.output" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/qc/multiqc/nextflow_schema.json b/target/nextflow/qc/multiqc/nextflow_schema.json deleted file mode 100644 index 5616bd74a8e..00000000000 --- a/target/nextflow/qc/multiqc/nextflow_schema.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "multiqc", - "description": "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. 
It\u0027s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: List of `file`, required, example: `input.txt`, multiple_sep: `\":\"`. Inputs for MultiQC", - "help_text": "Type: List of `file`, required, example: `input.txt`, multiple_sep: `\":\"`. Inputs for MultiQC." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `report`. Create report in the specified output directory", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `report`. Create report in the specified output directory.", - "default": "$id.$key.output.output" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/query/cellxgene_census/.config.vsh.yaml b/target/nextflow/query/cellxgene_census/.config.vsh.yaml deleted file mode 100644 index 7ea63f9e06b..00000000000 --- a/target/nextflow/query/cellxgene_census/.config.vsh.yaml +++ /dev/null @@ -1,260 +0,0 @@ -functionality: - name: "cellxgene_census" - namespace: "query" - version: "0.12.3" - authors: - - name: "Matthias Beyens" - info: - role: "Contributor" - links: - github: "MatthiasBeyens" - orcid: "0000-0003-3304-0706" - email: "matthias.beyens@gmail.com" - linkedin: "mbeyens" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Dries De Maeyer" - roles: - - "author" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - argument_groups: - - name: "Inputs" - description: "Arguments related to the input (aka query) dataset." - arguments: - - type: "string" - name: "--input_database" - description: "Full input database S3 prefix URL. Default: CellxGene Census" - info: null - example: - - "s3://" - default: - - "CellxGene" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality to store the output in." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cellxgene_release" - description: "CellxGene Census release date. 
More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html" - info: null - default: - - "2023-05-15" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Query" - description: "Arguments related to the query." - arguments: - - type: "string" - name: "--species" - description: "Specie(s) of interest. If not specified, Homo Sapiens will be\ - \ queried." - info: null - example: - - "homo_sapiens" - default: - - "homo_sapiens" - required: false - choices: - - "homo_sapiens" - - "mus_musculus" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cell_query" - description: "The query for selecting the cells as defined by the cellxgene\ - \ census schema." - info: null - example: - - "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136',\ - \ 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--cells_filter_columns" - description: "The query for selecting the cells as defined by the cellxgene\ - \ census schema." - info: null - example: - - "dataset_id" - - "tissue" - - "assay" - - "disease" - - "cell_type" - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--min_cells_filter_columns" - description: "Minimum of amount of summed cells_filter_columns cells" - info: null - example: - - 100.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - description: "Output arguments." - arguments: - - type: "file" - name: "--output" - description: "Output h5mu file." 
- info: null - example: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Query CellxGene Census or user-specified TileDBSoma object, and eventually\ - \ fetch cell and gene metadata or/and expression counts." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "cellxgene-census~=1.2.0" - - "obonet~=1.0.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: 
"memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/query/cellxgene_census" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/query/cellxgene_census/cellxgene_census" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/query/cellxgene_census/main.nf b/target/nextflow/query/cellxgene_census/main.nf deleted file mode 100644 index 0ad390f627a..00000000000 --- a/target/nextflow/query/cellxgene_census/main.nf +++ /dev/null @@ -1,2803 +0,0 @@ -// cellxgene_census 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Matthias Beyens -// * Dries De Maeyer (author) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "cellxgene_census", - "namespace" : "query", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Matthias Beyens", - "info" : { - "role" : "Contributor", - "links" : { - "github" : "MatthiasBeyens", - "orcid" : "0000-0003-3304-0706", - "email" : "matthias.beyens@gmail.com", - "linkedin" : "mbeyens" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Dries De Maeyer", - "roles" : [ - "author" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "description" : "Arguments related to the input (aka query) dataset.", - "arguments" : [ - { - "type" : "string", - "name" : "--input_database", - "description" : "Full input database S3 prefix URL. 
Default: CellxGene Census", - "example" : [ - "s3://" - ], - "default" : [ - "CellxGene" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "Which modality to store the output in.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--cellxgene_release", - "description" : "CellxGene Census release date. More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html", - "default" : [ - "2023-05-15" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Query", - "description" : "Arguments related to the query.", - "arguments" : [ - { - "type" : "string", - "name" : "--species", - "description" : "Specie(s) of interest. 
If not specified, Homo Sapiens will be queried.", - "example" : [ - "homo_sapiens" - ], - "default" : [ - "homo_sapiens" - ], - "required" : false, - "choices" : [ - "homo_sapiens", - "mus_musculus" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--cell_query", - "description" : "The query for selecting the cells as defined by the cellxgene census schema.", - "example" : [ - "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136', 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--cells_filter_columns", - "description" : "The query for selecting the cells as defined by the cellxgene census schema.", - "example" : [ - "dataset_id", - "tissue", - "assay", - "disease", - "cell_type" - ], - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--min_cells_filter_columns", - "description" : "Minimum of amount of summed cells_filter_columns cells", - "example" : [ - 100.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "description" : "Output arguments.", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Output h5mu file.", - "example" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - 
"resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch cell and gene metadata or/and expression counts.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "cellxgene-census~=1.2.0", - "obonet~=1.0.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "midcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 
32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/query/cellxgene_census", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import os -import cellxgene_census -import mudata as mu -import anndata as ad - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input_database': $( if [ ! -z ${VIASH_PAR_INPUT_DATABASE+x} ]; then echo "r'${VIASH_PAR_INPUT_DATABASE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cellxgene_release': $( if [ ! -z ${VIASH_PAR_CELLXGENE_RELEASE+x} ]; then echo "r'${VIASH_PAR_CELLXGENE_RELEASE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'species': $( if [ ! 
-z ${VIASH_PAR_SPECIES+x} ]; then echo "r'${VIASH_PAR_SPECIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cell_query': $( if [ ! -z ${VIASH_PAR_CELL_QUERY+x} ]; then echo "r'${VIASH_PAR_CELL_QUERY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cells_filter_columns': $( if [ ! -z ${VIASH_PAR_CELLS_FILTER_COLUMNS+x} ]; then echo "r'${VIASH_PAR_CELLS_FILTER_COLUMNS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'min_cells_filter_columns': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS+x} ]; then echo "float(r'${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -### VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def connect_census(input_database, release): - """ - Connect to CellxGene Census or user-provided TileDBSoma object - """ - if input_database != "CellxGene": - raise NotImplementedError( - "Custom census database is not implemented yet!" 
- ) - - logger.info( - "Initializing %s release %s", - input_database, release - ) - return cellxgene_census.open_soma( - census_version = release - ) - - -def get_anndata(census_connection, cell_query, species): - logger.info( - "Getting gene expression data based on %s query.", - cell_query - ) - return cellxgene_census.get_anndata( - census = census_connection, - obs_value_filter = cell_query, - organism = species - ) - - -def add_cellcensus_metadata_obs(census_connection, query_data): - logger.info( - "Adding extented metadata to gene expression data." - ) - census_datasets = census_connection["census_info"]["datasets"].read().concat().to_pandas() - - query_data.obs.dataset_id = query_data.obs.dataset_id.astype("category") - - dataset_info = census_datasets[census_datasets.dataset_id.isin(query_data.obs.dataset_id.cat.categories)]\\\\ - [['collection_id', 'collection_name', 'collection_doi', 'dataset_id', 'dataset_title']]\\\\ - .reset_index(drop=True)\\\\ - .apply(lambda x: x.astype('category')) - - return query_data.obs.merge( - dataset_info, on='dataset_id', how = 'left' - ) - - -def cellcensus_cell_filter(query_data, cells_filter_columns, min_cells_filter_columns): - t0 = query_data.shape - query_data = query_data[ - query_data.obs.groupby(cells_filter_columns)["soma_joinid"].transform('count') >= min_cells_filter_columns - ] - t1 = query_data.shape - logger.info( - 'Removed %s cells based on %s min_cells_filter_columns of %s cells_filter_columns.' 
- % ((t0[0] - t1[0]), min_cells_filter_columns, cells_filter_columns) - ) - return query_data - - -def write_mudata(mdata, output_location, compression): - logger.info("Writing %s", output_location) - mdata.write_h5mu( - output_location, - compression=compression - ) - - -def main(): - - # start dev - logger.info('cells_filter_columns: %s' % par["cells_filter_columns"]) - logger.info('min_cells_filter_columns: %s' % par["min_cells_filter_columns"]) - # end dev - - census_connection = connect_census( - par["input_database"], - par["cellxgene_release"] - ) - - query_data = get_anndata( - census_connection, - par["cell_query"], - par["species"] - ) - - query_data.obs = add_cellcensus_metadata_obs( - census_connection, - query_data - ) - - census_connection.close() - del census_connection - - if par["cells_filter_columns"]: - if not par["min_cells_filter_columns"]: - raise NotImplementedError( - "You specified cells_filter_columns, thus add min_cells_filter_columns!" - ) - query_data = cellcensus_cell_filter( - query_data, - par["cells_filter_columns"], - par["min_cells_filter_columns"] - ) - - query_data.var_names = query_data.var["feature_id"] - query_data.var["gene_symbol"] = query_data.var["feature_name"] - - # Create empty mudata file - mdata = mu.MuData({par["modality"]: ad.AnnData()}) - - write_mudata( - mdata, - par["output"], - par["output_compression"] - ) - - mu.write_h5ad(par["output"], data=query_data, mod=par["modality"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/query_cellxgene_census", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "midcpu" - ], - "tag" : "$id" -}'''), - // auto 
settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
- // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/query/cellxgene_census/nextflow.config b/target/nextflow/query/cellxgene_census/nextflow.config deleted file mode 100644 index 
a0e0d0280f1..00000000000 --- a/target/nextflow/query/cellxgene_census/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'cellxgene_census' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch cell and gene metadata or/and expression counts.' - author = 'Matthias Beyens, Dries De Maeyer' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: 
mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/query/cellxgene_census/nextflow_params.yaml b/target/nextflow/query/cellxgene_census/nextflow_params.yaml deleted file mode 100644 index 83970b54f64..00000000000 --- a/target/nextflow/query/cellxgene_census/nextflow_params.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Inputs -input_database: "CellxGene" -modality: "rna" -cellxgene_release: "2023-05-15" - -# Outputs -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Query -species: "homo_sapiens" -# cell_query: "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136', 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" -# cells_filter_columns: ["dataset_id", "tissue", "assay", "disease", "cell_type"] -# min_cells_filter_columns: 100 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/query/cellxgene_census/nextflow_schema.json b/target/nextflow/query/cellxgene_census/nextflow_schema.json deleted file mode 100644 index bdee1313566..00000000000 --- 
a/target/nextflow/query/cellxgene_census/nextflow_schema.json +++ /dev/null @@ -1,130 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "cellxgene_census", - "description": "Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch cell and gene metadata or/and expression counts.", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "Arguments related to the input (aka query) dataset.", - "properties": { - - "input_database": { - "type": "string", - "description": "Type: `string`, default: `CellxGene`, example: `s3://`. Full input database S3 prefix URL", - "help_text": "Type: `string`, default: `CellxGene`, example: `s3://`. Full input database S3 prefix URL. Default: CellxGene Census", - "default": "CellxGene" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. Which modality to store the output in", - "help_text": "Type: `string`, default: `rna`. Which modality to store the output in.", - "default": "rna" - }, - - "cellxgene_release": { - "type": "string", - "description": "Type: `string`, default: `2023-05-15`. CellxGene Census release date", - "help_text": "Type: `string`, default: `2023-05-15`. CellxGene Census release date. More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html", - "default": "2023-05-15" - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "Output arguments.", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. 
Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", - "enum": ["gzip", "lzf"] - - } - - } - }, - "query" : { - "title": "Query", - "type": "object", - "description": "Arguments related to the query.", - "properties": { - - "species": { - "type": "string", - "description": "Type: `string`, default: `homo_sapiens`, example: `homo_sapiens`, choices: ``homo_sapiens`, `mus_musculus``. Specie(s) of interest", - "help_text": "Type: `string`, default: `homo_sapiens`, example: `homo_sapiens`, choices: ``homo_sapiens`, `mus_musculus``. Specie(s) of interest. If not specified, Homo Sapiens will be queried.", - "enum": ["homo_sapiens", "mus_musculus"] - , - "default": "homo_sapiens" - }, - - "cell_query": { - "type": "string", - "description": "Type: `string`, example: `is_primary_data == True and cell_type_ontology_term_id in [\u0027CL:0000136\u0027, \u0027CL:1000311\u0027, \u0027CL:0002616\u0027] and suspension_type == \u0027cell\u0027`. The query for selecting the cells as defined by the cellxgene census schema", - "help_text": "Type: `string`, example: `is_primary_data == True and cell_type_ontology_term_id in [\u0027CL:0000136\u0027, \u0027CL:1000311\u0027, \u0027CL:0002616\u0027] and suspension_type == \u0027cell\u0027`. The query for selecting the cells as defined by the cellxgene census schema." - }, - - "cells_filter_columns": { - "type": "string", - "description": "Type: List of `string`, example: `dataset_id:tissue:assay:disease:cell_type`, multiple_sep: `\":\"`. The query for selecting the cells as defined by the cellxgene census schema", - "help_text": "Type: List of `string`, example: `dataset_id:tissue:assay:disease:cell_type`, multiple_sep: `\":\"`. The query for selecting the cells as defined by the cellxgene census schema." 
- }, - - "min_cells_filter_columns": { - "type": "number", - "description": "Type: `double`, example: `100`. Minimum of amount of summed cells_filter_columns cells", - "help_text": "Type: `double`, example: `100`. Minimum of amount of summed cells_filter_columns cells" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/query" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/query/cellxgene_census/setup_logger.py b/target/nextflow/query/cellxgene_census/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/query/cellxgene_census/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml b/target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml deleted file mode 100644 index 7e87683edd5..00000000000 --- a/target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml +++ /dev/null @@ -1,186 +0,0 @@ -functionality: - name: "build_bdrhap_reference" - namespace: "reference" - version: "0.12.3" - authors: - - name: "Angela Oliveira 
Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--genome_fasta" - description: "Reference genome fasta." - info: null - example: - - "genome_sequence.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - description: "Reference transcriptome annotation." - info: null - example: - - "transcriptome_annotation.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Star index" - info: null - example: - - "star_index.tar.gz" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Compile a reference into a STAR index compatible with the BD Rhapsody\ - \ pipeline." 
- test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "bdgenomics/rhapsody:1.10.1" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "pigz" - interactive: false - test_setup: - - type: "docker" - env: - - "GOPATH /root/go" - - "GOBIN /root/go/bin" - - "PATH \"${PATH}:/root/go/bin\"" - - type: "apt" - packages: - - "golang" - interactive: false - - type: "docker" - run: - - "go get golang.org/dl/go1.20.6 && go1.20.6 download && \\\ngit clone --branch\ - \ v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/ &&\ - \ go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 
5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_bdrhap_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_bdrhap_reference/build_bdrhap_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/reference/build_bdrhap_reference/main.nf b/target/nextflow/reference/build_bdrhap_reference/main.nf deleted file mode 100644 index 0135dd2ac1a..00000000000 --- a/target/nextflow/reference/build_bdrhap_reference/main.nf +++ /dev/null @@ -1,2597 +0,0 @@ -// build_bdrhap_reference 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "build_bdrhap_reference", - "namespace" : "reference", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--genome_fasta", - "description" : "Reference genome fasta.", - "example" : [ - "genome_sequence.fa.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--transcriptome_gtf", - "description" : "Reference transcriptome annotation.", - "example" : [ - "transcriptome_annotation.gtf.gz" - ], - "must_exist" : 
true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Star index", - "example" : [ - "star_index.tar.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/" - } - ], - "description" : "Compile a reference into a STAR index compatible with the BD Rhapsody pipeline.", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "run_test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/" - }, - { - "type" : "file", - "path" : "resources_test/reference_gencodev41_chr1", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "bdgenomics/rhapsody:1.10.1", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "pigz" - ], - "interactive" : false - } - ], - "test_setup" : [ - { - "type" : "docker", - "env" : [ - "GOPATH /root/go", - "GOBIN /root/go/bin", - "PATH \\"${PATH}:/root/go/bin\\"" - ] - }, - { - "type" : "apt", - "packages" : [ - "golang" - ], - "interactive" : false - }, - { - "type" : 
"docker", - "run" : [ - "go get golang.org/dl/go1.20.6 && go1.20.6 download && \\\\\ngit clone --branch v2.5.0 https://github.com/shenwei356/seqkit.git && \\\\\ncd seqkit/seqkit/ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" - ] - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_bdrhap_reference", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e 
-tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT - -meta_cpus="\\${meta_cpus:-1}" - -# process params -extra_params=( ) - -if [ ! -z "\\$meta_cpus" ]; then - extra_params+=( "--runThreadN \\$meta_cpus" ) -fi - -echo "> Unzipping input files" -unpigz -c "\\$par_genome_fasta" > "\\$tmpdir/genome.fa" -unpigz -c "\\$par_transcriptome_gtf" > "\\$tmpdir/transcriptome.gtf" - -echo "> Building star index" -mkdir "\\$tmpdir/out" -STAR \\\\ - --runMode genomeGenerate \\\\ - --genomeDir "\\$tmpdir/out" \\\\ - --genomeFastaFiles "\\$tmpdir/genome.fa" \\\\ - --sjdbGTFfile "\\$tmpdir/transcriptome.gtf" \\\\ - --sjdbOverhang 100 \\\\ - --genomeSAindexNbases 11 \\\\ - "\\${extra_params[@]}" - -echo "> Creating archive" -tar --use-compress-program="pigz -k " -cf "\\$par_output" -C "\\$tmpdir/out" . 
-VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/reference_build_bdrhap_reference", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
- // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/reference/build_bdrhap_reference/nextflow.config b/target/nextflow/reference/build_bdrhap_reference/nextflow.config deleted file mode 
100644 index 833454546b5..00000000000 --- a/target/nextflow/reference/build_bdrhap_reference/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'build_bdrhap_reference' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Compile a reference into a STAR index compatible with the BD Rhapsody pipeline.' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 
64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml b/target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml deleted file mode 100644 index 827860782eb..00000000000 --- a/target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Arguments -genome_fasta: # please fill in - example: "genome_sequence.fa.gz" -transcriptome_gtf: # please fill in - example: "transcriptome_annotation.gtf.gz" -# output: "$id.$key.output.gz" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json b/target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json deleted file mode 100644 index e3f0578f9ac..00000000000 --- a/target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "build_bdrhap_reference", - "description": "Compile a reference into a STAR index compatible with the BD 
Rhapsody pipeline.", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "genome_fasta": { - "type": "string", - "description": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta", - "help_text": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta." - }, - - "transcriptome_gtf": { - "type": "string", - "description": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation", - "help_text": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.gz`, example: `star_index.tar.gz`. Star index", - "help_text": "Type: `file`, required, default: `$id.$key.output.gz`, example: `star_index.tar.gz`. Star index", - "default": "$id.$key.output.gz" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml b/target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml deleted file mode 100644 index c37de090499..00000000000 --- a/target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml +++ /dev/null @@ -1,187 +0,0 @@ -functionality: - name: "build_cellranger_reference" - namespace: "reference" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--genome_fasta" - description: "Reference genome fasta." - info: null - example: - - "genome_sequence.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - description: "Reference transcriptome annotation." 
- info: null - example: - - "transcriptome_annotation.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - description: "Output folder" - info: null - example: - - "cellranger_reference" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Build a Cell Ranger-compatible reference folder from user-supplied\ - \ genome FASTA and gene GTF files. Creates a new folder named after the genome." - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ghcr.io/data-intuitive/cellranger:7.0" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "pigz" - interactive: false - test_setup: - - type: "docker" - env: - - "GOPATH /root/go" - - "GOBIN /root/go/bin" - - "PATH \"${PATH}:/root/go/bin\"" - - type: "apt" - packages: - - "golang" - - "git" - interactive: false - - type: "docker" - run: - - "go install golang.org/dl/go1.20.6@latest && go1.20.6 download && \\\ngit clone\ - \ --branch v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/\ - \ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" 
- auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_cellranger_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_cellranger_reference/build_cellranger_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/reference/build_cellranger_reference/main.nf b/target/nextflow/reference/build_cellranger_reference/main.nf deleted file mode 100644 index 50b7ce7c806..00000000000 --- a/target/nextflow/reference/build_cellranger_reference/main.nf +++ /dev/null @@ -1,2602 +0,0 @@ -// build_cellranger_reference 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. 
-// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "build_cellranger_reference", - "namespace" : "reference", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--genome_fasta", - "description" : "Reference genome fasta.", - "example" : [ - "genome_sequence.fa.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : 
"input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--transcriptome_gtf", - "description" : "Reference transcriptome annotation.", - "example" : [ - "transcriptome_annotation.gtf.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "description" : "Output folder", - "example" : [ - "cellranger_reference" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/" - } - ], - "description" : "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA and gene GTF files. 
Creates a new folder named after the genome.", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "run_test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/" - }, - { - "type" : "file", - "path" : "resources_test/reference_gencodev41_chr1", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ghcr.io/data-intuitive/cellranger:7.0", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "pigz" - ], - "interactive" : false - } - ], - "test_setup" : [ - { - "type" : "docker", - "env" : [ - "GOPATH /root/go", - "GOBIN /root/go/bin", - "PATH \\"${PATH}:/root/go/bin\\"" - ] - }, - { - "type" : "apt", - "packages" : [ - "golang", - "git" - ], - "interactive" : false - }, - { - "type" : "docker", - "run" : [ - "go install golang.org/dl/go1.20.6@latest && go1.20.6 download && \\\\\ngit clone --branch v2.5.0 https://github.com/shenwei356/seqkit.git && \\\\\ncd seqkit/seqkit/ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" - ] - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 
8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_cellranger_reference", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) -$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT - -# just to make sure -par_genome_fasta=\\`realpath \\$par_genome_fasta\\` -par_transcriptome_gtf=\\`realpath \\$par_transcriptome_gtf\\` -par_output=\\`realpath \\$par_output\\` - -# process params -extra_params=( ) - -if [ ! -z "\\$meta_cpus" ]; then - extra_params+=( "--nthreads=\\$meta_cpus" ) -fi -if [ ! -z "\\$meta_memory_gb" ]; then - # always keep 2gb for the OS itself - memory_gb=\\`python -c "print(int('\\$meta_memory_gb') - 2)"\\` - extra_params+=( "--memgb=\\$memory_gb" ) -fi - -echo "> Unzipping input files" -unpigz -c "\\$par_genome_fasta" > "\\$tmpdir/genome.fa" - -echo "> Building star index" -cd "\\$tmpdir" -cellranger mkref \\\\ - --fasta "\\$tmpdir/genome.fa" \\\\ - --genes "\\$par_transcriptome_gtf" \\\\ - --genome output \\\\ - "\\${extra_params[@]}" - -echo "> Creating archive" -tar --use-compress-program="pigz -k " -cf "\\$par_output" -C "\\$tmpdir/output" . 
-VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/reference_build_cellranger_reference", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
- // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/reference/build_cellranger_reference/nextflow.config b/target/nextflow/reference/build_cellranger_reference/nextflow.config deleted file 
mode 100644 index 3fd391dcb12..00000000000 --- a/target/nextflow/reference/build_cellranger_reference/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'build_cellranger_reference' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA and gene GTF files. Creates a new folder named after the genome.' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 
16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml b/target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml deleted file mode 100644 index 4779fa85c32..00000000000 --- a/target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Arguments -genome_fasta: # please fill in - example: "genome_sequence.fa.gz" -transcriptome_gtf: # please fill in - example: "transcriptome_annotation.gtf.gz" -# output: "$id.$key.output.output" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/reference/build_cellranger_reference/nextflow_schema.json b/target/nextflow/reference/build_cellranger_reference/nextflow_schema.json deleted file mode 100644 index 2dfc3c92efc..00000000000 --- a/target/nextflow/reference/build_cellranger_reference/nextflow_schema.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": 
"build_cellranger_reference", - "description": "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA and gene GTF files. Creates a new folder named after the genome.", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "genome_fasta": { - "type": "string", - "description": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta", - "help_text": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta." - }, - - "transcriptome_gtf": { - "type": "string", - "description": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation", - "help_text": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `cellranger_reference`. Output folder", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `cellranger_reference`. Output folder", - "default": "$id.$key.output.output" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/reference/make_reference/.config.vsh.yaml b/target/nextflow/reference/make_reference/.config.vsh.yaml deleted file mode 100644 index ffa4fde4afc..00000000000 --- a/target/nextflow/reference/make_reference/.config.vsh.yaml +++ /dev/null @@ -1,212 +0,0 @@ -functionality: - name: "make_reference" - namespace: "reference" - version: "0.12.3" - authors: - - name: "Angela Oliveira Pisco" - roles: - - "author" - info: - role: "Contributor" - links: - github: "aopisco" - orcid: "0000-0003-0142-2355" - linkedin: "aopisco" - organizations: - - name: "Insitro" - href: "https://insitro.com" - role: "Director of Computational Biology" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - - name: "Robrecht Cannoodt" - roles: - - "author" - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--genome_fasta" - description: "Reference genome fasta. Example: " - info: null - example: - - "genome_fasta.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome_gtf" - description: "Reference transcriptome annotation." 
- info: null - example: - - "transcriptome.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--ercc" - description: "ERCC sequence and annotation file." - info: null - example: - - "ercc.zip" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--subset_regex" - description: "Will subset the reference chromosomes using the given regex." - info: null - example: - - "(ERCC-00002|chr1)" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_fasta" - description: "Output genome sequence fasta." - info: null - example: - - "genome_sequence.fa.gz" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output_gtf" - description: "Output transcriptome annotation gtf." 
- info: null - example: - - "transcriptome_annotation.gtf.gz" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Preprocess and build a transcriptome reference.\n\nExample input files\ - \ are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n\ - \ - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n\ - \ - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "pigz" - - "seqkit" - - "curl" - - "wget" - - "unzip" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - 
mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/make_reference" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/make_reference/make_reference" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/reference/make_reference/main.nf b/target/nextflow/reference/make_reference/main.nf deleted file mode 100644 index d915b934e2c..00000000000 --- a/target/nextflow/reference/make_reference/main.nf +++ /dev/null @@ -1,2635 +0,0 @@ -// make_reference 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Angela Oliveira Pisco (author) -// * Robrecht Cannoodt (author, maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "make_reference", - "namespace" : "reference", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Angela Oliveira Pisco", - "roles" : [ - "author" - ], - "info" : { - "role" : "Contributor", - "links" : { - "github" : "aopisco", - "orcid" : "0000-0003-0142-2355", - "linkedin" : "aopisco" - }, - "organizations" : [ - { - "name" : "Insitro", - "href" : "https://insitro.com", - "role" : "Director of Computational Biology" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "author", - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--genome_fasta", - "description" : "Reference genome fasta. 
Example: ", - "example" : [ - "genome_fasta.fa.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--transcriptome_gtf", - "description" : "Reference transcriptome annotation.", - "example" : [ - "transcriptome.gtf.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--ercc", - "description" : "ERCC sequence and annotation file.", - "example" : [ - "ercc.zip" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--subset_regex", - "description" : "Will subset the reference chromosomes using the given regex.", - "example" : [ - "(ERCC-00002|chr1)" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output_fasta", - "description" : "Output genome sequence fasta.", - "example" : [ - "genome_sequence.fa.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output_gtf", - "description" : "Output transcriptome annotation gtf.", - "example" : [ - "transcriptome_annotation.gtf.gz" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/" - } - ], - "description" : 
"Preprocess and build a transcriptome reference.\n\nExample input files are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "run_test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:22.04", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "pigz", - "seqkit", - "curl", - "wget", - "unzip" - ], - "interactive" : false - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : 
"memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/make_reference", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_ERCC+x} ]; then echo "${VIASH_PAR_ERCC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ercc='&'#" ; else echo "# par_ercc="; fi ) -$( if [ ! -z ${VIASH_PAR_SUBSET_REGEX+x} ]; then echo "${VIASH_PAR_SUBSET_REGEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subset_regex='&'#" ; else echo "# par_subset_regex="; fi ) -$( if [ ! 
-z ${VIASH_PAR_OUTPUT_FASTA+x} ]; then echo "${VIASH_PAR_OUTPUT_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_fasta='&'#" ; else echo "# par_output_fasta="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_GTF+x} ]; then echo "${VIASH_PAR_OUTPUT_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_gtf='&'#" ; else echo "# par_output_gtf="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -# create temporary directory -tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT - -echo "> Processing genome sequence" -genome_fasta="\\$tmpdir/genome_sequence.fa" -# curl "\\$par_genome_fasta" | gunzip > "\\$genome_fasta" -gunzip -c "\\$par_genome_fasta" > "\\$genome_fasta" - -echo "> Processing transcriptome annotation" -transcriptome_gtf="\\$tmpdir/transcriptome_annotation.gtf" -# curl "\\$par_transcriptome_gtf" | gunzip > "\\$transcriptome_gtf" -gunzip -c "\\$par_transcriptome_gtf"> "\\$transcriptome_gtf" - -if [[ ! -z \\$par_ercc ]]; then - echo "> Processing ERCC sequences" - # wget "\\$par_ercc" -O "\\$tmpdir/ercc.zip" - # unzip "\\$tmpdir/ercc.zip" -d "\\$tmpdir" - unzip "\\$par_ercc" -d "\\$tmpdir" - cat "\\$tmpdir/ERCC92.fa" >> "\\$genome_fasta" - cat "\\$tmpdir/ERCC92.gtf" >> "\\$transcriptome_gtf" -fi - -# create output & filter reference if so desired -if [[ ! 
-z \\$par_subset_regex ]]; then - echo "> Subsetting reference with regex '\\$par_subset_regex'" - awk '{print \\$1}' "\\$genome_fasta" | seqkit grep -r -p "^\\$par_subset_regex\\\\\\$" > "\\$tmpdir/genome_sequence_filtered.fa" - genome_fasta="\\$tmpdir/genome_sequence_filtered.fa" - grep -E "^\\$par_subset_regex[^A-Za-z0-9]" "\\$transcriptome_gtf" > "\\$tmpdir/transcriptome_annotation_filtered.gtf" - transcriptome_gtf="\\$tmpdir/transcriptome_annotation_filtered.gtf" - - echo - echo "Matched tags:" - cat "\\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq - echo -fi - -echo "> Gzipping outputs" -pigz -c "\\$genome_fasta" > "\\$par_output_fasta" -pigz -c "\\$transcriptome_gtf" > "\\$par_output_gtf" - -# to do: re enable -# echo "> Sanity check of outputs" -# readarray -t fasta_tags < <( cat "\\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq ) -# readarray -t transcriptome_tags < <( cat "\\$transcriptome_gtf" | cut -d\\$'\\\\t' -f1 | sort | uniq | grep '^[^#]' ) -# [ "\\${fasta_tags[*]}" == "\\${transcriptome_tags[*]}" ] || { echo "Warning: fasta tags differ from transcriptome tags"; exit 1; } -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/reference_make_reference", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. 
the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/reference/make_reference/nextflow.config b/target/nextflow/reference/make_reference/nextflow.config deleted file mode 100644 index 
e525a0ce00d..00000000000 --- a/target/nextflow/reference/make_reference/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'make_reference' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Preprocess and build a transcriptome reference.\n\nExample input files are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n' - author = 'Angela Oliveira Pisco, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - 
shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/reference/make_reference/nextflow_params.yaml b/target/nextflow/reference/make_reference/nextflow_params.yaml deleted file mode 100644 index f904aa639cb..00000000000 --- a/target/nextflow/reference/make_reference/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -genome_fasta: # please fill in - example: "genome_fasta.fa.gz" -transcriptome_gtf: # please fill in - example: "transcriptome.gtf.gz" -# ercc: "ercc.zip" -# subset_regex: "(ERCC-00002|chr1)" -# output_fasta: "$id.$key.output_fasta.gz" -# output_gtf: "$id.$key.output_gtf.gz" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/reference/make_reference/nextflow_schema.json 
b/target/nextflow/reference/make_reference/nextflow_schema.json deleted file mode 100644 index b960a369176..00000000000 --- a/target/nextflow/reference/make_reference/nextflow_schema.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "make_reference", - "description": "Preprocess and build a transcriptome reference.\n\nExample input files are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "genome_fasta": { - "type": "string", - "description": "Type: `file`, required, example: `genome_fasta.fa.gz`. Reference genome fasta", - "help_text": "Type: `file`, required, example: `genome_fasta.fa.gz`. Reference genome fasta. Example: " - }, - - "transcriptome_gtf": { - "type": "string", - "description": "Type: `file`, required, example: `transcriptome.gtf.gz`. Reference transcriptome annotation", - "help_text": "Type: `file`, required, example: `transcriptome.gtf.gz`. Reference transcriptome annotation." - }, - - "ercc": { - "type": "string", - "description": "Type: `file`, example: `ercc.zip`. ERCC sequence and annotation file", - "help_text": "Type: `file`, example: `ercc.zip`. ERCC sequence and annotation file." - }, - - "subset_regex": { - "type": "string", - "description": "Type: `string`, example: `(ERCC-00002|chr1)`. Will subset the reference chromosomes using the given regex", - "help_text": "Type: `string`, example: `(ERCC-00002|chr1)`. Will subset the reference chromosomes using the given regex." 
- }, - - "output_fasta": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_fasta.gz`, example: `genome_sequence.fa.gz`. Output genome sequence fasta", - "help_text": "Type: `file`, required, default: `$id.$key.output_fasta.gz`, example: `genome_sequence.fa.gz`. Output genome sequence fasta.", - "default": "$id.$key.output_fasta.gz" - }, - - "output_gtf": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_gtf.gz`, example: `transcriptome_annotation.gtf.gz`. Output transcriptome annotation gtf", - "help_text": "Type: `file`, required, default: `$id.$key.output_gtf.gz`, example: `transcriptome_annotation.gtf.gz`. Output transcriptome annotation gtf.", - "default": "$id.$key.output_gtf.gz" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/report/mermaid/.config.vsh.yaml b/target/nextflow/report/mermaid/.config.vsh.yaml deleted file mode 100644 index 8f07f821e35..00000000000 --- a/target/nextflow/report/mermaid/.config.vsh.yaml +++ /dev/null @@ -1,185 +0,0 @@ -functionality: - name: "mermaid" - namespace: "report" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input directory" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Generated network as output." - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_format" - description: "Output format for the generated image. 
By default will be inferred\ - \ from the extension \nof the file specified with --output.\n" - info: null - required: false - choices: - - "svg" - - "png" - - "pdf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--width" - description: "Width of the page" - info: null - default: - - 800 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--height" - description: "Height of the page" - info: null - default: - - 600 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--background_color" - description: "Background color for pngs/svgs (not pdfs)" - info: null - example: - - "#F0F0F0" - default: - - "white" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - - type: "file" - path: "./puppeteer-config.json" - description: "Generates a network from mermaid code.\n" - test_resources: - - type: "bash_script" - path: "test.sh" - is_executable: true - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "node:20-bullseye" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "javascript" - npm: - - "@mermaid-js/mermaid-cli" - - type: "apt" - packages: - - "chromium" - interactive: false - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: 
"memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/report/mermaid/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/report/mermaid" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/report/mermaid/mermaid" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/report/mermaid/main.nf b/target/nextflow/report/mermaid/main.nf deleted file mode 100644 index 81597f72fb1..00000000000 --- a/target/nextflow/report/mermaid/main.nf +++ /dev/null @@ -1,2554 +0,0 @@ -// mermaid 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "mermaid", - "namespace" : "report", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input directory", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Generated network as output.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_format", - "description" : "Output format for the generated image. 
By default will be inferred from the extension \nof the file specified with --output.\n", - "required" : false, - "choices" : [ - "svg", - "png", - "pdf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--width", - "description" : "Width of the page", - "default" : [ - 800 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--height", - "description" : "Height of the page", - "default" : [ - 600 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--background_color", - "description" : "Background color for pngs/svgs (not pdfs)", - "example" : [ - "#F0F0F0" - ], - "default" : [ - "white" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/report/mermaid/" - }, - { - "type" : "file", - "path" : "./puppeteer-config.json", - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/report/mermaid/" - } - ], - "description" : "Generates a network from mermaid code.\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/report/mermaid/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "node:20-bullseye", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - 
"chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "javascript", - "npm" : [ - "@mermaid-js/mermaid-cli" - ] - }, - { - "type" : "apt", - "packages" : [ - "chromium" - ], - "interactive" : false - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/report/mermaid/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/report/mermaid", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e 
-tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi ) -$( if [ ! -z ${VIASH_PAR_WIDTH+x} ]; then echo "${VIASH_PAR_WIDTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_width='&'#" ; else echo "# par_width="; fi ) -$( if [ ! -z ${VIASH_PAR_HEIGHT+x} ]; then echo "${VIASH_PAR_HEIGHT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_height='&'#" ; else echo "# par_height="; fi ) -$( if [ ! -z ${VIASH_PAR_BACKGROUND_COLOR+x} ]; then echo "${VIASH_PAR_BACKGROUND_COLOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_background_color='&'#" ; else echo "# par_background_color="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END -#!/bin/bash - -mmdc -p "\\$meta_resources_dir/puppeteer-config.json" \\\\ - -i "\\$par_input" \\\\ - -o "\\$par_output" \\\\ - --width "\\$par_width" \\\\ - --height "\\$par_height" \\\\ - \\${par_background_color:+--backgroundColor \\$par_background_color} \\\\ - \\${output_format:+--outputFormat \\$par_output_format} -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/report_mermaid", - "tag" : "0.12.0" - }, - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : 
false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. 
- // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/report/mermaid/nextflow.config b/target/nextflow/report/mermaid/nextflow.config deleted file mode 100644 index a2788a9845e..00000000000 --- 
a/target/nextflow/report/mermaid/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'mermaid' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Generates a network from mermaid code.\n' - author = 'Dries De Maeyer' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - 
withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/report/mermaid/nextflow_params.yaml b/target/nextflow/report/mermaid/nextflow_params.yaml deleted file mode 100644 index 6e937e2d1b0..00000000000 --- a/target/nextflow/report/mermaid/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: "path/to/file" -# output: "$id.$key.output.output" -# output_format: "foo" -width: 800 -height: 600 -background_color: "white" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/report/mermaid/nextflow_schema.json b/target/nextflow/report/mermaid/nextflow_schema.json deleted file mode 100644 index 9117514aa29..00000000000 --- a/target/nextflow/report/mermaid/nextflow_schema.json +++ /dev/null @@ -1,87 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "mermaid", - "description": "Generates a network from mermaid code.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input directory", - "help_text": "Type: `file`, required. 
Input directory" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Generated network as output", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Generated network as output.", - "default": "$id.$key.output.output" - }, - - "output_format": { - "type": "string", - "description": "Type: `string`, choices: ``svg`, `png`, `pdf``. Output format for the generated image", - "help_text": "Type: `string`, choices: ``svg`, `png`, `pdf``. Output format for the generated image. By default will be inferred from the extension \nof the file specified with --output.\n", - "enum": ["svg", "png", "pdf"] - - }, - - "width": { - "type": "integer", - "description": "Type: `integer`, default: `800`. Width of the page", - "help_text": "Type: `integer`, default: `800`. Width of the page", - "default": "800" - }, - - "height": { - "type": "integer", - "description": "Type: `integer`, default: `600`. Height of the page", - "help_text": "Type: `integer`, default: `600`. Height of the page", - "default": "600" - }, - - "background_color": { - "type": "string", - "description": "Type: `string`, default: `white`, example: `#F0F0F0`. Background color for pngs/svgs (not pdfs)", - "help_text": "Type: `string`, default: `white`, example: `#F0F0F0`. Background color for pngs/svgs (not pdfs)", - "default": "white" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
- }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/report/mermaid/puppeteer-config.json b/target/nextflow/report/mermaid/puppeteer-config.json deleted file mode 100644 index 7b2851c2995..00000000000 --- a/target/nextflow/report/mermaid/puppeteer-config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "executablePath": "/usr/bin/chromium", - "args": [ - "--no-sandbox" - ] -} \ No newline at end of file diff --git a/target/nextflow/transfer/publish/.config.vsh.yaml b/target/nextflow/transfer/publish/.config.vsh.yaml deleted file mode 100644 index 94bb9755a43..00000000000 --- a/target/nextflow/transfer/publish/.config.vsh.yaml +++ /dev/null @@ -1,125 +0,0 @@ -functionality: - name: "publish" - namespace: "transfer" - version: "0.12.3" - authors: - - name: "Toni Verbeiren" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - github: "tverbeiren" - linkedin: "verbeiren" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist and CEO" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input filename" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output filename" - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Publish an artifact and optionally rename with parameters" - test_resources: - - type: "bash_script" - path: "run_test.sh" - is_executable: true - info: null - status: 
"enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transfer/publish/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transfer/publish" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transfer/publish/publish" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/transfer/publish/main.nf 
b/target/nextflow/transfer/publish/main.nf deleted file mode 100644 index 354f635ef87..00000000000 --- a/target/nextflow/transfer/publish/main.nf +++ /dev/null @@ -1,2474 +0,0 @@ -// publish 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Toni Verbeiren (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "publish", - "namespace" : "transfer", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Toni Verbeiren", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "github" : "tverbeiren", - "linkedin" : "verbeiren" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist and CEO" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input filename", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output filename", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - 
"type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transfer/publish/" - } - ], - "description" : "Publish an artifact and optionally rename with parameters", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "run_test.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transfer/publish/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:22.04", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - 
"cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/transfer/publish/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transfer/publish", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -parent=\\`dirname "\\$par_output"\\` -if [[ ! 
-d "\\$parent" ]]; then - mkdir -p "\\$parent" -fi - -cp -r "\\$par_input" "\\$par_output" -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/transfer_publish", - "tag" : "0.12.0" - }, - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
- // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/transfer/publish/nextflow.config b/target/nextflow/transfer/publish/nextflow.config deleted file mode 100644 index e5d69ba26da..00000000000 
--- a/target/nextflow/transfer/publish/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'publish' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Publish an artifact and optionally rename with parameters' - author = 'Toni Verbeiren' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { 
memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/transfer/publish/nextflow_params.yaml b/target/nextflow/transfer/publish/nextflow_params.yaml deleted file mode 100644 index ba67ce27737..00000000000 --- a/target/nextflow/transfer/publish/nextflow_params.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Arguments -input: # please fill in - example: "path/to/file" -# output: "$id.$key.output.output" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/transfer/publish/nextflow_schema.json b/target/nextflow/transfer/publish/nextflow_schema.json deleted file mode 100644 index 697c42bed79..00000000000 --- a/target/nextflow/transfer/publish/nextflow_schema.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "publish", - "description": "Publish an artifact and optionally rename with parameters", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Input filename", - "help_text": "Type: `file`, required. 
Input filename" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Output filename", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output filename", - "default": "$id.$key.output.output" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/transform/clr/.config.vsh.yaml b/target/nextflow/transform/clr/.config.vsh.yaml deleted file mode 100644 index 7d983156f1f..00000000000 --- a/target/nextflow/transform/clr/.config.vsh.yaml +++ /dev/null @@ -1,188 +0,0 @@ -functionality: - name: "clr" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "prot" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - 
description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_layer" - description: "Output layer to use. By default, use X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - description: "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - - "muon~=0.1.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "midcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: 
false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/clr/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/clr" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/clr/clr" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/transform/clr/main.nf b/target/nextflow/transform/clr/main.nf deleted file mode 100644 index fbacb5d71a7..00000000000 --- a/target/nextflow/transform/clr/main.nf +++ /dev/null @@ -1,2577 +0,0 @@ -// clr 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "clr", - "namespace" : "transform", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "prot" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" 
: "par" - }, - { - "type" : "string", - "name" : "--output_layer", - "description" : "Output layer to use. By default, use X.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/clr/" - } - ], - "description" : "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/clr/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5", - "muon~=0.1.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "midcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, 
- "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/clr/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/clr", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -from muon import prot as pt -from mudata import read_h5mu - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - - -def main(): - input_h5mu = read_h5mu(par['input']) - modality = input_h5mu[par['modality']] - normalized_counts = pt.pp.clr(modality, inplace=False if par['output_layer'] else True) - if par['output_layer'] and not normalized_counts: - raise RuntimeError("CLR failed to return the requested output layer") - if normalized_counts: - input_h5mu[par["modality"]].layers[par['output_layer']] = normalized_counts.X - input_h5mu.write_h5mu(par['output'], compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/transform_clr", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "midcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/transform/clr/nextflow.config b/target/nextflow/transform/clr/nextflow.config deleted file mode 100644 index 611553cd2c8..00000000000 --- 
a/target/nextflow/transform/clr/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'clr' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb 
{ memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/transform/clr/nextflow_params.yaml b/target/nextflow/transform/clr/nextflow_params.yaml deleted file mode 100644 index f8ea11d94b4..00000000000 --- a/target/nextflow/transform/clr/nextflow_params.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "prot" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -# output_layer: "foo" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/clr/nextflow_schema.json b/target/nextflow/transform/clr/nextflow_schema.json deleted file mode 100644 index 68adcf8557d..00000000000 --- a/target/nextflow/transform/clr/nextflow_schema.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "clr", - "description": "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. 
Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `prot`. ", - "help_text": "Type: `string`, default: `prot`. ", - "default": "prot" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "output_layer": { - "type": "string", - "description": "Type: `string`. Output layer to use", - "help_text": "Type: `string`. Output layer to use. By default, use X." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/transform/delete_layer/.config.vsh.yaml b/target/nextflow/transform/delete_layer/.config.vsh.yaml deleted file mode 100644 index c637b0deb48..00000000000 --- a/target/nextflow/transform/delete_layer/.config.vsh.yaml +++ /dev/null @@ -1,196 +0,0 @@ -functionality: - name: "delete_layer" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--layer" - description: "Input layer to remove" - info: null - required: true - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--missing_ok" - description: "Do not raise an error if the layer does not exist for all modalities." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/compress_h5mu.py" - - type: "file" - path: "src/utils/setup_logger.py" - description: "Delete an anndata layer from one or more modalities.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "singlecpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" 
- mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/delete_layer" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/delete_layer/delete_layer" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/transform/delete_layer/compress_h5mu.py b/target/nextflow/transform/delete_layer/compress_h5mu.py deleted file mode 100644 index 9d92395a573..00000000000 --- a/target/nextflow/transform/delete_layer/compress_h5mu.py +++ /dev/null @@ -1,49 +0,0 @@ -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = 
output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) diff --git a/target/nextflow/transform/delete_layer/main.nf b/target/nextflow/transform/delete_layer/main.nf deleted file mode 100644 index bdd4cbb2e5b..00000000000 --- a/target/nextflow/transform/delete_layer/main.nf +++ /dev/null @@ -1,2681 +0,0 @@ -// delete_layer 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "delete_layer", - "namespace" : "transform", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--layer", - "description" : "Input layer to remove", - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : 
"output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--missing_ok", - "description" : "Do not raise an error if the layer does not exist for all modalities.", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/" - }, - { - "type" : "file", - "path" : "src/utils/compress_h5mu.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Delete an anndata layer from one or more modalities.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : 
"https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midmem", - "singlecpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/delete_layer", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : 
"https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -from mudata import read_h5ad, write_h5ad -import shutil -from pathlib import Path - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'missing_ok': $( if [ ! -z ${VIASH_PAR_MISSING_OK+x} ]; then echo "r'${VIASH_PAR_MISSING_OK//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: 
Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\\\\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\\\\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -def main(): - input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] - - logger.info('Reading input file %s, modality %s.', 
input_file, mod_name) - mod = read_h5ad(input_file, mod=mod_name) - for layer in par['layer']: - if layer not in mod.layers: - if par['missing_ok']: - continue - raise ValueError(f"Layer '{layer}' is not present in modality {mod_name}.") - logger.info('Deleting layer %s from modality %s.', layer, mod_name) - del mod.layers[layer] - - logger.info('Writing output to %s.', par['output']) - output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") \\\\ - if par["output_compression"] else output_file - shutil.copyfile(par['input'], output_file_uncompressed) - write_h5ad(filename=output_file_uncompressed, mod=mod_name, data=mod) - if par["output_compression"]: - compress_h5mu(output_file_uncompressed, output_file, compression=par["output_compression"]) - output_file_uncompressed.unlink() - - logger.info('Finished.') - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/transform_delete_layer", - "tag" : "0.12.0" - }, - "label" : [ - "midmem", - "singlecpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. 
the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/transform/delete_layer/nextflow.config b/target/nextflow/transform/delete_layer/nextflow.config deleted file mode 100644 index 
340a7bf206e..00000000000 --- a/target/nextflow/transform/delete_layer/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'delete_layer' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Delete an anndata layer from one or more modalities.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 
256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/transform/delete_layer/nextflow_params.yaml b/target/nextflow/transform/delete_layer/nextflow_params.yaml deleted file mode 100644 index b3cca00ac16..00000000000 --- a/target/nextflow/transform/delete_layer/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -layer: # please fill in - example: ["foo"] -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -missing_ok: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/delete_layer/nextflow_schema.json b/target/nextflow/transform/delete_layer/nextflow_schema.json deleted file mode 100644 index 222b694010d..00000000000 --- a/target/nextflow/transform/delete_layer/nextflow_schema.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "delete_layer", - "description": "Delete an anndata layer from one or more modalities.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - 
"properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "layer": { - "type": "string", - "description": "Type: List of `string`, required, multiple_sep: `\":\"`. Input layer to remove", - "help_text": "Type: List of `string`, required, multiple_sep: `\":\"`. Input layer to remove" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "missing_ok": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Do not raise an error if the layer does not exist for all modalities", - "help_text": "Type: `boolean_true`, default: `false`. Do not raise an error if the layer does not exist for all modalities.", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/transform/delete_layer/setup_logger.py b/target/nextflow/transform/delete_layer/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/transform/delete_layer/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/transform/log1p/.config.vsh.yaml b/target/nextflow/transform/log1p/.config.vsh.yaml deleted file mode 100644 index 6dfef8173a4..00000000000 --- a/target/nextflow/transform/log1p/.config.vsh.yaml +++ /dev/null @@ -1,225 +0,0 @@ -functionality: - name: "log1p" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - 
arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. If None, X is normalized" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_layer" - description: "Output layer to use. By default, use X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--base" - info: null - example: - - 2.0 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes\ - \ the natural logarithm unless a different base is given.\n" - test_resources: - - type: "python_script" - path: "run_test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - 
cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/log1p/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/log1p" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/log1p/log1p" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/transform/log1p/main.nf b/target/nextflow/transform/log1p/main.nf deleted file mode 100644 index 018e33275f8..00000000000 --- a/target/nextflow/transform/log1p/main.nf +++ /dev/null @@ -1,2655 +0,0 @@ -// log1p 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries De Maeyer (maintainer) -// * Robrecht Cannoodt (contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "log1p", - "namespace" : "transform", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_layer", - "description" : "Input layer to use. 
If None, X is normalized", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_layer", - "description" : "Output layer to use. By default, use X.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--base", - "example" : [ - 2.0 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/log1p/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes the natural logarithm unless a different base is given.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "run_test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/log1p/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midmem", - "lowcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : 
"memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/log1p/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/log1p", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import scanpy as sc -import mudata as mu -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'base': $( if [ ! -z ${VIASH_PAR_BASE+x} ]; then echo "float(r'${VIASH_PAR_BASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -mod = par["modality"] -logger.info("Performing log transformation on modality %s", mod) -data = mdata.mod[mod] -new_layer = sc.pp.log1p(data, - base=par["base"], - copy=True if par['output_layer'] else False) -if new_layer: - data.layers[par['output_layer']] = new_layer.X - data.uns['log1p'] = new_layer.uns['log1p'] - -logger.info("Writing to file %s", par["output"]) -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/transform_log1p", - "tag" : "0.12.0" - }, - "label" : [ - "midmem", - "lowcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - 
// Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
- // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/transform/log1p/nextflow.config b/target/nextflow/transform/log1p/nextflow.config deleted file mode 100644 index 5993fe87207..00000000000 
--- a/target/nextflow/transform/log1p/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'log1p' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Logarithmize the data matrix. Computes X = log(X + 1), where log denotes the natural logarithm unless a different base is given.\n' - author = 'Dries De Maeyer, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: 
mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/transform/log1p/nextflow_params.yaml b/target/nextflow/transform/log1p/nextflow_params.yaml deleted file mode 100644 index 6a8622d8570..00000000000 --- a/target/nextflow/transform/log1p/nextflow_params.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# input_layer: "foo" -# output_layer: "foo" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -# base: 2 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/log1p/nextflow_schema.json b/target/nextflow/transform/log1p/nextflow_schema.json deleted file mode 100644 index 0e2fde9b00d..00000000000 --- a/target/nextflow/transform/log1p/nextflow_schema.json +++ /dev/null @@ -1,91 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "log1p", - "description": "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes the natural logarithm unless a different base is given.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "input_layer": { - "type": "string", - "description": "Type: `string`. Input layer to use", - "help_text": "Type: `string`. Input layer to use. If None, X is normalized" - }, - - "output_layer": { - "type": "string", - "description": "Type: `string`. Output layer to use", - "help_text": "Type: `string`. Output layer to use. By default, use X." - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "base": { - "type": "number", - "description": "Type: `double`, example: `2`. ", - "help_text": "Type: `double`, example: `2`. " - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/transform/log1p/setup_logger.py b/target/nextflow/transform/log1p/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/transform/log1p/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/transform/normalize_total/.config.vsh.yaml b/target/nextflow/transform/normalize_total/.config.vsh.yaml deleted file mode 100644 index f23f99b8927..00000000000 --- a/target/nextflow/transform/normalize_total/.config.vsh.yaml +++ /dev/null @@ -1,242 +0,0 @@ -functionality: - name: "normalize_total" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries De Maeyer" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "ddemaeyer@gmail.com" - github: "ddemaeyer" - linkedin: "dries-de-maeyer-b46a814" - organizations: - - name: "Janssen Pharmaceuticals" - href: "https://www.janssen.com" - role: "Principal Scientist" - - name: "Robrecht Cannoodt" - roles: - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core 
Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--input_layer" - description: "Input layer to use. By default, X is normalized" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_layer" - description: "Output layer to use. By default, use X." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--target_sum" - description: "If None, after normalization, each observation (cell) has a total\ - \ count equal to the median of total counts for observations (cells) before\ - \ normalization." - info: null - default: - - 10000 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--exclude_highly_expressed" - description: "Exclude (very) highly expressed genes for the computation of the\ - \ normalization factor (size factor) for each cell. 
A gene is considered highly\ - \ expressed, if it has more than max_fraction of the total counts in at least\ - \ one cell. The not-excluded genes will sum up to target_sum." - info: null - direction: "input" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Normalize counts per cell.\n\nNormalize each cell by total counts\ - \ over all genes, so that every cell has the same total count after normalization.\ - \ If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True,\ - \ very highly expressed genes are excluded from the computation of the normalization\ - \ factor (size factor) for each cell. This is meaningful as these can strongly\ - \ influence the resulting normalized values for all other genes [Weinreb17].\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim-bullseye" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libhdf5-dev" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: 
false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/normalize_total" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/normalize_total/normalize_total" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/transform/normalize_total/main.nf b/target/nextflow/transform/normalize_total/main.nf deleted file mode 100644 index 3b2ce549a7b..00000000000 --- a/target/nextflow/transform/normalize_total/main.nf +++ /dev/null @@ -1,2669 +0,0 @@ -// normalize_total 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries De Maeyer (maintainer) -// * Robrecht Cannoodt (contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "normalize_total", - "namespace" : "transform", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries De Maeyer", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "ddemaeyer@gmail.com", - "github" : "ddemaeyer", - "linkedin" : "dries-de-maeyer-b46a814" - }, - "organizations" : [ - { - "name" : "Janssen Pharmaceuticals", - "href" : "https://www.janssen.com", - "role" : "Principal Scientist" - } - ] - } - }, - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "default" : [ - "rna" - ], - "required" : false, - 
"direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--input_layer", - "description" : "Input layer to use. By default, X is normalized", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_layer", - "description" : "Output layer to use. By default, use X.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--target_sum", - "description" : "If None, after normalization, each observation (cell) has a total count equal to the median of total counts for observations (cells) before normalization.", - "default" : [ - 10000 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--exclude_highly_expressed", - "description" : "Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell. A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. 
The not-excluded genes will sum up to target_sum.", - "direction" : "input", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Normalize counts per cell.\n\nNormalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True, very highly expressed genes are excluded from the computation of the normalization factor (size factor) for each cell. This is meaningful as these can strongly influence the resulting normalized values for all other genes [Weinreb17].\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim-bullseye", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "libhdf5-dev", - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - 
"packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "midmem", - "lowcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/normalize_total", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > 
"$tempscript" << VIASHMAIN -import sys -import scanpy as sc -import mudata as mu - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'target_sum': $( if [ ! -z ${VIASH_PAR_TARGET_SUM+x} ]; then echo "int(r'${VIASH_PAR_TARGET_SUM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'exclude_highly_expressed': $( if [ ! -z ${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -logger.info(par) - -mod = par["modality"] -logger.info("Performing total normalization on modality %s", mod) -dat = mdata.mod[mod] -if 
par['input_layer'] and not par['input_layer'] in dat.layers.keys(): - raise ValueError(f"Input layer {par['input_layer']} not found in {mod}") -output_data = sc.pp.normalize_total(dat, - layer=par["input_layer"], - copy=True if par["output_layer"] else False) - -if output_data: - dat.layers[par["output_layer"]] = output_data.X - -logger.info("Writing to file") -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/transform_normalize_total", - "tag" : "0.12.0" - }, - "label" : [ - "midmem", - "lowcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. 
- // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/transform/normalize_total/nextflow.config b/target/nextflow/transform/normalize_total/nextflow.config deleted file mode 100644 index 
b6d6189bb28..00000000000 --- a/target/nextflow/transform/normalize_total/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'normalize_total' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Normalize counts per cell.\n\nNormalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True, very highly expressed genes are excluded from the computation of the normalization factor (size factor) for each cell. This is meaningful as these can strongly influence the resulting normalized values for all other genes [Weinreb17].\n' - author = 'Dries De Maeyer, Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - 
singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/transform/normalize_total/nextflow_params.yaml b/target/nextflow/transform/normalize_total/nextflow_params.yaml deleted file mode 100644 index fbd8c75f1ec..00000000000 --- a/target/nextflow/transform/normalize_total/nextflow_params.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# input_layer: "foo" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -# output_layer: "foo" -target_sum: 10000 -exclude_highly_expressed: false - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/normalize_total/nextflow_schema.json 
b/target/nextflow/transform/normalize_total/nextflow_schema.json deleted file mode 100644 index bce523cd7f3..00000000000 --- a/target/nextflow/transform/normalize_total/nextflow_schema.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "normalize_total", - "description": "Normalize counts per cell.\n\nNormalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True, very highly expressed genes are excluded from the computation of the normalization factor (size factor) for each cell. This is meaningful as these can strongly influence the resulting normalized values for all other genes [Weinreb17].\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. ", - "help_text": "Type: `string`, default: `rna`. ", - "default": "rna" - }, - - "input_layer": { - "type": "string", - "description": "Type: `string`. Input layer to use", - "help_text": "Type: `string`. Input layer to use. By default, X is normalized" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "output_layer": { - "type": "string", - "description": "Type: `string`. Output layer to use", - "help_text": "Type: `string`. Output layer to use. By default, use X." - }, - - "target_sum": { - "type": "integer", - "description": "Type: `integer`, default: `10000`. If None, after normalization, each observation (cell) has a total count equal to the median of total counts for observations (cells) before normalization", - "help_text": "Type: `integer`, default: `10000`. If None, after normalization, each observation (cell) has a total count equal to the median of total counts for observations (cells) before normalization.", - "default": "10000" - }, - - "exclude_highly_expressed": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell", - "help_text": "Type: `boolean_true`, default: `false`. Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell. A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum.", - "default": "False" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. 
Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/transform/normalize_total/setup_logger.py b/target/nextflow/transform/normalize_total/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/transform/normalize_total/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/transform/regress_out/.config.vsh.yaml b/target/nextflow/transform/regress_out/.config.vsh.yaml deleted file mode 100644 index 9bffc20ea66..00000000000 --- a/target/nextflow/transform/regress_out/.config.vsh.yaml +++ /dev/null @@ -1,195 +0,0 @@ -functionality: - name: "regress_out" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - - "contributor" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - description: "Input h5mu file" - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: 
"file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "Which modality (one or more) to run this component on." - info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--obs_keys" - description: "Which .obs keys to regress on." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Regress out (mostly) unwanted sources of variation.\nUses simple linear\ - \ regression. 
This is inspired by Seurat's regressOut function in R [Satija15].\ - \ \nNote that this function tends to overcorrect in certain circumstances as described\ - \ in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - 
cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/regress_out" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/regress_out/regress_out" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/transform/regress_out/main.nf b/target/nextflow/transform/regress_out/main.nf deleted file mode 100644 index 53de04a888f..00000000000 --- a/target/nextflow/transform/regress_out/main.nf +++ /dev/null @@ -1,2613 +0,0 @@ -// regress_out 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer, contributor) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "regress_out", - "namespace" : "transform", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer", - "contributor" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Input h5mu file", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" 
: "Which modality (one or more) to run this component on.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--obs_keys", - "description" : "Which .obs keys to regress on.", - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Regress out (mostly) unwanted sources of variation.\nUses simple linear regression. This is inspired by Seurat's regressOut function in R [Satija15]. \nNote that this function tends to overcorrect in certain circumstances as described in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.10-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - 
"type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "lowcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/regress_out", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - 
"git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import scanpy as sc -import mudata as mu -import multiprocessing -import sys - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'obs_keys': $( if [ ! -z ${VIASH_PAR_OBS_KEYS+x} ]; then echo "r'${VIASH_PAR_OBS_KEYS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -logger.info("Reading input mudata") -mdata = mu.read_h5mu(par["input"]) -mdata.var_names_make_unique() - -if ( - par["obs_keys"] is not None - and len(par["obs_keys"]) > 0 -): - mod = par["modality"] - logger.info("Regress out variables on modality %s", mod) - data = mdata.mod[mod] - - sc.pp.regress_out( - data, - keys=par["obs_keys"], - n_jobs=multiprocessing.cpu_count() - 1 - ) - -logger.info("Writing to file") -mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) -VIASHMAIN -python 
-B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/transform_regress_out", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "lowcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. 
- // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = 
Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. 
- def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? 
config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */
-private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
-  // the id is the first element of every parameter set
-  def ppIds = parameterSets.collect{ it[0] }
-  // unique(false) returns a de-duplicated copy. The default unique() removes the
-  // duplicates from ppIds itself, so the failure message would no longer show them.
-  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
-}
-
-/**
- * Resolve the input file paths in a parameter set relative to a given path.
- *
- * Only parameters that the config declares with type "file" and direction "input"
- * are touched. Values that are not Strings (e.g. already resolved files) are
- * returned as is.
- *
- * @param paramList A Map containing parameters to process.
- *   This function assumes that files are still of type String.
- * @param config A Map of the Viash configuration. This Map can be generated from the config file
- *   using the readConfig() function.
- * @param relativeTo path of a file to resolve the parameter values against.
- *
- * @return A map of parameters in which the locations of the input file parameters have been
- *   resolved relative to the provided path.
- */
-private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
-  // turn one String path into a file relative to 'relativeTo'; leave anything else untouched
-  def resolveOne = { value ->
-    value instanceof String ? file(getChild(relativeTo, value)) : value
-  }
-  paramList.collectEntries { parName, parValue ->
-    // 'def' keeps the lookup local instead of writing it to the script binding
-    def argSettings = config.functionality.allArguments.find{ it.plainName == parName }
-    def isInputFile = argSettings && argSettings.type == "file" && argSettings.direction == "input"
-    if (isInputFile) {
-      // the scalar branch used to return the undefined name 'path' for non-String values
-      parValue = parValue instanceof Collection ? parValue.collect(resolveOne) : resolveOne(parValue)
-    }
-    [parName, parValue]
-  }
-}
-
-/**
- * Parse the 'param_list' argument of the nextflow parameters, based on settings
- * defined in a viash config, into a list of parameter sets.
- *
- * @param params Input parameters from nextflow.
- * @param config A Map of the Viash configuration. This Map can be generated from the config file
- *   using the readConfig() function.
- *
- * @return A list of parameter sets that were parsed from the 'param_list' argument value.
- */
-private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
-  // first try to guess the format (if not set in params)
-  def paramListFormat = _guessParamListFormat(params)
-
-  // get the correct parser function for the detected params_list format;
-  // every parser returns [ file the list was read from (or null), parsed parameter sets ]
-  def paramListParsers = [
-    "csv": {[it, readCsv(it)]},
-    "json": {[it, readJson(it)]},
-    "yaml": {[it, readYaml(it)]},
-    "yaml_blob": {[null, readYamlBlob(it)]},
-    "asis": {[null, it]},
-    "none": {[null, [[:]]]}
-  ]
-  assert paramListParsers.containsKey(paramListFormat):
-    "Format of provided --param_list not recognised.\n" +
-    "You can use '--param_list_format' to manually specify the format.\n" +
-    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
-    "'yaml', 'yaml_blob', 'asis' or 'none'"
-  def paramListParser = paramListParsers.get(paramListFormat)
-
-  // fetch multi param inputs
-  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
-  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
-  // If the value was indeed a file, paramListFile contains the location of that file (used later).
-  def paramListFile = paramListOut[0]
-  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
-
-  // data checks
-  assert paramSets instanceof List: "--param_list should contain a list of maps"
-  for (value in paramSets) {
-    assert value instanceof Map: "--param_list should contain a list of maps"
-  }
-
-  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2.
-  // Plain one-argument get: Groovy's get(key, default) would also insert the default into the map.
-  paramSets = paramSets.collect({ paramValues ->
-    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
-  })
-  // Split parameters with 'multiple: true'
-  paramSets = paramSets.collect({ id, paramValues ->
-    def splitParamValues = _splitParams(paramValues, config)
-    [id, splitParamValues]
-  })
-
-  // The paths of input files inside a param_list file may have been specified relative to the
-  // location of the param_list file. These paths are resolved against that location.
-  if (paramListFile){
-    paramSets = paramSets.collect({ id, paramValues ->
-      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
-      [id, relativeParamValues]
-    })
-  }
-
-  return paramSets
-}
-
-/**
- * Cast parameters to the correct type as defined in the Viash config.
- *
- * Every value is first wrapped in a list, cast element by element, and unwrapped
- * again when the config says the argument does not accept multiple values.
- * Values of parameters that are not described in the config (e.g. publish_dir)
- * are not cast and stay wrapped in a list.
- *
- * @param parValues A Map of input arguments.
- * @param config A Map of the Viash configuration.
- *
- * @return The input arguments that have been cast to the type from the viash config.
- */
-
-private Map _castParamTypes(Map parValues, Map config) {
-  // Cast the input to the correct type according to viash config
-  def castParValues = parValues.collectEntries({ parName, parValue ->
-    // 'def' keeps both variables local to this closure; undeclared, they are written to the
-    // script binding and shared by every invocation
-    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
-    // don't parse parameters like publish_dir (in which case paramSettings = null).
-    // Property lookup instead of get("type", null), which would insert type: null into the config
-    def parType = paramSettings ? paramSettings.type : null
-    if (parValue !instanceof Collection) {
-      parValue = [parValue]
-    }
-    // a file argument without an explicit direction is treated as an input
-    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
-      parValue = parValue.collect{ path ->
-        if (path !instanceof String) {
-          path
-        } else {
-          file(path)
-        }
-      }
-    } else if (parType == "integer") {
-      parValue = parValue.collect{it as Integer}
-    } else if (parType == "double") {
-      parValue = parValue.collect{it as Double}
-    } else if (parType == "boolean" ||
-               parType == "boolean_true" ||
-               parType == "boolean_false") {
-      parValue = parValue.collect{it as Boolean}
-    }
-
-    // simplify list to value if need be
-    if (paramSettings && !paramSettings.multiple) {
-      assert parValue.size() == 1 :
-        "Error: argument ${parName} has too many values.\n" +
-        "  Expected amount: 1. Found: ${parValue.size()}"
-      parValue = parValue[0]
-    }
-    [parName, parValue]
-  })
-  return castParValues
-}
-
-/**
- * Apply the argument settings specified in a Viash config to a single parameter set.
- *    - Split the parameter values according to their separator if
- *       the parameter accepts multiple values
- *    - Cast the parameters to their correct types.
- *    - Assertions:
- *        ~ Check if any unknown parameters are found
- *
- * @param paramValues A Map of parameters to be processed. All parameters must
- *                    also be specified in the Viash config; only "publishDir"
- *                    and "publish_dir" are accepted in addition.
- * @param config A Map of the Viash configuration. This Map can be generated from
- *                the config file using the readConfig() function.
- * @return The input parameters that have been processed.
- */
-Map applyConfigToOneParameterSet(Map paramValues, Map config){
-  def splitParamValues = _splitParams(paramValues, config)
-  def castParamValues = _castParamTypes(splitParamValues, config)
-
-  // Check if any unexpected arguments were passed
-  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
-  castParamValues.each({parName, parValue ->
-    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
-  })
-  return castParamValues
-}
-
-/**
- * Apply the argument settings specified in a Viash config to a list of parameter sets.
- *    - Split the parameter values according to their separator if
- *       the parameter accepts multiple values
- *    - Cast the parameters to their correct types.
- *    - Assertions:
- *        ~ Check if any unknown parameters are found
- *        ~ Check if the ID of the parameter set is unique across all sets.
- *
- * @param parameterSets A list of parameter sets. Element 0 of each set is the id,
- *                      element 1 the parameter Map; any further elements are
- *                      appended to the result unchanged.
- * @param config A Map of the Viash configuration.
- *
- * @return The input parameters that have been processed.
- */
-
-List applyConfig(List parameterSets, Map config){
-  def processedparameterSets = parameterSets.collect({ parameterSet ->
-    def id = parameterSet[0]
-    def paramValues = parameterSet[1]
-    def passthrough = parameterSet.drop(2)
-    def processedSet = applyConfigToOneParameterSet(paramValues, config)
-    [id, processedSet] + passthrough
-  })
-
-  _checkUniqueIds(processedparameterSets)
-  return processedparameterSets
-}
-
-/**
- * Parse nextflow parameters based on settings defined in a viash config.
- * Return a list of parameter sets, each parameter set corresponding to
- * an event in a nextflow channel. The output from this function can be used
- * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
- * events.
- *
- * This function performs:
- *   - A filtering of the params which can be found in the config file.
- *   - Process the params_list argument which allows a user to initialise
- *     a Vdsl3 channel with multiple parameter sets. Possible formats are
- *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
- *     which correspond to the different arguments of this pipeline. A json or a yaml
- *     file should be a list of maps, each of which has keys corresponding to the
- *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
- *     When passing a csv, json or yaml, relative path names are relativized to the
- *     location of the parameter file.
- *   - Combine the parameter sets into a vdsl3 Channel.
- *
- * @param params Input parameters. Can optionally contain a 'param_list' key that
- *               provides a list of arguments that can be split up into multiple events
- *               in the output channel. Possible formats of param_list are: a csv file,
- *               a json file, a yaml file or a yaml blob. Each parameter set (event) must
- *               have a unique ID.
- * @param config A Map of the Viash configuration. This Map can be generated from the config file
- *               using the readConfig() function.
- *
- * @return A list of parameters with the first element of the event being
- *         the event ID and the second element containing a map of the parsed parameters.
- */
-
-private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
-  /* parse regular parameters (not in param_list)  */
-  /*************************************************/
-  def globalParams = config.functionality.allArguments
-    .findAll { params.containsKey(it.plainName) }
-    .collectEntries { [ it.plainName, params[it.plainName] ] }
-  // plain lookup: Groovy's two-argument get(key, default) would also insert id: null into params
-  def globalID = params.get("id")
-  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
-
-  /* process params_list arguments */
-  /*********************************/
-  def paramSets = _parseParamListArguments(params, config)
-  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
-
-  /* combine arguments into channel */
-  /**********************************/
-  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
-    def id = paramSet[0]
-    def parValues = paramSet[1]
-    // an id from the param_list entry wins over the global --id
-    id = [id, globalID].find({it != null}) // first non-null element
-
-    if (workflow.stubRun) {
-      // if stub run, explicitly add an id if missing
-      id = id ? id : "stub" + index
-    }
-    assert id != null: "Each parameter set should have at least an ID."
-    // Add regular parameters together with parameters passed with 'param_list';
-    // on a clash the value from 'param_list' takes precedence
-    def combinedArgsValues = globalParamsValues + parValues
-
-    // Remove parameters which are null, if the default is also null.
-    // findAll states the filter directly instead of returning no entry from collectEntries.
-    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
-      // 'def' keeps the lookup out of the script binding. '?.' covers parameters that are
-      // not described in the config (e.g. publish_dir). The one-argument get leaves the
-      // config map unmodified.
-      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
-      paramValue != null || parameterSettings?.get("default") != null
-    }
-    [id, combinedArgsValues]
-  }
-
-  // Check if ids (first element of each list) are unique
-  _checkUniqueIds(processedParams)
-  return processedParams
-}
-
-/**
- * Parse nextflow parameters based on settings defined in a viash config
- * and return a nextflow channel.
- *
- * @param params Input parameters. Can optionally contain a 'param_list' key that
- *               provides a list of arguments that can be split up into multiple events
- *               in the output channel. Possible formats of param_list are: a csv file,
- *               a json file, a yaml file or a yaml blob. Each parameter set (event) must
- *               have a unique ID.
- * @param config A Map of the Viash configuration. This Map can be generated from the config file
- *               using the readConfig() function.
- *
- * @return A nextflow Channel with events. Events are formatted as a tuple that
- *         first contains the ID of the event and as second element holds a parameter map.
- */
-def channelFromParams(Map params, Map config) {
-  // declared with 'def' so the list is not stored in the script binding
-  def processedParams = _paramsToParamSets(params, config)
-  return Channel.fromList(processedParams)
-}
-
-/**
- * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
- *    - Gather default parameters from the Viash config and make
- *      sure that they are correctly formatted (see applyConfig method).
- *    - Format the input parameters (also using the applyConfig method).
- *    - Apply the default parameter to the input parameters.
- *    - Do some assertions:
- *        ~ Check if the event IDs in the channel are unique.
- *
- * @param params A list of parameter sets as Tuples. The first element of the tuples
- *                must be a unique id of the parameter set, and the second element
- *                must contain the parameters themselves. Optional extra elements
- *                of the tuples will be passed to the output as is.
- * @param config A Map of the Viash configuration. This Map can be generated from
- *                the config file using the readConfig() function.
- *
- * @return A list of processed parameters sets as tuples.
- */
-
-private List _preprocessInputsList(List params, Map config) {
-  // Collect the default value of every argument that declares one
-  def rawDefaults = [:]
-  config.functionality.allArguments.each { arg ->
-    if (arg.containsKey("default")) {
-      rawDefaults[arg.plainName] = arg.default
-    }
-  }
-
-  // Defaults go through the same split/cast/check steps as user supplied values
-  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
-
-  // Process the incoming sets, then lay each one over the defaults:
-  // explicitly passed values win, extra tuple elements are appended untouched
-  def withDefaults = applyConfig(params, config).collect { tuple ->
-    [tuple[0], defaults + tuple[1]] + tuple.drop(2)
-  }
-
-  _checkUniqueIds(withDefaults)
-  return withDefaults
-}
-
-/**
- * Generate a nextflow Workflow that allows processing a channel of
- * Vdsl3 formatted events and apply a Viash config to them:
- *    - Gather default parameters from the Viash config and make
- *      sure that they are correctly formatted (see applyConfig method).
- *    - Format the input parameters (also using the applyConfig method).
- *    - Apply the default parameter to the input parameters.
- *    - Do some assertions:
- *        ~ Check if the event IDs in the channel are unique.
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/transform/regress_out/nextflow.config b/target/nextflow/transform/regress_out/nextflow.config deleted file mode 100644 index 
36073fc822e..00000000000 --- a/target/nextflow/transform/regress_out/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'regress_out' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Regress out (mostly) unwanted sources of variation.\nUses simple linear regression. This is inspired by Seurat\'s regressOut function in R [Satija15]. \nNote that this function tends to overcorrect in certain circumstances as described in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - 
withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/transform/regress_out/nextflow_params.yaml b/target/nextflow/transform/regress_out/nextflow_params.yaml deleted file mode 100644 index 0988f9dcdf5..00000000000 --- a/target/nextflow/transform/regress_out/nextflow_params.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" -modality: "rna" -# obs_keys: ["foo"] - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/regress_out/nextflow_schema.json b/target/nextflow/transform/regress_out/nextflow_schema.json deleted file mode 100644 index 394303af4ac..00000000000 --- a/target/nextflow/transform/regress_out/nextflow_schema.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "regress_out", - 
"description": "Regress out (mostly) unwanted sources of variation.\nUses simple linear regression. This is inspired by Seurat\u0027s regressOut function in R [Satija15]. \nNote that this function tends to overcorrect in certain circumstances as described in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. Which modality (one or more) to run this component on", - "help_text": "Type: `string`, default: `rna`. Which modality (one or more) to run this component on.", - "default": "rna" - }, - - "obs_keys": { - "type": "string", - "description": "Type: List of `string`, multiple_sep: `\":\"`. Which ", - "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Which .obs keys to regress on." - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/transform/regress_out/setup_logger.py b/target/nextflow/transform/regress_out/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/transform/regress_out/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/transform/scale/.config.vsh.yaml b/target/nextflow/transform/scale/.config.vsh.yaml deleted file mode 100644 index 5a8ea4ffe27..00000000000 --- a/target/nextflow/transform/scale/.config.vsh.yaml +++ /dev/null @@ -1,205 +0,0 @@ -functionality: - name: "scale" - namespace: "transform" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Input h5mu file." - info: null - example: - - "input.h5mu" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--modality" - description: "List of modalities to process." 
- info: null - default: - - "rna" - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "double" - name: "--max_value" - description: "Clip (truncate) to this value after scaling. Does not clip by default." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--zero_center" - description: "If False, omit zero-centering variables, which allows to handle\ - \ sparse input efficiently." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Output h5mu file." - info: null - default: - - "output.h5mu" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." 
- info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - description: "Scale data to unit variance and zero mean.\n" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/pbmc_1k_protein_v3" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.10-slim-bullseye" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "libhdf5-dev" - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scanpy~=1.9.5" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "nextflow" - id: "nextflow" - directives: - label: - - "lowmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: 
"memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -- type: "native" - id: "native" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/scale" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/scale/scale" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/transform/scale/main.nf b/target/nextflow/transform/scale/main.nf deleted file mode 100644 index c731506a950..00000000000 --- a/target/nextflow/transform/scale/main.nf +++ /dev/null @@ -1,2625 +0,0 @@ -// scale 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "scale", - "namespace" : "transform", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Input h5mu file.", - "example" : [ - "input.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--modality", - "description" : "List of modalities to process.", - "default" : [ - "rna" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "double", - "name" : "--max_value", - "description" : "Clip (truncate) to this value after scaling. 
Does not clip by default.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--zero_center", - "description" : "If False, omit zero-centering variables, which allows to handle sparse input efficiently.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Output h5mu file.", - "default" : [ - "output.h5mu" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/scaling/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "description" : "Scale data to unit variance and zero mean.\n", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/scaling/" - }, - { - "type" : "file", - "path" : "resources_test/pbmc_1k_protein_v3", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : 
"docker", - "id" : "docker", - "image" : "python:3.10-slim-bullseye", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "libhdf5-dev", - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scanpy~=1.9.5" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "lowmem", - "lowcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : 
"docker" - }, - { - "type" : "native", - "id" : "native" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/scale", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -from mudata import read_h5mu -import scanpy - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'max_value': $( if [ ! -z ${VIASH_PAR_MAX_VALUE+x} ]; then echo "float(r'${VIASH_PAR_MAX_VALUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'zero_center': $( if [ ! -z ${VIASH_PAR_ZERO_CENTER+x} ]; then echo "r'${VIASH_PAR_ZERO_CENTER//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -def main(): - logger.info(f'Reading .h5mu file: {par["input"]}') - mudata = read_h5mu(par["input"]) - mod = par["modality"] - data = mudata.mod[mod] - - logger.info("Scaling modality: %s", mod) - scanpy.pp.scale(data, - zero_center=par["zero_center"], - max_value=par["max_value"]) - - logger.info("Writing to %s", par["output"]) - mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) - logger.info("Finished") - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/transform_scale", - "tag" : "0.12.0" - }, - "label" : [ - "lowmem", - "lowcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - 
map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
- // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? 
- def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? 
arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? "_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? 
[] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. - if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/transform/scale/nextflow.config b/target/nextflow/transform/scale/nextflow.config deleted file mode 100644 index 600f084181c..00000000000 
--- a/target/nextflow/transform/scale/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'scale' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Scale data to unit variance and zero mean.\n' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB 
} - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/transform/scale/nextflow_params.yaml b/target/nextflow/transform/scale/nextflow_params.yaml deleted file mode 100644 index 8ac8cc5c9f6..00000000000 --- a/target/nextflow/transform/scale/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: "input.h5mu" -modality: "rna" -# max_value: 123.0 -zero_center: true -# output: "$id.$key.output.h5mu" -# output_compression: "gzip" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/scale/nextflow_schema.json b/target/nextflow/transform/scale/nextflow_schema.json deleted file mode 100644 index c7c845010eb..00000000000 --- a/target/nextflow/transform/scale/nextflow_schema.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "scale", - "description": "Scale data to unit variance and zero mean.\n", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - "properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required, example: `input.h5mu`. 
Input h5mu file", - "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file." - }, - - "modality": { - "type": "string", - "description": "Type: `string`, default: `rna`. List of modalities to process", - "help_text": "Type: `string`, default: `rna`. List of modalities to process.", - "default": "rna" - }, - - "max_value": { - "type": "number", - "description": "Type: `double`. Clip (truncate) to this value after scaling", - "help_text": "Type: `double`. Clip (truncate) to this value after scaling. Does not clip by default." - }, - - "zero_center": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. If False, omit zero-centering variables, which allows to handle sparse input efficiently", - "help_text": "Type: `boolean`, default: `true`. If False, omit zero-centering variables, which allows to handle sparse input efficiently.", - "default": "True" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", - "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file.", - "default": "$id.$key.output.h5mu" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/transform/scale/setup_logger.py b/target/nextflow/transform/scale/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/transform/scale/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/velocity/scvelo/.config.vsh.yaml b/target/nextflow/velocity/scvelo/.config.vsh.yaml deleted file mode 100644 index 1032a08d4d1..00000000000 --- a/target/nextflow/velocity/scvelo/.config.vsh.yaml +++ /dev/null @@ -1,276 +0,0 @@ -functionality: - name: "scvelo" - namespace: "velocity" - version: "0.12.3" - authors: - - name: "Dries Schaumont" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "dries@data-intuitive.com" - github: "DriesSchaumont" - orcid: "0000-0002-4389-0440" - linkedin: "dries-schaumont" - organizations: - - name: "Data Intuitive" - href: "https://www.data-intuitive.com" - role: "Data Scientist" - argument_groups: - - name: "Inputs" - arguments: - - type: "file" - name: "--input" - description: "Velocyto loom file." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Outputs" - arguments: - - type: "file" - name: "--output" - description: "Output directory. If it does not exist, will be created." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--output_compression" - description: "The compression format to be used on the output h5mu object." - info: null - example: - - "gzip" - required: false - choices: - - "gzip" - - "lzf" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Filtering and normalization" - description: "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize\ - \ function)" - arguments: - - type: "integer" - name: "--min_counts" - description: "Minimum number of counts required for a gene to pass filtering\ - \ (spliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_counts_u" - description: "Minimum number of counts required for a gene to pass filtering\ - \ (unspliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells" - description: "Minimum number of cells expressed required to pass filtering (spliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_cells_u" - description: "Minimum number of cells expressed required to pass filtering (unspliced)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_shared_counts" - description: "Minimum number of counts (both unspliced and spliced) required\ - \ for a gene." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--min_shared_cells" - description: "Minimum number of cells required to be expressed (both unspliced\ - \ and spliced)." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_top_genes" - description: "Number of genes to keep." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean" - name: "--log_transform" - description: "Do not log transform counts." - info: null - default: - - true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - name: "Fitting parameters" - description: "Arguments for fitting the data" - arguments: - - type: "integer" - name: "--n_principal_components" - description: "Number of principal components to use for calculating moments." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "integer" - name: "--n_neighbors" - description: "Number of neighbors to use. First/second-order moments are computed\ - \ for each\ncell across its nearest neighbors, where the neighbor graph is\ - \ obtained from\neuclidean distances in PCA space.\n" - info: null - default: - - 30 - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "python_script" - path: "script.py" - is_executable: true - - type: "file" - path: "src/utils/setup_logger.py" - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - interactive: false - - type: "python" - user: false - packages: - - "mudata~=0.2.3" - - "anndata~=0.9.1" - - "scvelo~=0.2.5" - - "numpy~=1.23.5" - - "matplotlib<3.8.0" - upgrade: true - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "highcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/scvelo" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/scvelo/scvelo" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git 
a/target/nextflow/velocity/scvelo/main.nf b/target/nextflow/velocity/scvelo/main.nf deleted file mode 100644 index 2f2a020d322..00000000000 --- a/target/nextflow/velocity/scvelo/main.nf +++ /dev/null @@ -1,2761 +0,0 @@ -// scvelo 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. -// -// Component authors: -// * Dries Schaumont (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "scvelo", - "namespace" : "velocity", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Dries Schaumont", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "dries@data-intuitive.com", - "github" : "DriesSchaumont", - "orcid" : "0000-0002-4389-0440", - "linkedin" : "dries-schaumont" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Scientist" - } - ] - } - } - ], - "argument_groups" : [ - { - "name" : "Inputs", - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "description" : "Velocyto loom file.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Outputs", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "description" : "Output directory. 
If it does not exist, will be created.", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--output_compression", - "description" : "The compression format to be used on the output h5mu object.", - "example" : [ - "gzip" - ], - "required" : false, - "choices" : [ - "gzip", - "lzf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Filtering and normalization", - "description" : "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize function)", - "arguments" : [ - { - "type" : "integer", - "name" : "--min_counts", - "description" : "Minimum number of counts required for a gene to pass filtering (spliced).", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_counts_u", - "description" : "Minimum number of counts required for a gene to pass filtering (unspliced).", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_cells", - "description" : "Minimum number of cells expressed required to pass filtering (spliced).", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_cells_u", - "description" : "Minimum number of cells expressed required to pass filtering (unspliced).", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_shared_counts", - "description" : "Minimum number of counts (both unspliced and spliced) required for a gene.", - "required" : false, - "direction" : "input", - "multiple" : 
false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--min_shared_cells", - "description" : "Minimum number of cells required to be expressed (both unspliced and spliced).", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_top_genes", - "description" : "Number of genes to keep.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean", - "name" : "--log_transform", - "description" : "Do not log transform counts.", - "default" : [ - true - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - }, - { - "name" : "Fitting parameters", - "description" : "Arguments for fitting the data", - "arguments" : [ - { - "type" : "integer", - "name" : "--n_principal_components", - "description" : "Number of principal components to use for calculating moments.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "integer", - "name" : "--n_neighbors", - "description" : "Number of neighbors to use. 
First/second-order moments are computed for each\ncell across its nearest neighbors, where the neighbor graph is obtained from\neuclidean distances in PCA space.\n", - "default" : [ - 30 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ] - } - ], - "resources" : [ - { - "type" : "python_script", - "path" : "script.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/" - }, - { - "type" : "file", - "path" : "src/utils/setup_logger.py", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/" - }, - { - "type" : "file", - "path" : "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "mudata~=0.2.3", - "anndata~=0.9.1", - "scvelo~=0.2.5", - "numpy~=1.23.5", - "matplotlib<3.8.0" - ], - "upgrade" : true - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : 
"native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/scvelo", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -import sys -import scvelo -import mudata -from contextlib import redirect_stdout -from pathlib import Path -import matplotlib as mpl - -## VIASH START -# The following code has been auto-generated by Viash. 
-par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_counts_u': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS_U//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_cells_u': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_U//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_shared_counts': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'min_shared_cells': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'log_transform': $( if [ ! -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then echo "r'${VIASH_PAR_LOG_TRANSFORM//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), - 'n_principal_components': $( if [ ! -z ${VIASH_PAR_N_PRINCIPAL_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_N_PRINCIPAL_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'n_neighbors': $( if [ ! 
-z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} -meta = { - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) -} - -## VIASH END - -sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger -logger = setup_logger() - -mpl.rcParams['savefig.dpi']=150 - -# Script must be wrapped into a main function because scvelo spawn subprocesses -# and this fails when the functions are not wrapped. -def main(): - # Create output directory - output_dir = Path(par['output']) - output_dir.mkdir(parents=True, exist_ok=True) - scvelo.settings.figdir = str(output_dir) - - - # Calculate the sample name - sample_name = par["output"].removesuffix(".loom") - sample_name = Path(sample_name).name - - # Read the input data - adata = scvelo.read(par['input']) - - # Save spliced vs unspliced proportions to file - with (output_dir / "proportions.txt").open('w') as target: - with redirect_stdout(target): - scvelo.utils.show_proportions(adata) - - # Plot piecharts of spliced vs unspliced proportions - scvelo.pl.proportions(adata, save=True, show=False) - - # Perform preprocessing - scvelo.pp.filter_and_normalize(adata, - min_counts=par["min_counts"], - min_counts_u=par["min_counts_u"], - min_cells=par["min_cells"], - min_cells_u=par["min_cells_u"], - min_shared_counts=par["min_shared_counts"], - min_shared_cells=par["min_shared_cells"], - n_top_genes=par["n_top_genes"], - log=par["log_transform"]) - - # Fitting - scvelo.pp.moments(adata, - 
n_pcs=par["n_principal_components"], - n_neighbors=par["n_neighbors"]) - - - # Second step in velocyto calculations - # Velocity calculation and visualization - # From the scvelo manual: - # The solution to the full dynamical model is obtained by setting mode='dynamical', - # which requires to run scv.tl.recover_dynamics(adata) beforehand - scvelo.tl.recover_dynamics(adata) - scvelo.tl.velocity(adata, mode="dynamical") - scvelo.tl.velocity_graph(adata) - scvelo.pl.velocity_graph(adata, save=str(output_dir / "scvelo_graph.pdf"), show=False) - - # Plotting - # TODO: add more here. - scvelo.pl.velocity_embedding_stream(adata, save=str(output_dir / "scvelo_embedding.pdf"), show=False) - - # Create output - ouput_data = mudata.MuData({'rna_velocity': adata}) - ouput_data.write_h5mu(output_dir / f"{sample_name}.h5mu", compression=par["output_compression"]) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/velocity_scvelo", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "highcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. 
the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/velocity/scvelo/nextflow.config b/target/nextflow/velocity/scvelo/nextflow.config deleted file mode 100644 index 957ad6a2c86..00000000000 
--- a/target/nextflow/velocity/scvelo/nextflow.config +++ /dev/null @@ -1,107 +0,0 @@ -manifest { - name = 'scvelo' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - author = 'Dries Schaumont' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { 
memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/velocity/scvelo/nextflow_params.yaml b/target/nextflow/velocity/scvelo/nextflow_params.yaml deleted file mode 100644 index 58cca805fd1..00000000000 --- a/target/nextflow/velocity/scvelo/nextflow_params.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Inputs -input: # please fill in - example: "path/to/file" - -# Outputs -# output: "$id.$key.output.output" -# output_compression: "gzip" - -# Filtering and normalization -# min_counts: 123 -# min_counts_u: 123 -# min_cells: 123 -# min_cells_u: 123 -# min_shared_counts: 123 -# min_shared_cells: 123 -# n_top_genes: 123 -log_transform: true - -# Fitting parameters -# n_principal_components: 123 -n_neighbors: 30 - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/velocity/scvelo/nextflow_schema.json b/target/nextflow/velocity/scvelo/nextflow_schema.json deleted file mode 100644 index 07df84b903b..00000000000 --- a/target/nextflow/velocity/scvelo/nextflow_schema.json +++ /dev/null @@ -1,161 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "scvelo", - "description": "No description", - "type": "object", - "definitions": { - "inputs" : { - "title": "Inputs", - "type": "object", - "description": "No description", - 
"properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Velocyto loom file", - "help_text": "Type: `file`, required. Velocyto loom file." - } - - } - }, - "outputs" : { - "title": "Outputs", - "type": "object", - "description": "No description", - "properties": { - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Output directory", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output directory. If it does not exist, will be created.", - "default": "$id.$key.output.output" - }, - - "output_compression": { - "type": "string", - "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", - "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", - "enum": ["gzip", "lzf"] - - } - - } - }, - "filtering and normalization" : { - "title": "Filtering and normalization", - "type": "object", - "description": "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize function)", - "properties": { - - "min_counts": { - "type": "integer", - "description": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (spliced)", - "help_text": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (spliced)." - }, - - "min_counts_u": { - "type": "integer", - "description": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (unspliced)", - "help_text": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (unspliced)." - }, - - "min_cells": { - "type": "integer", - "description": "Type: `integer`. Minimum number of cells expressed required to pass filtering (spliced)", - "help_text": "Type: `integer`. 
Minimum number of cells expressed required to pass filtering (spliced)." - }, - - "min_cells_u": { - "type": "integer", - "description": "Type: `integer`. Minimum number of cells expressed required to pass filtering (unspliced)", - "help_text": "Type: `integer`. Minimum number of cells expressed required to pass filtering (unspliced)." - }, - - "min_shared_counts": { - "type": "integer", - "description": "Type: `integer`. Minimum number of counts (both unspliced and spliced) required for a gene", - "help_text": "Type: `integer`. Minimum number of counts (both unspliced and spliced) required for a gene." - }, - - "min_shared_cells": { - "type": "integer", - "description": "Type: `integer`. Minimum number of cells required to be expressed (both unspliced and spliced)", - "help_text": "Type: `integer`. Minimum number of cells required to be expressed (both unspliced and spliced)." - }, - - "n_top_genes": { - "type": "integer", - "description": "Type: `integer`. Number of genes to keep", - "help_text": "Type: `integer`. Number of genes to keep." - }, - - "log_transform": { - "type": "boolean", - "description": "Type: `boolean`, default: `true`. Do not log transform counts", - "help_text": "Type: `boolean`, default: `true`. Do not log transform counts.", - "default": "True" - } - - } - }, - "fitting parameters" : { - "title": "Fitting parameters", - "type": "object", - "description": "Arguments for fitting the data", - "properties": { - - "n_principal_components": { - "type": "integer", - "description": "Type: `integer`. Number of principal components to use for calculating moments", - "help_text": "Type: `integer`. Number of principal components to use for calculating moments." - }, - - "n_neighbors": { - "type": "integer", - "description": "Type: `integer`, default: `30`. Number of neighbors to use", - "help_text": "Type: `integer`, default: `30`. Number of neighbors to use. 
First/second-order moments are computed for each\ncell across its nearest neighbors, where the neighbor graph is obtained from\neuclidean distances in PCA space.\n", - "default": "30" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/inputs" - }, - { - "$ref": "#/definitions/outputs" - }, - { - "$ref": "#/definitions/filtering and normalization" - }, - { - "$ref": "#/definitions/fitting parameters" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} diff --git a/target/nextflow/velocity/scvelo/setup_logger.py b/target/nextflow/velocity/scvelo/setup_logger.py deleted file mode 100644 index ae71eb96115..00000000000 --- a/target/nextflow/velocity/scvelo/setup_logger.py +++ /dev/null @@ -1,12 +0,0 @@ -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger \ No newline at end of file diff --git a/target/nextflow/velocity/velocyto/.config.vsh.yaml b/target/nextflow/velocity/velocyto/.config.vsh.yaml deleted file mode 100644 index 4ebb609e299..00000000000 --- a/target/nextflow/velocity/velocyto/.config.vsh.yaml +++ /dev/null @@ -1,225 +0,0 @@ -functionality: - name: "velocyto" - namespace: "velocity" - version: "0.12.3" - authors: - - name: "Robrecht Cannoodt" - roles: - - "maintainer" - info: - role: "Core Team Member" - links: - email: "robrecht@data-intuitive.com" - github: "rcannood" - orcid: "0000-0003-3641-729X" - linkedin: "robrechtcannoodt" - organizations: - - name: "Data Intuitive" - href: 
"https://www.data-intuitive.com" - role: "Data Science Engineer" - - name: "Open Problems" - href: "https://openproblems.bio" - role: "Core Member" - arguments: - - type: "file" - name: "--input" - alternatives: - - "-i" - description: "Path to BAM file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--transcriptome" - alternatives: - - "-t" - description: "Path to GTF file" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "file" - name: "--barcode" - alternatives: - - "-b" - description: "Valid barcodes file, to filter the bam. If --bcfile is not specified\ - \ all the cell barcodes will be included.\nCell barcodes should be specified\ - \ in the bcfile as the 'CB' tag for each read\n" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "boolean_true" - name: "--without_umi" - description: "foo" - info: null - direction: "input" - dest: "par" - - type: "file" - name: "--output" - alternatives: - - "-o" - description: "Velocyto loom file" - info: null - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ":" - dest: "par" - - type: "string" - name: "--logic" - alternatives: - - "-l" - description: "The logic to use for the filtering." 
- info: null - default: - - "Default" - required: false - choices: - - "Default" - - "Permissive10X" - - "Intermediate10X" - - "ValidatedIntrons10X" - - "Stricter10X" - - "ObservedSpanning10X" - - "Discordant10X" - - "SmartSeq2" - direction: "input" - multiple: false - multiple_sep: ":" - dest: "par" - resources: - - type: "bash_script" - path: "script.sh" - is_executable: true - description: "Runs the velocity analysis on a BAM file, outputting a loom file." - test_resources: - - type: "python_script" - path: "test.py" - is_executable: true - - type: "file" - path: "resources_test/cellranger_tiny_fastq" - - type: "file" - path: "resources_test/rna_velocity" - - type: "file" - path: "resources_test/reference_gencodev41_chr1" - info: null - status: "enabled" - requirements: - commands: - - "ps" - set_wd_to_resources_dir: false -platforms: -- type: "docker" - id: "docker" - image: "python:3.9-slim" - target_organization: "openpipelines-bio" - target_registry: "ghcr.io" - target_tag: "0.12.0" - namespace_separator: "_" - resolve_volume: "Automatic" - chown: true - setup_strategy: "ifneedbepullelsecachedbuild" - target_image_source: "https://github.com/openpipelines-bio/openpipeline" - setup: - - type: "apt" - packages: - - "procps" - - "build-essential" - - "file" - interactive: false - - type: "python" - user: false - pip: - - "numpy" - - "Cython" - upgrade: true - - type: "python" - user: false - pip: - - "velocyto" - upgrade: true - - type: "apt" - packages: - - "samtools" - interactive: false - test_setup: - - type: "python" - user: false - packages: - - "viashpy==0.5.0" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "highmem" - - "lowcpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1.GB" - mem2gb: "memory = 2.GB" - mem4gb: "memory = 4.GB" - mem8gb: "memory = 8.GB" - 
mem16gb: "memory = 16.GB" - mem32gb: "memory = 32.GB" - mem64gb: "memory = 64.GB" - mem128gb: "memory = 128.GB" - mem256gb: "memory = 256.GB" - mem512gb: "memory = 512.GB" - mem1tb: "memory = 1.TB" - mem2tb: "memory = 2.TB" - mem4tb: "memory = 4.TB" - mem8tb: "memory = 8.TB" - mem16tb: "memory = 16.TB" - mem32tb: "memory = 32.TB" - mem64tb: "memory = 64.TB" - mem128tb: "memory = 128.TB" - mem256tb: "memory = 256.TB" - mem512tb: "memory = 512.TB" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -info: - config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml" - platform: "nextflow" - output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/velocyto" - executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/velocyto/velocyto" - viash_version: "0.7.5" - git_commit: "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f" - git_remote: "https://github.com/openpipelines-bio/openpipeline" - git_tag: "0.12.2-3-g827d483cf7" diff --git a/target/nextflow/velocity/velocyto/main.nf b/target/nextflow/velocity/velocyto/main.nf deleted file mode 100644 index 26248b02f97..00000000000 --- a/target/nextflow/velocity/velocyto/main.nf +++ /dev/null @@ -1,2650 +0,0 @@ -// velocyto 0.12.3 -// -// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. 
-// -// Component authors: -// * Robrecht Cannoodt (maintainer) - -nextflow.enable.dsl=2 - -// Required imports -import groovy.json.JsonSlurper - -// initialise slurper -def jsonSlurper = new JsonSlurper() - -// DEFINE CUSTOM CODE - -// functionality metadata -thisConfig = processConfig(jsonSlurper.parseText('''{ - "functionality" : { - "name" : "velocyto", - "namespace" : "velocity", - "version" : "0.12.3", - "authors" : [ - { - "name" : "Robrecht Cannoodt", - "roles" : [ - "maintainer" - ], - "info" : { - "role" : "Core Team Member", - "links" : { - "email" : "robrecht@data-intuitive.com", - "github" : "rcannood", - "orcid" : "0000-0003-3641-729X", - "linkedin" : "robrechtcannoodt" - }, - "organizations" : [ - { - "name" : "Data Intuitive", - "href" : "https://www.data-intuitive.com", - "role" : "Data Science Engineer" - }, - { - "name" : "Open Problems", - "href" : "https://openproblems.bio", - "role" : "Core Member" - } - ] - } - } - ], - "arguments" : [ - { - "type" : "file", - "name" : "--input", - "alternatives" : [ - "-i" - ], - "description" : "Path to BAM file", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--transcriptome", - "alternatives" : [ - "-t" - ], - "description" : "Path to GTF file", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--barcode", - "alternatives" : [ - "-b" - ], - "description" : "Valid barcodes file, to filter the bam. 
If --bcfile is not specified all the cell barcodes will be included.\nCell barcodes should be specified in the bcfile as the 'CB' tag for each read\n", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "boolean_true", - "name" : "--without_umi", - "description" : "foo", - "direction" : "input", - "dest" : "par" - }, - { - "type" : "file", - "name" : "--output", - "alternatives" : [ - "-o" - ], - "description" : "Velocyto loom file", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - }, - { - "type" : "string", - "name" : "--logic", - "alternatives" : [ - "-l" - ], - "description" : "The logic to use for the filtering.", - "default" : [ - "Default" - ], - "required" : false, - "choices" : [ - "Default", - "Permissive10X", - "Intermediate10X", - "ValidatedIntrons10X", - "Stricter10X", - "ObservedSpanning10X", - "Discordant10X", - "SmartSeq2" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ":", - "dest" : "par" - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/" - } - ], - "description" : "Runs the velocity analysis on a BAM file, outputting a loom file.", - "test_resources" : [ - { - "type" : "python_script", - "path" : "test.py", - "is_executable" : true, - "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/" - }, - { - "type" : "file", - "path" : "resources_test/cellranger_tiny_fastq", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : "resources_test/rna_velocity", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - }, - { - "type" : "file", - "path" : 
"resources_test/reference_gencodev41_chr1", - "parent" : "file:///home/runner/work/openpipeline/openpipeline/" - } - ], - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "set_wd_to_resources_dir" : false - }, - "platforms" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "python:3.9-slim", - "target_organization" : "openpipelines-bio", - "target_registry" : "ghcr.io", - "target_tag" : "0.12.0", - "namespace_separator" : "_", - "resolve_volume" : "Automatic", - "chown" : true, - "setup_strategy" : "ifneedbepullelsecachedbuild", - "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "procps", - "build-essential", - "file" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "pip" : [ - "numpy", - "Cython" - ], - "upgrade" : true - }, - { - "type" : "python", - "user" : false, - "pip" : [ - "velocyto" - ], - "upgrade" : true - }, - { - "type" : "apt", - "packages" : [ - "samtools" - ], - "interactive" : false - } - ], - "test_setup" : [ - { - "type" : "python", - "user" : false, - "packages" : [ - "viashpy==0.5.0" - ], - "upgrade" : true - } - ] - }, - { - "type" : "native", - "id" : "native" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "label" : [ - "highmem", - "lowcpu" - ], - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1.GB", - "mem2gb" : "memory = 2.GB", - "mem4gb" : "memory = 4.GB", - "mem8gb" : "memory = 8.GB", - "mem16gb" : "memory = 16.GB", - "mem32gb" : "memory = 32.GB", - "mem64gb" : "memory = 64.GB", - "mem128gb" : "memory = 128.GB", - "mem256gb" : "memory = 256.GB", - "mem512gb" : "memory = 512.GB", - "mem1tb" : "memory = 1.TB", - "mem2tb" : "memory = 2.TB", - "mem4tb" : "memory = 4.TB", - "mem8tb" : "memory = 8.TB", - "mem16tb" : "memory = 
16.TB", - "mem32tb" : "memory = 32.TB", - "mem64tb" : "memory = 64.TB", - "mem128tb" : "memory = 128.TB", - "mem256tb" : "memory = 256.TB", - "mem512tb" : "memory = 512.TB", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "info" : { - "config" : "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml", - "platform" : "nextflow", - "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/velocyto", - "viash_version" : "0.7.5", - "git_commit" : "827d483cf7d8844f3a3745b724f1d9cdeb3c7a2f", - "git_remote" : "https://github.com/openpipelines-bio/openpipeline", - "git_tag" : "0.12.2-3-g827d483cf7" - } -}''')) - -thisScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -#!/bin/bash - -set -eo pipefail - -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome='&'#" ; else echo "# par_transcriptome="; fi ) -$( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "${VIASH_PAR_BARCODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_barcode='&'#" ; else echo "# par_barcode="; fi ) -$( if [ ! -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then echo "${VIASH_PAR_WITHOUT_UMI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_without_umi='&'#" ; else echo "# par_without_umi="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! 
-z ${VIASH_PAR_LOGIC+x} ]; then echo "${VIASH_PAR_LOGIC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logic='&'#" ; else echo "# par_logic="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) - -## VIASH END - -extra_params=( ) - -if [ ! -z "\\$par_barcode" ]; then - extra_params+=( "--bcfile=\\$par_barcode" ) -fi -if [ "\\$par_without_umi" == "true" ]; then - extra_params+=( "--without-umi" ) -fi -if [ ! -z "\\$meta_cpus" ]; then - extra_params+=( "--samtools-threads" "\\$meta_cpus" ) -fi -if [ ! -z "\\$meta_memory_mb" ]; then - extra_params+=( "--samtools-memory" "\\$meta_memory_mb" ) -fi - -output_dir=\\`dirname "\\$par_output"\\` -sample_id=\\`basename "\\$par_output" .loom\\` - -if (file \\`readlink -f "\\$par_transcriptome"\\` | grep -q compressed ) ; then - # create temporary directory - tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") - function clean_up { - rm -rf "\\$tmpdir" - } - trap clean_up EXIT - - zcat "\\$par_transcriptome" > "\\$tmpdir/genes.gtf" - par_transcriptome="\\$tmpdir/genes.gtf" -fi - -velocyto run \\\\ - "\\$par_input" \\\\ - "\\$par_transcriptome" \\\\ - "\\${extra_params[@]}" \\\\ - --outputfolder "\\$output_dir" \\\\ - --sampleid "\\$sample_id" -VIASHMAIN -bash "$tempscript" -''' - -thisDefaultProcessArgs = [ - // key to be used to trace the process and determine output names - key: thisConfig.functionality.name, - // fixed arguments to be passed to script - args: [:], - // default directives - directives: jsonSlurper.parseText('''{ - "container" : { - "registry" : "ghcr.io", - "image" : "openpipelines-bio/velocity_velocyto", - "tag" : "0.12.0" - }, - "label" : [ - "highmem", - "lowcpu" - ], - "tag" : "$id" -}'''), - // auto settings - auto: jsonSlurper.parseText('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of 
a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
- // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. - // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// END CUSTOM CODE - -///////////////////////////////////// -// Viash Workflow helper functions // -///////////////////////////////////// - -import java.util.regex.Pattern -import java.io.BufferedReader -import java.io.FileReader -import java.nio.file.Paths -import java.nio.file.Files -import groovy.json.JsonSlurper -import groovy.text.SimpleTemplateEngine -import org.yaml.snakeyaml.Yaml - -// param helpers // -def paramExists(name) { - return params.containsKey(name) && params[name] != "" -} - -def assertParamExists(name, description) { - if (!paramExists(name)) { - exit 1, "ERROR: Please provide a --${name} parameter ${description}" - } -} - -// helper functions for reading params from file // -def getChild(parent, child) { - if (child.contains("://") || Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = Pattern.compile('''"(.*)"''') - - def br = Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? 
m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -def readJsonBlob(str) { - def jsonSlurper = new JsonSlurper() - jsonSlurper.parseText(str) -} - -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def jsonSlurper = new JsonSlurper() - jsonSlurper.parse(inputFile) -} - -def readYamlBlob(str) { - def yamlSlurper = new Yaml() - yamlSlurper.load(str) -} - -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? file(file_path) : file_path - def yamlSlurper = new Yaml() - yamlSlurper.load(inputFile) -} - -// helper functions for reading a viash config in groovy // - -// based on how Functionality.scala is implemented -def processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - if (arg.type == "file" && arg.direction == "output") { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// based on how Functionality.scala is implemented -def processArgumentGroup(argumentGroups, name, arguments) { - def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() - - // Check if 'arguments' is in 'argumentGroups'. - def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} - - // Check whether an argument group of 'name' exists. - def existing = argumentGroups.find{gr -> name == gr.name} - - // if there are no arguments missing from the argument group, just return the existing group (if any) - if (argumentsNotInGroup.isEmpty()) { - return existing == null ? [] : [existing] - - // if there are missing arguments and there is an existing group, add the missing arguments to it - } else if (existing != null) { - def newEx = existing.clone() - newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) - return [newEx] - - // else create a new group - } else { - def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] - return [newEx] - } -} - -// based on how Functionality.scala is implemented -def processConfig(config) { - // TODO: assert .functionality etc. 
- if (config.functionality.inputs) { - System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") - } - if (config.functionality.outputs) { - System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") - } - - // set defaults for inputs - config.functionality.inputs = - (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "input" - processArgument(arg) - } - // set defaults for outputs - config.functionality.outputs = - (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> - arg.type = arg.type != null ? arg.type : "file" - arg.direction = "output" - processArgument(arg) - } - // set defaults for arguments - config.functionality.arguments = - (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> - processArgument(arg) - } - // set defaults for argument_group arguments - config.functionality.argument_groups = - (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> - grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> - arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) - } - grp - } - - // create combined arguments list - config.functionality.allArguments = - config.functionality.inputs + - config.functionality.outputs + - config.functionality.arguments + - config.functionality.argument_groups.collectMany{ group -> - group.arguments.findAll{ it !instanceof String } - } - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.functionality.argument_groups - def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) - def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) - def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) - def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) - config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered - - config -} - -def readConfig(file) { - def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") - processConfig(config) -} - -// recursively merge two maps -def mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -def addGlobalParams(config) { - def localConfig = [ - "functionality" : [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ], - ] - ] - ] - ] - ] - - return processConfig(mergeMap(config, localConfig)) -} - -// helper functions for generating help // - -// based on io.viash.helpers.Format.wordWrap -def formatWordWrap(str, maxLength) { - def words = str.split("\\s").toList() - - def word = null - def line = "" - def lines = [] - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - lines.add(line) - line = word - } - if (words.isEmpty()) { - lines.add(line) - } - } - return lines -} - -// based on Format.paragraphWrap -def paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if (param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def generateHelp(config) { - def fun = config.functionality - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? - "" : - "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -def helpMessage(config) { - if (paramExists("help")) { - def mergedConfig = addGlobalParams(config) - def helpStr = generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -def _guessParamListFormat(params) { - if (!params.containsKey("param_list") || params.param_list == null) { - "none" - } else { - def param_list = params.param_list - - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } - } -} - -viashChannelDeprecationWarningPrinted = false - -def paramsToList(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - // fetch default params from functionality - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - - // check multi input params - // objects should be closures and not functions, thanks to FunctionDef - def multiParamFormat = _guessParamListFormat(params) - - def multiOptionFunctions = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert multiOptionFunctions.containsKey(multiParamFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" - - // fetch multi param inputs - def multiOptionFun = multiOptionFunctions.get(multiParamFormat) - // todo: add try catch - def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") - def paramList = multiOptionOut[1] - def multiFile = multiOptionOut[0] - - // data checks - assert paramList instanceof List: "--param_list should contain a list of maps" - for (value in paramList) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // combine parameters - def processedParams = paramList.collect{ multiParam -> - // combine params - def combinedArgs = defaultArgs + paramArgs + multiParam - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - combinedArgs = [id: "stub"] + combinedArgs - } else { - // else check whether required arguments exist - config.functionality.allArguments - .findAll { it.required } - .forEach { par -> - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - - // process arguments - def inputs = config.functionality.allArguments - .findAll{ par -> combinedArgs.containsKey(par.plainName) } - .collectEntries { par -> - // split on 'multiple_sep' - if (par.multiple) { - parData = combinedArgs[par.plainName] - if (parData instanceof List) { - parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } - } else if (parData instanceof String) { - parData = parData.split(par.multiple_sep) - } else if (parData == null) { - parData = [] - } else { - parData = [ parData ] - } - } else { - parData = [ combinedArgs[par.plainName] ] - } - - // flatten - parData = parData.flatten() - - // cast types - if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { - parData = parData.collect{path -> - if (path !instanceof String) { - path - } else if (multiFile) { - file(getChild(multiFile, path)) - } else { - file(path) - } - }.flatten() - } else if (par.type == "integer") { - parData = parData.collect{it as Integer} - } else if (par.type == "double") { - parData = parData.collect{it as Double} - } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { - parData = parData.collect{it as Boolean} - } - // simplify list to value if need be - if (!par.multiple) { - assert parData.size() == 1 : - "Error: argument ${par.plainName} has too many values.\n" + - " Expected amount: 1. Found: ${parData.size()}" - parData = parData[0] - } - - // return pair - [ par.plainName, parData ] - } - // remove parameters which were explicitly set to null - .findAll{ par -> par != null } - } - - - // check processed params - processedParams.forEach { args -> - assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" - } - def ppIds = processedParams.collect{it.id} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" - - processedParams -} - -def paramsToChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + - "Please use a combination of channelFromParams and preprocessInputs.") - } - Channel.fromList(paramsToList(params, config)) -} - -def viashChannel(params, config) { - if (!viashChannelDeprecationWarningPrinted) { - viashChannelDeprecationWarningPrinted = true - System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + - "Please use a combination of channelFromParams and preprocessInputs.") - } - paramsToChannel(params, config) - | map{tup -> [tup.id, tup]} -} - -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. - */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. 
- */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -/** - * Resolve the file paths in the parameters relative to given path - * - * @param paramList A Map containing parameters to process. - * This function assumes that files are still of type String. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * @param relativeTo path of a file to resolve the parameters values to. - * - * @return A map of parameters where the location of the input file parameters have been resolved - * resolved relatively to the provided path. - */ -private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { - paramList.collectEntries { parName, parValue -> - argSettings = config.functionality.allArguments.find{it.plainName == parName} - if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collect({path -> - path !instanceof String ? path : file(getChild(relativeTo, path)) - }) - } else { - parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) - } - } - [parName, parValue] - } -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters from nextflow. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
- */ -private List> _parseParamListArguments(Map params, Map config){ - // first try to guess the format (if not set in params) - def paramListFormat = _guessParamListFormat(params) - - // get the correct parser function for the detected params_list format - def paramListParsers = [ - "csv": {[it, readCsv(it)]}, - "json": {[it, readJson(it)]}, - "yaml": {[it, readYaml(it)]}, - "yaml_blob": {[null, readYamlBlob(it)]}, - "asis": {[null, it]}, - "none": {[null, [[:]]]} - ] - assert paramListParsers.containsKey(paramListFormat): - "Format of provided --param_list not recognised.\n" + - "You can use '--param_list_format' to manually specify the format.\n" + - "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ - "'yaml', 'yaml_blob', 'asis' or 'none'" - def paramListParser = paramListParsers.get(paramListFormat) - - // fetch multi param inputs - def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") - // multiFile is null if the value passed to param_list was not a file (e.g a blob) - // If the value was indeed a file, multiFile contains the location that file (used later). 
- def paramListFile = paramListOut[0] - def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ paramValues -> - [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] - }) - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, paramValues -> - def splitParamValues = _splitParams(paramValues, config) - [id, splitParamValues] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListFile){ - paramSets = paramSets.collect({ id, paramValues -> - def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) - [id, relativeParamValues] - }) - } - - return paramSets -} - -/** - * Cast parameters to the correct type as defined in the Viash config - * - * @param parValues A Map of input arguments. - * - * @return The input arguments that have been cast to the type from the viash config. - */ - -private Map _castParamTypes(Map parValues, Map config) { - // Cast the input to the correct type according to viash config - def castParValues = parValues.collectEntries({ parName, parValue -> - paramSettings = config.functionality.allArguments.find({it.plainName == parName}) - // dont parse parameters like publish_dir ( in which case paramSettings = null) - parType = paramSettings ? paramSettings.get("type", null) : null - if (parValue !instanceof Collection) { - parValue = [parValue] - } - if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { - parValue = parValue.collect{ path -> - if (path !instanceof String) { - path - } else { - file(path) - } - } - } else if (parType == "integer") { - parValue = parValue.collect{it as Integer} - } else if (parType == "double") { - parValue = parValue.collect{it as Double} - } else if (parType == "boolean" || - parType == "boolean_true" || - parType == "boolean_false") { - parValue = parValue.collect{it as Boolean} - } - - // simplify list to value if need be - if (paramSettings && !paramSettings.multiple) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - }) - return castParValues -} - -/** - * Apply the argument settings specified in a Viash config to a single parameter set. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * - * @param paramValues A Map of parameter to be processed. All parameters must - * also be specified in the Viash config. - * @param config: A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * @return The input parameters that have been processed. - */ -Map applyConfigToOneParameterSet(Map paramValues, Map config){ - def splitParamValues = _splitParams(paramValues, config) - def castParamValues = _castParamTypes(splitParamValues, config) - - // Check if any unexpected arguments were passed - def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] - castParamValues.each({parName, parValue -> - assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" - }) - return castParamValues -} - -/** - * Apply the argument settings specified in a Viash config to a list of parameter sets. - * - Split the parameter values according to their seperator if - * the parameter accepts multiple values - * - Cast the parameters to their corect types. - * - Assertions: - * ~ Check if any unknown parameters are found - * ~ Check if the ID of the parameter set is unique across all sets. - * - * @return The input parameters that have been processed. - */ - -List applyConfig(List parameterSets, Map config){ - def processedparameterSets = parameterSets.collect({ parameterSet -> - def id = parameterSet[0] - def paramValues = parameterSet[1] - def passthrough = parameterSet.drop(2) - def processedSet = applyConfigToOneParameterSet(paramValues, config) - [id, processedSet] + passthrough - }) - - _checkUniqueIds(processedparameterSets) - return processedparameterSets -} - -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. 
- * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.functionality.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) - - /* process params_list arguments */ - /*********************************/ - def paramSets = _parseParamListArguments(params, config) - def parameterSetsWithConfigApplied = applyConfig(paramSets, config) - - /* combine arguments into channel */ - /**********************************/ - def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> - def id = paramSet[0] - def parValues = paramSet[1] - id = [id, globalID].find({it != null}) // first non-null element - - if (workflow.stubRun) { - // if stub run, explicitly add an id if missing - id = id ? id : "stub" + index - } - assert id != null: "Each parameter set should have at least an ID." 
- // Add regular parameters together with parameters passed with 'param_list' - def combinedArgsValues = globalParamsValues + parValues - - // Remove parameters which are null, if the default is also null - combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> - parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - if ( paramValue != null || parameterSettings.get("default", null) != null ) { - [paramName, paramValue] - } - } - [id, combinedArgsValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -/** - * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. 
- * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * @param params A list of parameter sets as Tuples. The first element of the tuples - * must be a unique id of the parameter set, and the second element - * must contain the parameters themselves. Optional extra elements - * of the tuples will be passed to the output as is. - * @param config A Map of the Viash configuration. This Map can be generated from - * the config file using the readConfig() function. - * - * @return A list of processed parameters sets as tuples. - */ - -private List _preprocessInputsList(List params, Map config) { - // Get different parameter types (used throughout this function) - def defaultArgs = config.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // Apply config to default parameters - def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) - - // Apply config to input parameters - def parsedInputParamSets = applyConfig(params, config) - - // Merge two parameter sets together - def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> - def id = parsedInputParamSet[0] - def parValues = parsedInputParamSet[1] - def passthrough = parsedInputParamSet.drop(2) - def parValuesWithDefault = parsedDefaultValues + parValues - [id, parValuesWithDefault] + passthrough - }) - _checkUniqueIds(parsedArgs) - - return parsedArgs -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). - * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. 
- * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - wfKey = args.key != null ? args.key : "preprocessInputs" - config = args.config - workflow preprocessInputsInstance { - take: - input_ch - - main: - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - - output_ch = input_ch - | toSortedList - | map { paramList -> _preprocessInputsList(paramList, config) } - | flatMap - emit: - output_ch - } - - return preprocessInputsInstance.cloneWithName(wfKey) -} - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -import nextflow.Nextflow -import nextflow.script.IncludeDef -import nextflow.script.ScriptBinding -import nextflow.script.ScriptMeta -import nextflow.script.ScriptParser - -// retrieve resourcesDir here to make sure the correct path is found -resourcesDir = ScriptMeta.current().getScriptPath().getParent() - -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source /cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = 
drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? ":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? - } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - - // check whether expected keys are all booleans (for now) - for (key in expectedKeys) { - assert auto.containsKey(key) - assert auto[key] instanceof Boolean - } - - return auto.subMap(expectedKeys) -} - -def processProcessArgs(Map args) { - // override defaults with args - def processArgs = thisDefaultProcessArgs + args - - // check whether 'key' exists - assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (processArgs["key"] instanceof Closure) { - processArgs["key"] = processArgs["key"](thisConfig.functionality.name) - } - def key = processArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check whether directives exists and apply defaults - assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" - processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) - - // check whether directives exists and apply defaults - assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" - processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) - - // auto define publish, if so desired - if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = - params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? params.publishDir : - null - - if (publishDir != null) { - processArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (processArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] - processArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { - if (processArgs.containsKey(nam) && processArgs[nam]) { - assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" - } - } - - // check fromState - assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" - def fromState = processArgs["fromState"] - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState) { - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def data = fromStateMap.collectEntries{newkey, origkey -> - // check whether all values of fromState are in state - assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" - [newkey, state[origkey]] - } - data - } - } - - processArgs["fromState"] = fromState - } - - // check toState - def toState = processArgs["toState"] - - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key': the output is not a Map" - assert state instanceof Map : "Error in module '$key': the state is not a Map" - def extraEntries = toStateMap.collectEntries{newkey, origkey -> - // check whether all values of toState are in output - assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" - [newkey, output[origkey]] - } - state + extraEntries - } - } - - processArgs["toState"] = toState - - // return output - return processArgs -} - -def processFactory(Map processArgs) { - // autodetect process key - def wfKey = processArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def meta = ScriptMeta.current() - def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = processArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ')' } - .join() - - def outputPaths = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' 
+ par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (processArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir) - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" - |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (processArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // create runtime process - def ownerParams = new ScriptBinding.ParamsMap() - def binding = new ScriptBinding().setParams(ownerParams) - def module = new IncludeDef.Module(name: procKey) - def scriptParser = new ScriptParser(session) - .setModule(true) - .setBinding(binding) - scriptParser.scriptPath = ScriptMeta.current().getScriptPath() - def moduleScript = scriptParser.runScript(procStr) - .getScript() - - // register module in meta - meta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return meta.getProcess(procKey) -} - -def debug(processArgs, debugKey) { - if (processArgs.debug) { - view { "process '${processArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -def workflowFactory(Map args) { - def processArgs = processProcessArgs(args) - def key = processArgs["key"] - def meta = ScriptMeta.current() - - def workflowKey = key - - def processObj = null - - workflow workflowInstance { - 
take: - input_ - - main: - if (processObj == null) { - processObj = processFactory(processArgs) - } - - mid1_ = input_ - | debug(processArgs, "input") - | map { tuple -> - tuple = tuple.clone() - - if (processArgs.map) { - tuple = processArgs.map(tuple) - } - if (processArgs.mapId) { - tuple[0] = processArgs.mapId(tuple[0]) - } - if (processArgs.mapData) { - tuple[1] = processArgs.mapData(tuple[1]) - } - if (processArgs.mapPassthrough) { - tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - assert tuple[0] instanceof CharSequence : - "Error in module '${key}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (processArgs.renameKeys) { - assert processArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - processArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - if (processArgs.filter) { - mid2_ = mid1_ - | filter{processArgs.filter(it)} - } else { - mid2_ = mid1_ - } - - if (processArgs.fromState) { - mid3_ = mid2_ - | map{ - def new_data = processArgs["fromState"](it.take(2)) - [it[0], new_data] - } - } else { - mid3_ = mid2_ - } - - out0_ = mid3_ - | debug(processArgs, "processed") - | map { tuple -> - def id = tuple[0] - def data = tuple[1] - - // fetch default params from functionality - def defaultArgs = thisConfig.functionality.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = thisConfig.functionality.allArguments - .findAll { par -> - def argKey = key + "__" + par.plainName - params.containsKey(argKey) && params[argKey] != "viash_no_value" - } - .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = thisConfig.functionality.allArguments - .findAll { data.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs.removeAll{it.value == null} - - if (workflow.stubRun) { - // add id if missing - combinedArgs = [id: 'stub'] + combinedArgs - } else { - // check whether required arguments exist - thisConfig.functionality.allArguments - .forEach { par -> - if (par.required) { - assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" - } - } - } - - // TODO: check whether parameters have the right type - - // process input files separately - def inputPaths = thisConfig.functionality.allArguments - .findAll { it.type 
== "file" && it.direction == "input" } - .collect { par -> - def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = thisConfig.functionality.allArguments - .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = combinedArgs[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] - } - | processObj - | map { output -> - def outputFiles = thisConfig.functionality.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - 
outputFiles.removeAll{it.value == null} - - if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { - outputFiles = outputFiles.values()[0] - } - - [ output[0], outputFiles ] - } - - // join the output [id, output] with the previous state [id, state, ...] - out1_ = out0_.join(mid2_, failOnDuplicate: true) - // input tuple format: [id, output, prev_state, ...] - // output tuple format: [id, new_state, ...] - | map{ - def new_state = processArgs["toState"](it) - [it[0], new_state] + it.drop(3) - } - | debug(processArgs, "output") - - - emit: - out1_ - } - - def wf = workflowInstance.cloneWithName(workflowKey) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs) - } - // add config to module for later introspection - wf.metaClass.config = thisConfig - - return wf -} - -// initialise default workflow -myWfInstance = workflowFactory([:]) - -// add workflow to environment -ScriptMeta.current().addDefinition(myWfInstance) - -// anonymous workflow for running this module as a standalone -workflow { - def mergedConfig = thisConfig - def mergedParams = [:] + params - - // add id argument if it's not already in the config - if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - mergedConfig.functionality.arguments.add(0, idArg) - mergedConfig = processConfig(mergedConfig) - } - if (!mergedParams.containsKey("id")) { - mergedParams.id = "run" - } - - helpMessage(mergedConfig) - - channelFromParams(mergedParams, mergedConfig) - | preprocessInputs("config": mergedConfig) - | view { "input: $it" } - | myWfInstance.run( - auto: [ publish: true ] - ) - | view { "output: $it" } -} \ No newline at end of file diff --git a/target/nextflow/velocity/velocyto/nextflow.config b/target/nextflow/velocity/velocyto/nextflow.config deleted file mode 100644 index 
dee9b3b802e..00000000000 --- a/target/nextflow/velocity/velocyto/nextflow.config +++ /dev/null @@ -1,108 +0,0 @@ -manifest { - name = 'velocyto' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = '0.12.3' - description = 'Runs the velocity analysis on a BAM file, outputting a loom file.' - author = 'Robrecht Cannoodt' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1.GB } - withLabel: mem2gb { memory = 2.GB } - withLabel: mem4gb { memory = 4.GB } - withLabel: mem8gb { memory = 8.GB } - withLabel: mem16gb { memory = 16.GB } - withLabel: mem32gb { memory = 32.GB } - withLabel: mem64gb { memory = 64.GB } - withLabel: mem128gb { memory = 128.GB } - withLabel: mem256gb { 
memory = 256.GB } - withLabel: mem512gb { memory = 512.GB } - withLabel: mem1tb { memory = 1.TB } - withLabel: mem2tb { memory = 2.TB } - withLabel: mem4tb { memory = 4.TB } - withLabel: mem8tb { memory = 8.TB } - withLabel: mem16tb { memory = 16.TB } - withLabel: mem32tb { memory = 32.TB } - withLabel: mem64tb { memory = 64.TB } - withLabel: mem128tb { memory = 128.TB } - withLabel: mem256tb { memory = 256.TB } - withLabel: mem512tb { memory = 512.TB } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/velocity/velocyto/nextflow_params.yaml b/target/nextflow/velocity/velocyto/nextflow_params.yaml deleted file mode 100644 index e4bbff323ad..00000000000 --- a/target/nextflow/velocity/velocyto/nextflow_params.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Arguments -input: # please fill in - example: "path/to/file" -transcriptome: # please fill in - example: "path/to/file" -# barcode: "path/to/file" -without_umi: false -# output: "$id.$key.output.output" -logic: "Default" - -# Nextflow input-output arguments -publish_dir: # please fill in - example: "output/" -# param_list: "my_params.yaml" diff --git a/target/nextflow/velocity/velocyto/nextflow_schema.json b/target/nextflow/velocity/velocyto/nextflow_schema.json deleted file mode 100644 index 9cadeb25581..00000000000 --- a/target/nextflow/velocity/velocyto/nextflow_schema.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "velocyto", - "description": "Runs the velocity analysis on a BAM file, outputting a loom file.", - "type": "object", - "definitions": { - "arguments" : { - "title": "Arguments", - "type": "object", - "description": "No description", - 
"properties": { - - "input": { - "type": "string", - "description": "Type: `file`, required. Path to BAM file", - "help_text": "Type: `file`, required. Path to BAM file" - }, - - "transcriptome": { - "type": "string", - "description": "Type: `file`, required. Path to GTF file", - "help_text": "Type: `file`, required. Path to GTF file" - }, - - "barcode": { - "type": "string", - "description": "Type: `file`. Valid barcodes file, to filter the bam", - "help_text": "Type: `file`. Valid barcodes file, to filter the bam. If --bcfile is not specified all the cell barcodes will be included.\nCell barcodes should be specified in the bcfile as the \u0027CB\u0027 tag for each read\n" - }, - - "without_umi": { - "type": "boolean", - "description": "Type: `boolean_true`, default: `false`. foo", - "help_text": "Type: `boolean_true`, default: `false`. foo", - "default": "False" - }, - - "output": { - "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. Velocyto loom file", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Velocyto loom file", - "default": "$id.$key.output.output" - }, - - "logic": { - "type": "string", - "description": "Type: `string`, default: `Default`, choices: ``Default`, `Permissive10X`, `Intermediate10X`, `ValidatedIntrons10X`, `Stricter10X`, `ObservedSpanning10X`, `Discordant10X`, `SmartSeq2``. The logic to use for the filtering", - "help_text": "Type: `string`, default: `Default`, choices: ``Default`, `Permissive10X`, `Intermediate10X`, `ValidatedIntrons10X`, `Stricter10X`, `ObservedSpanning10X`, `Discordant10X`, `SmartSeq2``. 
The logic to use for the filtering.", - "enum": ["Default", "Permissive10X", "Intermediate10X", "ValidatedIntrons10X", "Stricter10X", "ObservedSpanning10X", "Discordant10X", "SmartSeq2"] - , - "default": "Default" - } - - } - }, - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - "publish_dir": { - "type": "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - }, - - "param_list": { - "type": "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - } - - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/arguments" - }, - { - "$ref": "#/definitions/nextflow input-output arguments" - } - ] -} From a075b9f384e200b357c4c85801062a980ddb3383 Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:06:28 +0100 Subject: [PATCH 3/5] update CI --- .github/workflows/create-documentation-pr.yml | 14 +- .github/workflows/integration-test.yml | 163 +++------- .github/workflows/main-build.yml | 296 ++++++++++++++++-- .github/workflows/release-build-viash-hub.yml | 46 +-- .github/workflows/release-build.yml | 43 +-- .github/workflows/viash-test.yml | 15 +- 6 files changed, 367 insertions(+), 210 deletions(-) diff --git a/.github/workflows/create-documentation-pr.yml b/.github/workflows/create-documentation-pr.yml index f3ef7785861..272ee8fc000 100644 --- a/.github/workflows/create-documentation-pr.yml +++ b/.github/workflows/create-documentation-pr.yml @@ -22,20 +22,20 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - id: ns_list_components - uses: viash-io/viash-actions/ns-list@v4 + uses: viash-io/viash-actions/ns-list@v5 with: platform: docker - src: src format: json + query_namespace: ^(?!workflows) - id: 
ns_list_workflows - uses: viash-io/viash-actions/ns-list@v4 + uses: viash-io/viash-actions/ns-list@v5 with: - src: workflows format: json + query_namespace: ^workflows - id: set_matrix run: | @@ -71,7 +71,7 @@ jobs: path: website token: ${{ secrets.GTHB_PAT }} - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - name: Get tag to use id: get_tag @@ -80,7 +80,7 @@ jobs: TAG_OR_BRANCH_NAME=${INPUT_TAG:-"${{ github.ref_name }}"} echo "tag=$TAG_OR_BRANCH_NAME" >> $GITHUB_OUTPUT - - uses: viash-io/viash-actions/pro/generate-documentation-qmd@v4 + - uses: viash-io/viash-actions/pro/generate-documentation-qmd@v5 with: project_directory: openpipelines src: ./ diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 5258d7a6686..f19e50cd831 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -1,169 +1,88 @@ name: integration test +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false on: workflow_dispatch: + inputs: + push_containers: + type: boolean + required: false + default: true + description: Build docker images and push them to the registry schedule: - cron: '33 2 * * *' jobs: - # phase 1 - list: + # Build and create containers + build: + uses: ./.github/workflows/main-build.yml + with: + push_containers: ${{ github.event_name == 'schedule' || inputs.push_containers }} + version: 'integration_build' + target_tag: 'integration_build' + deploy_to_viash_hub: false + deploy_branch: 'integration_build' + secrets: inherit + + # Synchronize S3 Bucket and create cache for per-component runs + sync_s3: env: s3_bucket: s3://openpipelines-data/ runs-on: ubuntu-latest outputs: - component_matrix: ${{ steps.set_matrix.outputs.components }} - workflow_matrix: ${{ steps.set_matrix.outputs.workflows }} cache_key: ${{ steps.cache.outputs.cache_key }} steps: - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v4 - - - uses: 
viash-io/viash-actions/project/sync-and-cache-s3@v4 - id: cache + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 + id: cache with: s3_bucket: $s3_bucket dest_path: resources_test cache_key_prefix: resources_test__ - - name: Remove target folder from .gitignore - run: | - # allow publishing the target folder - sed -i '/^\/target\/$/d' .gitignore - - - uses: viash-io/viash-actions/ns-build@v4 - with: - config_mod: .functionality.version := 'integration_build' - parallel: true - - - name: Deploy to target branch - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: . - publish_branch: integration_build - exclude_assets: '' - - - id: ns_list_components - uses: viash-io/viash-actions/ns-list@v4 - with: - platform: docker - src: src - format: json - - - id: ns_list_workflows - uses: viash-io/viash-actions/ns-list@v4 - with: - src: workflows - format: json - - - id: set_matrix - run: | - echo "components=$(jq -c '[ .[] | - { - "name": (.functionality.namespace + (.platforms | map(select(.type == "docker"))[0].namespace_separator) + .functionality.name), - "config": .info.config, - "dir": .info.config | capture("^(?.*\/)").dir - } - ]' ${{ steps.ns_list_components.outputs.output_file }} )" >> $GITHUB_OUTPUT - - echo "workflows=$(jq -c '[ .[] | . as $config | (.functionality.test_resources // [])[] | select(.type == "nextflow_script", .entrypoint) | - { - "name": ($config.functionality.namespace + "/" + $config.functionality.name), - "main_script": (($config.info.config | capture("^(?.*\/)").dir) + "/" + .path), - "entry": .entrypoint, - "config": $config.info.config - } - ] | unique' ${{ steps.ns_list_workflows.outputs.output_file }} )" >> $GITHUB_OUTPUT - - # phase 2 - build: - needs: list - - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - component: ${{ fromJson(needs.list.outputs.component_matrix) }} - - steps: - # Remove unnecessary files to free up space. 
Otherwise, we get 'no space left on device.' - - uses: data-intuitive/reclaim-the-bytes@v2 - - - uses: actions/checkout@v4 - - - uses: viash-io/viash-actions/setup@v4 - - - name: Build container - uses: viash-io/viash-actions/ns-build@v4 - with: - config_mod: .functionality.version := 'integration_build' - setup: build - src: ${{ matrix.component.dir }} - - - name: Login to container registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ secrets.GTHB_USER }} - password: ${{ secrets.GTHB_PAT }} - - - name: Push container - uses: viash-io/viash-actions/ns-build@v4 - with: - config_mod: .functionality.version := 'integration_build' - platform: docker - src: ${{ matrix.component.dir }} - setup: push - - ################################### # phase 3 integration_test: - needs: [ build, list ] - if: "${{ needs.list.outputs.workflow_matrix != '[]' }}" - runs-on: ubuntu-latest + needs: [ build, sync_s3 ] + if: "${{ needs.build.outputs.workflow_matrix != '[]' }}" strategy: fail-fast: false matrix: - component: ${{ fromJson(needs.list.outputs.workflow_matrix) }} + component: ${{ fromJson(needs.build.outputs.workflow_matrix) }} steps: # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
- uses: data-intuitive/reclaim-the-bytes@v2 - - - uses: actions/checkout@v4 + + - name: Keep symlinks as-is + run: | + git config --global core.symlinks true - - uses: viash-io/viash-actions/setup@v4 + - uses: actions/checkout@v4 + with: + ref: 'integration_build' - - uses: nf-core/setup-nextflow@v1.3.0 + - uses: viash-io/viash-actions/setup@v5 - # build target dir - # use containers from integration_build branch, hopefully these are available - - name: Build target dir - uses: viash-io/viash-actions/ns-build@v4 - with: - config_mod: ".functionality.version := 'integration_build'" - parallel: true + - uses: nf-core/setup-nextflow@v1.5.0 # use cache - name: Cache resources data - uses: actions/cache@v3 + uses: actions/cache@v4 timeout-minutes: 5 with: path: resources_test - key: ${{ needs.list.outputs.cache_key }} + key: ${{ needs.sync_s3.outputs.cache_key }} fail-on-cache-miss: true - name: Remove unused test resources to save space shell: bash run: | - readarray -t resources < <(viash config view --format json "${{ matrix.component.config }}" | jq -r -c '(.info.config | capture("^(?.*\/)").dir) as $dir | .functionality.test_resources | map(select(.type == "file")) | map($dir + .path) | unique | .[]') + readarray -t resources < <(viash config view --format json "${{ matrix.component.config }}" -c 'del(.functionality.dependencies)' | jq -r -c '(.info.config | capture("^(?.*\/)").dir) as $dir | .functionality.test_resources | map(select(.type == "file")) | map($dir + .path) | unique | .[]') to_not_remove=() for resource in "${resources[@]}"; do if [[ $resource == *"resources_test"* ]]; then @@ -178,7 +97,9 @@ jobs: unset 'to_not_remove[${#to_not_remove[@]}-1]' to_not_remove+=( "(" "${to_not_remove[@]}" ")" "-prune" "-o") fi + echo "Not removing ${to_not_remove[@]}" find ./resources_test/ "${to_not_remove[@]}" -type f -exec rm {} + + tree ./resources_test/ - name: Run integration test timeout-minutes: 60 @@ -189,4 +110,4 @@ jobs: -main-script "${{ 
matrix.component.main_script }}" \ -entry "${{ matrix.component.entry }}" \ -profile docker,mount_temp,no_publish \ - -c workflows/utils/labels_ci.config + -c src/workflows/utils/labels_ci.config diff --git a/.github/workflows/main-build.yml b/.github/workflows/main-build.yml index 536e8743ed6..d679dab8651 100644 --- a/.github/workflows/main-build.yml +++ b/.github/workflows/main-build.yml @@ -1,70 +1,201 @@ -name: main build +name: Build concurrency: - group: main_build + group: ${{ github.workflow }}-${{ github.event.inputs.deploy_branch && format('{0}_build', github.ref_name) || github.event.inputs.deploy_branch }} cancel-in-progress: true on: + workflow_dispatch: + inputs: + push_containers: + type: boolean + required: false + default: false + description: Build docker images and push them to the registry + version: + type: string + required: false + description: | + Version to tag the build components with (e.i functionality.version). + Defaults to name of the branch that triggered the workflow, suffixed by "_build". + target_tag: + type: string + required: false + default: main_build + description: | + Version tag of containers to use. Is `main_build` by default. + Can be used in combination with 'push_containers' to re-use existing docker images + or set the tag for new builds. + deploy_to_viash_hub: + type: boolean + required: false + default: false + description: Also build packages and docker images for viash-hub.com and push them. + + # when used as a subworkflow + workflow_call: + inputs: + push_containers: + type: boolean + required: false + default: false + description: push the containers to the registry + version: + type: string + required: false + description: | + Version to tag the build components with (e.i functionality.version). + Defaults to name of the branch that triggered the workflow, suffixed by "_build". + target_tag: + type: string + required: false + default: main_build + description: Version tag of existing containers to use. 
Is `main_build` by default. + deploy_branch: + type: string + required: false + description: | + Branch to deploy the build to. Defaults to name of the branch + that triggered the workflow, suffixed by "_build". + deploy_to_viash_hub: + type: boolean + required: false + default: false + description: Also build packages and docker images for viash-hub.com and push them. + outputs: + component_matrix: + description: "A JSON object that can be used to populate a github actions matrix for component jobs." + value: ${{ jobs.build_and_deploy_target_folder.outputs.component_matrix }} + workflow_matrix: + description: "A JSON object that can be used to populate a github actions matrix for workflow jobs." + value: ${{ jobs.build_and_deploy_target_folder.outputs.workflow_matrix}} + secrets: + VIASHHUB_USER: + required: true + VIASHHUB_PAT: + required: true + GTHB_USER: + required: true + GTHB_PAT: + required: true push: branches: [ 'main' ] + jobs: # phase 1 - list: + build_and_deploy_target_folder: + name: "Build and push target folder" runs-on: ubuntu-latest outputs: - component_matrix: ${{ steps.set_matrix.outputs.matrix }} - cache_key: ${{ steps.cache.outputs.cache_key }} + component_matrix: ${{ steps.set_matrix.outputs.components }} + workflow_matrix: ${{ steps.set_matrix.outputs.workflows }} + + env: + DEPLOY_BRANCH: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} steps: + - name: Keep symlinks as-is + run: | + git config --global core.symlinks true + - uses: actions/checkout@v4 + if: ${{ inputs.deploy_to_viash_hub == 'true' }} + with: + fetch-depth: 0 + + - name: Push ref to Viash-hub + if: ${{ inputs.deploy_to_viash_hub == 'true' }} + run: | + git remote add viash-hub https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git + git push -f -u viash-hub ${{ github.ref_name }} + + - name: Branch to checkout (use existing target branch if it exists) + id: get_checkout_branch + run: | + 
if ! git ls-remote --heads --exit-code https://github.com/openpipelines-bio/openpipeline.git "$DEPLOY_BRANCH" > /dev/null; then + echo "Remote branch does not exist, fetching current branch and building on top of it" + echo "checkout_branch=${{ github.ref_name }}" >> "$GITHUB_OUTPUT" + else + echo "Remote branch exists, checking out existing branch" + echo "checkout_branch=$DEPLOY_BRANCH" >> "$GITHUB_OUTPUT" + fi + + - uses: actions/checkout@v4 + with: + ref: ${{ steps.get_checkout_branch.outputs.checkout_branch }} + fetch-depth: 0 - - uses: viash-io/viash-actions/setup@v4 + - name: Fetch changes from ${{github.ref_name}} + run: | + git fetch origin ${{github.ref_name}} + git checkout -f --no-overlay origin/${{github.ref_name}} -- '.' + + - uses: viash-io/viash-actions/setup@v5 - name: Remove target folder from .gitignore run: | # allow publishing the target folder sed -i '/^\/target\/$/d' .gitignore - - uses: viash-io/viash-actions/ns-build@v4 + - uses: viash-io/viash-actions/ns-build@v5 with: - config_mod: .functionality.version := 'main_build' + config_mod: | + .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" + .platforms[.type == 'docker'].target_tag := '${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }}' parallel: true - + query: ^(?!workflows) + + - uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" + parallel: true + query: ^workflows + - name: Build nextflow schemas - uses: viash-io/viash-actions/pro/build-nextflow-schemas@v4 + uses: viash-io/viash-actions/pro/build-nextflow-schemas@v5 with: - workflows: workflows components: src + workflows: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Build parameter files - uses: viash-io/viash-actions/pro/build-nextflow-params@v4 + uses: viash-io/viash-actions/pro/build-nextflow-params@v5 with: - workflows: workflows + 
workflows: src components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Deploy to target branch - uses: peaceiris/actions-gh-pages@v3 + uses: stefanzweifel/git-auto-commit-action@v5 with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: . - publish_branch: main_build - exclude_assets: '' - - - id: ns_list - uses: viash-io/viash-actions/ns-list@v4 + create_branch: true + commit_message: "deploy: ${{github.sha}}" + skip_dirty_check: true + branch: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} + - name: "List components" + id: ns_list + uses: viash-io/viash-actions/ns-list@v5 with: platform: docker src: src format: json + query_namespace: ^(?!workflows) + + - name: "List workflows" + id: ns_list_workflows + uses: viash-io/viash-actions/ns-list@v5 + with: + src: src + format: json + query_namespace: ^workflows - - id: set_matrix + - name: "Parse JSON output from 'viash ns list' as input for matrix." + id: set_matrix run: | - echo "matrix=$(jq -c '[ .[] | + echo "components=$(jq -c '[ .[] | { "name": (.functionality.namespace + "/" + .functionality.name), "config": .info.config, @@ -72,16 +203,82 @@ jobs: } ]' ${{ steps.ns_list.outputs.output_file }} )" >> $GITHUB_OUTPUT + echo "workflows=$(jq -c '[ .[] | . 
as $config | (.functionality.test_resources // [])[] | select(.type == "nextflow_script", .entrypoint) | + { + "name": ($config.functionality.namespace + "/" + $config.functionality.name), + "main_script": (($config.info.config | capture("^(?.*\/)").dir) + "/" + .path), + "entry": .entrypoint, + "config": $config.info.config + } + ] | unique' ${{ steps.ns_list_workflows.outputs.output_file }} )" >> $GITHUB_OUTPUT + + - uses: actions/checkout@v4 + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + with: + ref: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} + fetch-depth: 0 + clean: true + + - name: Set origin to viash-hub and commit on top of it. + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + # This is needed because git-auto-commit-action uses origin by default + run: | + git remote add viash-hub https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git + if git ls-remote --heads --exit-code https://viash-hub.com/openpipelines-bio/openpipeline.git ${{ github.ref_name }}_build > /dev/null; then + git fetch viash-hub ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} + git reset --hard viash-hub/${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} + fi + git checkout -f --no-overlay origin/${{github.ref_name}} -- '.' 
+ git remote set-url origin https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git + git remote rm viash-hub + + - name: Remove target folder from .gitignore + run: | + # allow publishing the target folder + sed -i '/^\/target\/$/d' .gitignore + + - uses: viash-io/viash-actions/ns-build@v5 + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + with: + config_mod: | + .functionality.version := " ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }}" + .platforms[.type == 'docker'].target_tag := '${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }}' + .platforms[.type == 'docker'].target_organization := 'openpipelines-bio/openpipeline' + .platforms[.type == 'docker'].target_registry := 'viash-hub.com:5050' + .platforms[.type == 'docker'].target_image_source := 'https://viash-hub.com/openpipelines-bio/openpipeline' + parallel: true + query: ^(?!workflows) + + - uses: viash-io/viash-actions/ns-build@v5 + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + with: + config_mod: | + .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" + parallel: true + query: ^workflows + + - name: Deploy to target branch + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + uses: stefanzweifel/git-auto-commit-action@v5 + with: + create_branch: true + commit_message: "deploy: ${{github.sha}}" + skip_dirty_check: true + branch: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} + skip_checkout: true + # phase 2 - build: - needs: list + build_and_deploy_docker_containers: + name: "Build and Deploy Docker Images" + needs: build_and_deploy_target_folder + if: ${{github.event_name == 'push' || inputs.push_containers }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: - component: ${{ fromJson(needs.list.outputs.component_matrix) }} + component: ${{ 
fromJson(needs.build_and_deploy_target_folder.outputs.component_matrix) }} steps: # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' @@ -89,12 +286,14 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - name: Build container - uses: viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 with: - config_mod: .functionality.version := 'main_build' + config_mod: | + .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" + .platforms[.type == 'docker'].target_tag := '${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }}' platform: docker src: ${{ matrix.component.dir }} setup: build @@ -107,9 +306,42 @@ jobs: password: ${{ secrets.GTHB_PAT }} - name: Push container - uses: viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 + with: + config_mod: .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" + platform: docker + src: ${{ matrix.component.dir }} + setup: push + + - name: Login to Viash-Hub container registry + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + uses: docker/login-action@v3 + with: + registry: viash-hub.com:5050 + username: ${{ secrets.VIASHHUB_USER }} + password: ${{ secrets.VIASHHUB_PAT }} + + - name: Update Docker settings + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + run: | + sudo sed -i 's/ }/, \"max-concurrent-uploads\": 2 }/' /etc/docker/daemon.json + sudo systemctl restart docker + + - name: "Re-tag containers for viash-hub" + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + run: | + viash ns exec -s ${{ matrix.component.dir }} --apply_platform -p docker \ + 'docker tag ghcr.io/openpipelines-bio/{namespace}_{functionality-name}:${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }} 
viash-hub.com:5050/openpipelines-bio/openpipeline/{namespace}_{functionality-name}:${{ github.ref_name }}_build' + + - name: Push container to Viash-Hub + if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} + uses: viash-io/viash-actions/ns-build@v5 with: - config_mod: .functionality.version := 'main_build' + config_mod: | + .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" + .platforms[.type == 'docker'].target_registry := 'viash-hub.com:5050' + .platforms[.type == 'docker'].target_organization := 'openpipelines-bio/openpipeline' + .platforms[.type == 'docker'].target_image_source := 'https://viash-hub.com/openpipelines-bio/openpipeline' platform: docker src: ${{ matrix.component.dir }} - setup: push \ No newline at end of file + setup: push diff --git a/.github/workflows/release-build-viash-hub.yml b/.github/workflows/release-build-viash-hub.yml index 382909746c9..76025f11315 100644 --- a/.github/workflows/release-build-viash-hub.yml +++ b/.github/workflows/release-build-viash-hub.yml @@ -27,9 +27,9 @@ jobs: # git remote add viash-hub https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git # git push -f viash-hub main - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v4 + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 id: cache with: s3_bucket: $s3_bucket @@ -41,7 +41,7 @@ jobs: # allow publishing the target folder sed -i '/^\/target\/$/d' .gitignore - - uses: viash-io/viash-actions/ns-build@v4 + - uses: viash-io/viash-actions/ns-build@v5 with: config_mod: | .functionality.version := '${{ github.event.inputs.version_tag }}' @@ -49,25 +49,27 @@ jobs: .platforms[.type == 'docker'].target_organization := 'openpipelines-bio/openpipeline' .platforms[.type == 'docker'].target_image_source := 'https://viash-hub.com/openpipelines-bio/openpipeline' parallel: true + 
query_namespace: ^(?!workflows) + - name: Build nextflow schemas - uses: viash-io/viash-actions/pro/build-nextflow-schemas@v4 + uses: viash-io/viash-actions/pro/build-nextflow-schemas@v5 with: - workflows: workflows + workflows: src components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Build parameter files - uses: viash-io/viash-actions/pro/build-nextflow-params@v4 + uses: viash-io/viash-actions/pro/build-nextflow-params@v5 with: - workflows: workflows + workflows: src components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Deploy build artifacts to Viash-Hub - uses: viash-io/viash-actions/viash-hub/deploy@v4 + uses: viash-io/viash-actions/viash-hub/deploy@v5 with: github_token: ${{ github.token }} viash_hub_token: ${{ secrets.VIASHHUB_PAT }} @@ -78,17 +80,17 @@ jobs: commit_message: "Deploy for release ${{ github.event.inputs.version_tag }} from ${{ github.sha }}" - id: ns_list_components - uses: viash-io/viash-actions/ns-list@v4 + uses: viash-io/viash-actions/ns-list@v5 with: platform: docker - src: src format: json + query_namespace: ^(?!workflows) - id: ns_list_workflows - uses: viash-io/viash-actions/ns-list@v4 + uses: viash-io/viash-actions/ns-list@v5 with: - src: workflows format: json + query_namespace: ^workflows - id: set_matrix run: | @@ -126,10 +128,10 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - name: Build container - uses: viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 with: config_mod: | .functionality.version := '${{ github.event.inputs.version_tag }}' @@ -149,7 +151,7 @@ jobs: password: ${{ secrets.VIASHHUB_PAT }} - name: Push container - uses: viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 with: config_mod: | .functionality.version := '${{ github.event.inputs.version_tag }}' @@ -179,14 +181,14 @@ jobs: - uses: actions/checkout@v4 - - 
uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - - uses: nf-core/setup-nextflow@v1.3.0 + - uses: nf-core/setup-nextflow@v1.5.0 # build target dir # use containers from release branch, hopefully these are available - name: Build target dir - uses: viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 with: config_mod: | .functionality.version := '${{ github.event.inputs.version_tag }}' @@ -198,7 +200,7 @@ jobs: # use cache - name: Cache resources data - uses: actions/cache@v3 + uses: actions/cache@v4 timeout-minutes: 5 with: path: resources_test @@ -234,7 +236,7 @@ jobs: -main-script "${{ matrix.component.main_script }}" \ -entry ${{ matrix.component.entry }} \ -profile docker,mount_temp,no_publish \ - -c workflows/utils/labels_ci.config + -c src/workflows/utils/labels_ci.config ###################################3 # phase 4 @@ -254,12 +256,12 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 # use cache - name: Cache resources data id: restore_cache - uses: actions/cache/restore@v3 + uses: actions/cache/restore@v4 env: SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5 with: diff --git a/.github/workflows/release-build.yml b/.github/workflows/release-build.yml index a92d51c990f..555cfac9795 100644 --- a/.github/workflows/release-build.yml +++ b/.github/workflows/release-build.yml @@ -22,9 +22,9 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v4 + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 id: cache with: s3_bucket: $s3_bucket @@ -36,23 +36,24 @@ jobs: # allow publishing the target folder sed -i '/^\/target\/$/d' .gitignore - - uses: viash-io/viash-actions/ns-build@v4 + - uses: viash-io/viash-actions/ns-build@v5 with: config_mod: ".functionality.version := '${{ github.event.inputs.version_tag }}'" 
parallel: true + query_namespace: ^(?!workflows) - name: Build nextflow schemas - uses: viash-io/viash-actions/pro/build-nextflow-schemas@v4 + uses: viash-io/viash-actions/pro/build-nextflow-schemas@v5 with: - workflows: workflows + workflows: src components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Build parameter files - uses: viash-io/viash-actions/pro/build-nextflow-params@v4 + uses: viash-io/viash-actions/pro/build-nextflow-params@v5 with: - workflows: workflows + workflows: src components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' @@ -68,17 +69,17 @@ jobs: - id: ns_list_components - uses: viash-io/viash-actions/ns-list@v4 + uses: viash-io/viash-actions/ns-list@v5 with: platform: docker - src: src format: json + query_namespace: ^(?!workflows) - id: ns_list_workflows - uses: viash-io/viash-actions/ns-list@v4 + uses: viash-io/viash-actions/ns-list@v5 with: - src: workflows format: json + query_namespace: ^workflows - id: set_matrix run: | @@ -116,10 +117,10 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - name: Build container - uses: viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 with: config_mod: .functionality.version := '${{ github.event.inputs.version_tag }}' platform: docker @@ -135,7 +136,7 @@ jobs: password: ${{ secrets.GTHB_PAT }} - name: Push container - uses: viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 with: config_mod: .functionality.version := '${{ github.event.inputs.version_tag }}' platform: docker @@ -161,14 +162,14 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - - uses: nf-core/setup-nextflow@v1.3.0 + - uses: nf-core/setup-nextflow@v1.5.0 # build target dir # use containers from release branch, hopefully these are available - name: Build target dir - uses: 
viash-io/viash-actions/ns-build@v4 + uses: viash-io/viash-actions/ns-build@v5 with: config_mod: ".functionality.version := '${{ github.event.inputs.version_tag }}'" parallel: true @@ -176,7 +177,7 @@ jobs: # use cache - name: Cache resources data - uses: actions/cache@v3 + uses: actions/cache@v4 timeout-minutes: 5 with: path: resources_test @@ -212,7 +213,7 @@ jobs: -main-script "${{ matrix.component.main_script }}" \ -entry ${{ matrix.component.entry }} \ -profile docker,mount_temp,no_publish \ - -c workflows/utils/labels_ci.config + -c src/workflows/utils/labels_ci.config ###################################3 # phase 4 @@ -232,12 +233,12 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 # use cache - name: Cache resources data id: restore_cache - uses: actions/cache/restore@v3 + uses: actions/cache/restore@v4 env: SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5 with: diff --git a/.github/workflows/viash-test.yml b/.github/workflows/viash-test.yml index 369a51db013..6aba6802743 100644 --- a/.github/workflows/viash-test.yml +++ b/.github/workflows/viash-test.yml @@ -41,13 +41,13 @@ jobs: with: fetch-depth: 0 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 - name: Check if all config can be parsed if there is no unicode support run: | LANG=C viash ns list > /dev/null - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v4 + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 id: cache with: s3_bucket: $s3_bucket @@ -56,19 +56,20 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v39 + uses: tj-actions/changed-files@v42 with: separator: ";" diff_relative: true - id: ns_list - uses: viash-io/viash-actions/ns-list@v4 + uses: viash-io/viash-actions/ns-list@v5 with: platform: docker format: json + query_namespace: ^(?!workflows) - id: ns_list_filtered - uses: viash-io/viash-actions/project/detect-changed-components@v4 + uses: 
viash-io/viash-actions/project/detect-changed-components@v5 with: input_file: "${{ steps.ns_list.outputs.output_file }}" @@ -99,12 +100,12 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v4 + - uses: viash-io/viash-actions/setup@v5 # use cache - name: Cache resources data id: restore_cache - uses: actions/cache/restore@v3 + uses: actions/cache/restore@v4 env: SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5 with: From 5a490e62e470b5c0652e281504d9994d170dec1b Mon Sep 17 00:00:00 2001 From: DriesSchaumont Date: Wed, 31 Jan 2024 09:10:42 +0000 Subject: [PATCH 4/5] deploy: a075b9f384e200b357c4c85801062a980ddb3383 --- target/docker/annotate/popv/.config.vsh.yaml | 346 + target/docker/annotate/popv/popv | 1401 ++++ target/docker/annotate/popv/setup_logger.py | 12 + target/docker/cluster/leiden/.config.vsh.yaml | 219 + target/docker/cluster/leiden/leiden | 1115 +++ target/docker/cluster/leiden/setup_logger.py | 12 + .../compress_h5mu/.config.vsh.yaml | 167 + .../compression/compress_h5mu/compress_h5mu | 1008 +++ .../compress_h5mu/compress_h5mu.py | 49 + .../compression/tar_extract/.config.vsh.yaml | 106 + .../compression/tar_extract/tar_extract | 978 +++ .../from_10xh5_to_h5mu/.config.vsh.yaml | 272 + .../from_10xh5_to_h5mu/from_10xh5_to_h5mu | 1117 +++ .../from_10xh5_to_h5mu/setup_logger.py | 12 + .../from_10xmtx_to_h5mu/.config.vsh.yaml | 166 + .../from_10xmtx_to_h5mu/from_10xmtx_to_h5mu | 989 +++ .../from_10xmtx_to_h5mu/setup_logger.py | 12 + .../.config.vsh.yaml | 159 + .../from_bd_to_10x_molecular_barcode_tags | 1030 +++ .../from_bdrhap_to_h5mu/.config.vsh.yaml | 181 + .../from_bdrhap_to_h5mu/from_bdrhap_to_h5mu | 1215 +++ .../.config.vsh.yaml | 190 + .../from_cellranger_multi_to_h5mu | 1166 +++ .../setup_logger.py | 12 + .../from_h5ad_to_h5mu/.config.vsh.yaml | 177 + .../from_h5ad_to_h5mu/from_h5ad_to_h5mu | 1056 +++ .../convert/from_h5ad_to_h5mu/setup_logger.py | 12 + .../from_h5mu_to_h5ad/.config.vsh.yaml | 182 + .../from_h5mu_to_h5ad/from_h5mu_to_h5ad 
| 1009 +++ .../convert/from_h5mu_to_h5ad/setup_logger.py | 12 + .../convert/velocyto_to_h5mu/.config.vsh.yaml | 255 + .../convert/velocyto_to_h5mu/velocyto_to_h5mu | 1086 +++ .../.config.vsh.yaml | 637 ++ .../cellbender_remove_background | 2153 ++++++ .../setup_logger.py | 12 + .../.config.vsh.yaml | 406 + .../cellbender_remove_background_v0_2 | 1629 +++++ .../helper.py | 143 + .../setup_logger.py | 12 + .../docker/dataflow/concat/.config.vsh.yaml | 222 + target/docker/dataflow/concat/concat | 1386 ++++ target/docker/dataflow/concat/setup_logger.py | 12 + target/docker/dataflow/merge/.config.vsh.yaml | 175 + target/docker/dataflow/merge/merge | 1051 +++ target/docker/dataflow/merge/setup_logger.py | 12 + .../split_modalities/.config.vsh.yaml | 214 + .../dataflow/split_modalities/setup_logger.py | 12 + .../split_modalities/split_modalities | 1065 +++ .../docker/demux/bcl2fastq/.config.vsh.yaml | 169 + target/docker/demux/bcl2fastq/bcl2fastq | 1028 +++ .../docker/demux/bcl_convert/.config.vsh.yaml | 189 + target/docker/demux/bcl_convert/bcl_convert | 1033 +++ .../demux/cellranger_mkfastq/.config.vsh.yaml | 207 + .../cellranger_mkfastq/cellranger_mkfastq | 1026 +++ .../demux/cellranger_mkfastq/setup_logger.py | 12 + target/docker/dimred/pca/.config.vsh.yaml | 253 + target/docker/dimred/pca/pca | 1188 +++ target/docker/dimred/pca/setup_logger.py | 12 + target/docker/dimred/umap/.config.vsh.yaml | 312 + target/docker/dimred/umap/setup_logger.py | 12 + target/docker/dimred/umap/umap | 1306 ++++ .../download/download_file/.config.vsh.yaml | 138 + .../download/download_file/download_file | 931 +++ .../sync_test_resources/.config.vsh.yaml | 170 + .../sync_test_resources/sync_test_resources | 1018 +++ .../docker/files/make_params/.config.vsh.yaml | 220 + target/docker/files/make_params/make_params | 1100 +++ .../filter/delimit_fraction/.config.vsh.yaml | 241 + .../filter/delimit_fraction/delimit_fraction | 1207 +++ .../filter/delimit_fraction/setup_logger.py | 12 + 
.../docker/filter/do_filter/.config.vsh.yaml | 202 + target/docker/filter/do_filter/do_filter | 1056 +++ .../docker/filter/do_filter/setup_logger.py | 12 + .../filter_with_counts/.config.vsh.yaml | 295 + .../filter_with_counts/filter_with_counts | 1241 ++++ .../filter/filter_with_counts/setup_logger.py | 12 + .../filter/filter_with_hvg/.config.vsh.yaml | 352 + .../filter/filter_with_hvg/filter_with_hvg | 1407 ++++ .../filter/filter_with_hvg/setup_logger.py | 12 + .../filter_with_scrublet/.config.vsh.yaml | 304 + .../filter_with_scrublet/filter_with_scrublet | 1260 ++++ .../filter_with_scrublet/setup_logger.py | 12 + .../filter/remove_modality/.config.vsh.yaml | 171 + .../filter/remove_modality/remove_modality | 972 +++ .../filter/subset_h5mu/.config.vsh.yaml | 187 + .../docker/filter/subset_h5mu/setup_logger.py | 12 + target/docker/filter/subset_h5mu/subset_h5mu | 994 +++ .../integrate/harmonypy/.config.vsh.yaml | 240 + target/docker/integrate/harmonypy/harmonypy | 1099 +++ .../integrate/scanorama/.config.vsh.yaml | 283 + target/docker/integrate/scanorama/scanorama | 1187 +++ .../integrate/scarches/.config.vsh.yaml | 331 + target/docker/integrate/scarches/scarches | 1568 ++++ .../docker/integrate/scarches/setup_logger.py | 12 + target/docker/integrate/scvi/.config.vsh.yaml | 591 ++ target/docker/integrate/scvi/scvi | 1912 +++++ target/docker/integrate/scvi/subset_vars.py | 16 + .../docker/integrate/totalvi/.config.vsh.yaml | 348 + .../docker/integrate/totalvi/setup_logger.py | 12 + target/docker/integrate/totalvi/totalvi | 1479 ++++ .../run_cellxgene/.config.vsh.yaml | 83 + .../interactive/run_cellxgene/run_cellxgene | 901 +++ .../run_cirrocumulus/.config.vsh.yaml | 85 + .../run_cirrocumulus/run_cirrocumulus | 901 +++ .../docker/interpret/lianapy/.config.vsh.yaml | 313 + target/docker/interpret/lianapy/lianapy | 1259 ++++ .../labels_transfer/knn/.config.vsh.yaml | 379 + target/docker/labels_transfer/knn/helper.py | 32 + target/docker/labels_transfer/knn/knn | 1258 
++++ .../labels_transfer/knn/setup_logger.py | 12 + .../labels_transfer/xgboost/.config.vsh.yaml | 594 ++ .../docker/labels_transfer/xgboost/helper.py | 32 + .../labels_transfer/xgboost/setup_logger.py | 12 + target/docker/labels_transfer/xgboost/xgboost | 2013 +++++ .../mapping/bd_rhapsody/.config.vsh.yaml | 417 ++ target/docker/mapping/bd_rhapsody/bd_rhapsody | 1975 +++++ .../rhapsody_targeted_1.10.1_nodocker.cwl | 5159 +++++++++++++ .../rhapsody_wta_1.10.1_nodocker.cwl | 5204 +++++++++++++ .../mapping/bd_rhapsody/setup_logger.py | 12 + .../mapping/cellranger_count/.config.vsh.yaml | 266 + .../mapping/cellranger_count/cellranger_count | 1206 +++ .../cellranger_count_split/.config.vsh.yaml | 218 + .../cellranger_count_split | 1090 +++ .../mapping/cellranger_multi/.config.vsh.yaml | 423 ++ .../mapping/cellranger_multi/cellranger_multi | 1681 +++++ .../mapping/cellranger_multi/setup_logger.py | 12 + .../mapping/htseq_count/.config.vsh.yaml | 418 ++ target/docker/mapping/htseq_count/htseq_count | 1608 ++++ .../htseq_count_to_h5mu/.config.vsh.yaml | 209 + .../htseq_count_to_h5mu/htseq_count_to_h5mu | 1151 +++ .../mapping/multi_star/.config.vsh.yaml | 3080 ++++++++ target/docker/mapping/multi_star/multi_star | 6362 ++++++++++++++++ .../multi_star_to_h5mu/.config.vsh.yaml | 179 + .../multi_star_to_h5mu/multi_star_to_h5mu | 1017 +++ .../mapping/samtools_sort/.config.vsh.yaml | 270 + .../mapping/samtools_sort/samtools_sort | 1185 +++ .../mapping/star_align/.config.vsh.yaml | 2535 +++++++ .../docker/mapping/star_align/setup_logger.py | 12 + target/docker/mapping/star_align/star_align | 5713 +++++++++++++++ .../mapping/star_align_v273a/.config.vsh.yaml | 2535 +++++++ .../mapping/star_align_v273a/setup_logger.py | 12 + .../mapping/star_align_v273a/star_align_v273a | 5713 +++++++++++++++ .../star_build_reference/.config.vsh.yaml | 190 + .../star_build_reference/star_build_reference | 1175 +++ .../docker/metadata/add_id/.config.vsh.yaml | 197 + 
target/docker/metadata/add_id/add_id | 1064 +++ target/docker/metadata/add_id/setup_logger.py | 12 + .../grep_annotation_column/.config.vsh.yaml | 244 + .../grep_annotation_column | 1148 +++ .../docker/metadata/join_csv/.config.vsh.yaml | 229 + target/docker/metadata/join_csv/join_csv | 1119 +++ .../docker/metadata/join_csv/setup_logger.py | 12 + .../metadata/join_uns_to_obs/.config.vsh.yaml | 171 + .../metadata/join_uns_to_obs/join_uns_to_obs | 1035 +++ .../metadata/join_uns_to_obs/setup_logger.py | 12 + .../move_obsm_to_obs/.config.vsh.yaml | 192 + .../move_obsm_to_obs/move_obsm_to_obs | 1054 +++ .../metadata/move_obsm_to_obs/setup_logger.py | 12 + .../docker/neighbors/bbknn/.config.vsh.yaml | 289 + target/docker/neighbors/bbknn/bbknn | 1184 +++ .../neighbors/find_neighbors/.config.vsh.yaml | 309 + .../neighbors/find_neighbors/find_neighbors | 1208 +++ .../neighbors/find_neighbors/setup_logger.py | 12 + .../filter_10xh5/.config.vsh.yaml | 195 + .../process_10xh5/filter_10xh5/filter_10xh5 | 1089 +++ .../qc/calculate_qc_metrics/.config.vsh.yaml | 235 + .../calculate_qc_metrics/calculate_qc_metrics | 1211 +++ .../qc/calculate_qc_metrics/setup_logger.py | 12 + target/docker/qc/fastqc/.config.vsh.yaml | 156 + target/docker/qc/fastqc/fastqc | 994 +++ target/docker/qc/multiqc/.config.vsh.yaml | 140 + target/docker/qc/multiqc/multiqc | 959 +++ .../query/cellxgene_census/.config.vsh.yaml | 260 + .../query/cellxgene_census/cellxgene_census | 1223 ++++ .../query/cellxgene_census/setup_logger.py | 12 + .../build_bdrhap_reference/.config.vsh.yaml | 186 + .../build_bdrhap_reference | 972 +++ .../.config.vsh.yaml | 187 + .../build_cellranger_reference | 977 +++ .../reference/make_reference/.config.vsh.yaml | 212 + .../reference/make_reference/make_reference | 1076 +++ target/docker/report/mermaid/.config.vsh.yaml | 185 + target/docker/report/mermaid/mermaid | 1029 +++ .../report/mermaid/puppeteer-config.json | 6 + .../docker/transfer/publish/.config.vsh.yaml | 125 + 
target/docker/transfer/publish/publish | 919 +++ target/docker/transform/clr/.config.vsh.yaml | 188 + target/docker/transform/clr/clr | 1005 +++ .../transform/delete_layer/.config.vsh.yaml | 196 + .../transform/delete_layer/compress_h5mu.py | 49 + .../transform/delete_layer/delete_layer | 1122 +++ .../transform/delete_layer/setup_logger.py | 12 + .../docker/transform/log1p/.config.vsh.yaml | 225 + target/docker/transform/log1p/log1p | 1081 +++ target/docker/transform/log1p/setup_logger.py | 12 + .../normalize_total/.config.vsh.yaml | 242 + .../transform/normalize_total/normalize_total | 1108 +++ .../transform/normalize_total/setup_logger.py | 12 + .../transform/regress_out/.config.vsh.yaml | 195 + .../docker/transform/regress_out/regress_out | 1039 +++ .../transform/regress_out/setup_logger.py | 12 + .../docker/transform/scale/.config.vsh.yaml | 205 + target/docker/transform/scale/scale | 1063 +++ target/docker/transform/scale/setup_logger.py | 12 + .../docker/velocity/scvelo/.config.vsh.yaml | 276 + target/docker/velocity/scvelo/scvelo | 1272 ++++ target/docker/velocity/scvelo/setup_logger.py | 12 + .../docker/velocity/velocyto/.config.vsh.yaml | 225 + target/docker/velocity/velocyto/velocyto | 1097 +++ .../compress_h5mu/.config.vsh.yaml | 167 + .../compression/compress_h5mu/compress_h5mu | 537 ++ .../compress_h5mu/compress_h5mu.py | 49 + .../compression/tar_extract/.config.vsh.yaml | 106 + .../compression/tar_extract/tar_extract | 514 ++ .../native/dataflow/concat/.config.vsh.yaml | 222 + target/native/dataflow/concat/concat | 898 +++ target/native/dataflow/concat/setup_logger.py | 12 + target/native/dataflow/merge/.config.vsh.yaml | 175 + target/native/dataflow/merge/merge | 563 ++ target/native/dataflow/merge/setup_logger.py | 12 + .../split_modalities/.config.vsh.yaml | 214 + .../dataflow/split_modalities/setup_logger.py | 12 + .../split_modalities/split_modalities | 586 ++ .../sync_test_resources/.config.vsh.yaml | 170 + 
.../sync_test_resources/sync_test_resources | 557 ++ .../integrate/scarches/.config.vsh.yaml | 331 + target/native/integrate/scarches/scarches | 1086 +++ .../native/integrate/scarches/setup_logger.py | 12 + .../native/integrate/totalvi/.config.vsh.yaml | 348 + .../native/integrate/totalvi/setup_logger.py | 12 + target/native/integrate/totalvi/totalvi | 985 +++ .../labels_transfer/knn/.config.vsh.yaml | 379 + target/native/labels_transfer/knn/helper.py | 32 + target/native/labels_transfer/knn/knn | 773 ++ .../labels_transfer/knn/setup_logger.py | 12 + .../labels_transfer/xgboost/.config.vsh.yaml | 594 ++ .../native/labels_transfer/xgboost/helper.py | 32 + .../labels_transfer/xgboost/setup_logger.py | 12 + target/native/labels_transfer/xgboost/xgboost | 1520 ++++ .../native/metadata/add_id/.config.vsh.yaml | 197 + target/native/metadata/add_id/add_id | 593 ++ target/native/metadata/add_id/setup_logger.py | 12 + .../grep_annotation_column/.config.vsh.yaml | 244 + .../grep_annotation_column | 677 ++ .../native/transform/scale/.config.vsh.yaml | 205 + target/native/transform/scale/scale | 592 ++ target/native/transform/scale/setup_logger.py | 12 + .../native/velocity/scvelo/.config.vsh.yaml | 276 + target/native/velocity/scvelo/scvelo | 801 ++ target/native/velocity/scvelo/setup_logger.py | 12 + .../native/velocity/velocyto/.config.vsh.yaml | 225 + target/native/velocity/velocyto/velocyto | 605 ++ .../nextflow/annotate/popv/.config.vsh.yaml | 346 + target/nextflow/annotate/popv/main.nf | 2958 ++++++++ target/nextflow/annotate/popv/nextflow.config | 108 + .../annotate/popv/nextflow_params.yaml | 25 + .../annotate/popv/nextflow_schema.json | 251 + target/nextflow/annotate/popv/setup_logger.py | 12 + .../nextflow/cluster/leiden/.config.vsh.yaml | 219 + target/nextflow/cluster/leiden/main.nf | 2631 +++++++ .../nextflow/cluster/leiden/nextflow.config | 108 + .../cluster/leiden/nextflow_params.yaml | 12 + .../cluster/leiden/nextflow_schema.json | 137 + 
.../nextflow/cluster/leiden/setup_logger.py | 12 + .../compress_h5mu/.config.vsh.yaml | 167 + .../compress_h5mu/compress_h5mu.py | 49 + .../compression/compress_h5mu/main.nf | 2596 +++++++ .../compression/compress_h5mu/nextflow.config | 108 + .../compress_h5mu/nextflow_params.yaml | 8 + .../compress_h5mu/nextflow_schema.json | 94 + .../from_10xh5_to_h5mu/.config.vsh.yaml | 272 + .../convert/from_10xh5_to_h5mu/main.nf | 2767 +++++++ .../from_10xh5_to_h5mu/nextflow.config | 108 + .../from_10xh5_to_h5mu/nextflow_params.yaml | 16 + .../from_10xh5_to_h5mu/nextflow_schema.json | 162 + .../from_10xh5_to_h5mu/setup_logger.py | 12 + .../from_10xmtx_to_h5mu/.config.vsh.yaml | 166 + .../convert/from_10xmtx_to_h5mu/main.nf | 2577 +++++++ .../from_10xmtx_to_h5mu/nextflow.config | 108 + .../from_10xmtx_to_h5mu/nextflow_params.yaml | 8 + .../from_10xmtx_to_h5mu/nextflow_schema.json | 93 + .../from_10xmtx_to_h5mu/setup_logger.py | 12 + .../.config.vsh.yaml | 159 + .../main.nf | 2586 +++++++ .../nextflow.config | 108 + .../nextflow_params.yaml | 9 + .../nextflow_schema.json | 102 + .../from_bdrhap_to_h5mu/.config.vsh.yaml | 181 + .../convert/from_bdrhap_to_h5mu/main.nf | 2801 +++++++ .../from_bdrhap_to_h5mu/nextflow.config | 108 + .../from_bdrhap_to_h5mu/nextflow_params.yaml | 11 + .../from_bdrhap_to_h5mu/nextflow_schema.json | 117 + .../.config.vsh.yaml | 190 + .../from_cellranger_multi_to_h5mu/main.nf | 2744 +++++++ .../nextflow.config | 108 + .../nextflow_params.yaml | 9 + .../nextflow_schema.json | 104 + .../setup_logger.py | 12 + .../from_h5ad_to_h5mu/.config.vsh.yaml | 177 + .../convert/from_h5ad_to_h5mu/main.nf | 2596 +++++++ .../convert/from_h5ad_to_h5mu/nextflow.config | 108 + .../from_h5ad_to_h5mu/nextflow_params.yaml | 9 + .../from_h5ad_to_h5mu/nextflow_schema.json | 105 + .../convert/from_h5ad_to_h5mu/setup_logger.py | 12 + .../from_h5mu_to_h5ad/.config.vsh.yaml | 182 + .../convert/from_h5mu_to_h5ad/main.nf | 2592 +++++++ .../convert/from_h5mu_to_h5ad/nextflow.config | 
108 + .../from_h5mu_to_h5ad/nextflow_params.yaml | 9 + .../from_h5mu_to_h5ad/nextflow_schema.json | 106 + .../convert/from_h5mu_to_h5ad/setup_logger.py | 12 + .../convert/velocyto_to_h5mu/.config.vsh.yaml | 255 + .../nextflow/convert/velocyto_to_h5mu/main.nf | 2693 +++++++ .../convert/velocyto_to_h5mu/nextflow.config | 108 + .../velocyto_to_h5mu/nextflow_params.yaml | 15 + .../velocyto_to_h5mu/nextflow_schema.json | 161 + .../.config.vsh.yaml | 637 ++ .../cellbender_remove_background/main.nf | 3212 ++++++++ .../nextflow.config | 107 + .../nextflow_params.yaml | 51 + .../nextflow_schema.json | 544 ++ .../setup_logger.py | 12 + .../.config.vsh.yaml | 406 + .../helper.py | 143 + .../cellbender_remove_background_v0_2/main.nf | 2946 ++++++++ .../nextflow.config | 107 + .../nextflow_params.yaml | 33 + .../nextflow_schema.json | 351 + .../setup_logger.py | 12 + .../nextflow/dataflow/concat/.config.vsh.yaml | 222 + target/nextflow/dataflow/concat/main.nf | 2911 ++++++++ .../nextflow/dataflow/concat/nextflow.config | 108 + .../dataflow/concat/nextflow_params.yaml | 11 + .../dataflow/concat/nextflow_schema.json | 127 + .../nextflow/dataflow/concat/setup_logger.py | 12 + .../nextflow/dataflow/merge/.config.vsh.yaml | 175 + target/nextflow/dataflow/merge/main.nf | 2614 +++++++ .../nextflow/dataflow/merge/nextflow.config | 108 + .../dataflow/merge/nextflow_params.yaml | 8 + .../dataflow/merge/nextflow_schema.json | 94 + .../nextflow/dataflow/merge/setup_logger.py | 12 + .../split_modalities/.config.vsh.yaml | 214 + .../dataflow/split_modalities/main.nf | 2655 +++++++ .../dataflow/split_modalities/nextflow.config | 108 + .../split_modalities/nextflow_params.yaml | 10 + .../split_modalities/nextflow_schema.json | 116 + .../dataflow/split_modalities/setup_logger.py | 12 + .../nextflow/demux/bcl2fastq/.config.vsh.yaml | 169 + target/nextflow/demux/bcl2fastq/main.nf | 2548 +++++++ .../nextflow/demux/bcl2fastq/nextflow.config | 108 + .../demux/bcl2fastq/nextflow_params.yaml | 10 + 
.../demux/bcl2fastq/nextflow_schema.json | 113 + .../demux/bcl_convert/.config.vsh.yaml | 189 + target/nextflow/demux/bcl_convert/main.nf | 2574 +++++++ .../demux/bcl_convert/nextflow.config | 108 + .../demux/bcl_convert/nextflow_params.yaml | 10 + .../demux/bcl_convert/nextflow_schema.json | 113 + .../demux/cellranger_mkfastq/.config.vsh.yaml | 207 + .../nextflow/demux/cellranger_mkfastq/main.nf | 2646 +++++++ .../demux/cellranger_mkfastq/nextflow.config | 108 + .../cellranger_mkfastq/nextflow_params.yaml | 9 + .../cellranger_mkfastq/nextflow_schema.json | 102 + .../demux/cellranger_mkfastq/setup_logger.py | 12 + target/nextflow/dimred/pca/.config.vsh.yaml | 253 + target/nextflow/dimred/pca/main.nf | 2718 +++++++ target/nextflow/dimred/pca/nextflow.config | 108 + .../nextflow/dimred/pca/nextflow_params.yaml | 16 + .../nextflow/dimred/pca/nextflow_schema.json | 178 + target/nextflow/dimred/pca/setup_logger.py | 12 + target/nextflow/dimred/umap/.config.vsh.yaml | 312 + target/nextflow/dimred/umap/main.nf | 2778 +++++++ target/nextflow/dimred/umap/nextflow.config | 108 + .../nextflow/dimred/umap/nextflow_params.yaml | 23 + .../nextflow/dimred/umap/nextflow_schema.json | 241 + target/nextflow/dimred/umap/setup_logger.py | 12 + .../download/download_file/.config.vsh.yaml | 138 + .../nextflow/download/download_file/main.nf | 2492 +++++++ .../download/download_file/nextflow.config | 108 + .../download_file/nextflow_params.yaml | 8 + .../download_file/nextflow_schema.json | 92 + .../sync_test_resources/.config.vsh.yaml | 170 + .../download/sync_test_resources/main.nf | 2554 +++++++ .../sync_test_resources/nextflow.config | 108 + .../sync_test_resources/nextflow_params.yaml | 11 + .../sync_test_resources/nextflow_schema.json | 125 + .../files/make_params/.config.vsh.yaml | 220 + target/nextflow/files/make_params/main.nf | 2663 +++++++ .../files/make_params/nextflow.config | 108 + .../files/make_params/nextflow_params.yaml | 13 + .../files/make_params/nextflow_schema.json | 
145 + .../filter/delimit_fraction/.config.vsh.yaml | 241 + .../nextflow/filter/delimit_fraction/main.nf | 2713 +++++++ .../filter/delimit_fraction/nextflow.config | 108 + .../delimit_fraction/nextflow_params.yaml | 18 + .../delimit_fraction/nextflow_schema.json | 184 + .../filter/delimit_fraction/setup_logger.py | 12 + .../filter/do_filter/.config.vsh.yaml | 202 + target/nextflow/filter/do_filter/main.nf | 2634 +++++++ .../nextflow/filter/do_filter/nextflow.config | 108 + .../filter/do_filter/nextflow_params.yaml | 11 + .../filter/do_filter/nextflow_schema.json | 124 + .../nextflow/filter/do_filter/setup_logger.py | 12 + .../filter_with_counts/.config.vsh.yaml | 295 + .../filter/filter_with_counts/main.nf | 2796 +++++++ .../filter/filter_with_counts/nextflow.config | 108 + .../filter_with_counts/nextflow_params.yaml | 22 + .../filter_with_counts/nextflow_schema.json | 225 + .../filter/filter_with_counts/setup_logger.py | 12 + .../filter/filter_with_hvg/.config.vsh.yaml | 352 + .../nextflow/filter/filter_with_hvg/main.nf | 2856 ++++++++ .../filter/filter_with_hvg/nextflow.config | 108 + .../filter_with_hvg/nextflow_params.yaml | 22 + .../filter_with_hvg/nextflow_schema.json | 245 + .../filter/filter_with_hvg/setup_logger.py | 12 + .../filter_with_scrublet/.config.vsh.yaml | 304 + .../filter/filter_with_scrublet/main.nf | 2769 +++++++ .../filter_with_scrublet/nextflow.config | 108 + .../filter_with_scrublet/nextflow_params.yaml | 18 + .../filter_with_scrublet/nextflow_schema.json | 203 + .../filter_with_scrublet/setup_logger.py | 12 + .../filter/remove_modality/.config.vsh.yaml | 171 + .../nextflow/filter/remove_modality/main.nf | 2550 +++++++ .../filter/remove_modality/nextflow.config | 108 + .../remove_modality/nextflow_params.yaml | 9 + .../remove_modality/nextflow_schema.json | 103 + .../filter/subset_h5mu/.config.vsh.yaml | 187 + target/nextflow/filter/subset_h5mu/main.nf | 2575 +++++++ .../filter/subset_h5mu/nextflow.config | 108 + 
.../filter/subset_h5mu/nextflow_params.yaml | 10 + .../filter/subset_h5mu/nextflow_schema.json | 114 + .../filter/subset_h5mu/setup_logger.py | 12 + .../integrate/harmonypy/.config.vsh.yaml | 240 + target/nextflow/integrate/harmonypy/main.nf | 2646 +++++++ .../integrate/harmonypy/nextflow.config | 108 + .../integrate/harmonypy/nextflow_params.yaml | 13 + .../integrate/harmonypy/nextflow_schema.json | 147 + .../integrate/scanorama/.config.vsh.yaml | 283 + target/nextflow/integrate/scanorama/main.nf | 2702 +++++++ .../integrate/scanorama/nextflow.config | 108 + .../integrate/scanorama/nextflow_params.yaml | 17 + .../integrate/scanorama/nextflow_schema.json | 192 + .../integrate/scarches/.config.vsh.yaml | 331 + target/nextflow/integrate/scarches/main.nf | 2962 ++++++++ .../integrate/scarches/nextflow.config | 108 + .../integrate/scarches/nextflow_params.yaml | 27 + .../integrate/scarches/nextflow_schema.json | 277 + .../integrate/scarches/setup_logger.py | 12 + .../nextflow/integrate/scvi/.config.vsh.yaml | 591 ++ target/nextflow/integrate/scvi/main.nf | 3174 ++++++++ .../nextflow/integrate/scvi/nextflow.config | 108 + .../integrate/scvi/nextflow_params.yaml | 51 + .../integrate/scvi/nextflow_schema.json | 520 ++ target/nextflow/integrate/scvi/subset_vars.py | 16 + .../integrate/totalvi/.config.vsh.yaml | 348 + target/nextflow/integrate/totalvi/main.nf | 2923 ++++++++ .../integrate/totalvi/nextflow.config | 108 + .../integrate/totalvi/nextflow_params.yaml | 28 + .../integrate/totalvi/nextflow_schema.json | 292 + .../integrate/totalvi/setup_logger.py | 12 + .../interpret/lianapy/.config.vsh.yaml | 313 + target/nextflow/interpret/lianapy/main.nf | 2757 +++++++ .../interpret/lianapy/nextflow.config | 108 + .../interpret/lianapy/nextflow_params.yaml | 18 + .../interpret/lianapy/nextflow_schema.json | 207 + .../labels_transfer/knn/.config.vsh.yaml | 379 + target/nextflow/labels_transfer/knn/helper.py | 32 + target/nextflow/labels_transfer/knn/main.nf | 2921 ++++++++ 
.../labels_transfer/knn/nextflow.config | 108 + .../labels_transfer/knn/nextflow_params.yaml | 6 + .../labels_transfer/knn/nextflow_schema.json | 70 + .../labels_transfer/knn/setup_logger.py | 12 + .../labels_transfer/xgboost/.config.vsh.yaml | 594 ++ .../labels_transfer/xgboost/helper.py | 32 + .../nextflow/labels_transfer/xgboost/main.nf | 3422 +++++++++ .../labels_transfer/xgboost/nextflow.config | 108 + .../xgboost/nextflow_params.yaml | 24 + .../xgboost/nextflow_schema.json | 263 + .../labels_transfer/xgboost/setup_logger.py | 12 + .../mapping/bd_rhapsody/.config.vsh.yaml | 417 ++ target/nextflow/mapping/bd_rhapsody/main.nf | 3249 +++++++++ .../mapping/bd_rhapsody/nextflow.config | 108 + .../mapping/bd_rhapsody/nextflow_params.yaml | 36 + .../mapping/bd_rhapsody/nextflow_schema.json | 348 + .../rhapsody_targeted_1.10.1_nodocker.cwl | 5159 +++++++++++++ .../rhapsody_wta_1.10.1_nodocker.cwl | 5204 +++++++++++++ .../mapping/bd_rhapsody/setup_logger.py | 12 + .../mapping/cellranger_count/.config.vsh.yaml | 266 + .../nextflow/mapping/cellranger_count/main.nf | 2745 +++++++ .../mapping/cellranger_count/nextflow.config | 108 + .../cellranger_count/nextflow_params.yaml | 17 + .../cellranger_count/nextflow_schema.json | 175 + .../cellranger_count_split/.config.vsh.yaml | 218 + .../mapping/cellranger_count_split/main.nf | 2633 +++++++ .../cellranger_count_split/nextflow.config | 108 + .../nextflow_params.yaml | 12 + .../nextflow_schema.json | 136 + .../mapping/cellranger_multi/.config.vsh.yaml | 423 ++ .../nextflow/mapping/cellranger_multi/main.nf | 3132 ++++++++ .../mapping/cellranger_multi/nextflow.config | 108 + .../cellranger_multi/nextflow_params.yaml | 34 + .../cellranger_multi/nextflow_schema.json | 328 + .../mapping/cellranger_multi/setup_logger.py | 12 + .../mapping/htseq_count/.config.vsh.yaml | 418 ++ target/nextflow/mapping/htseq_count/main.nf | 2978 ++++++++ .../mapping/htseq_count/nextflow.config | 108 + .../mapping/htseq_count/nextflow_params.yaml | 27 + 
.../mapping/htseq_count/nextflow_schema.json | 291 + .../htseq_count_to_h5mu/.config.vsh.yaml | 209 + .../mapping/htseq_count_to_h5mu/main.nf | 2710 +++++++ .../htseq_count_to_h5mu/nextflow.config | 108 + .../htseq_count_to_h5mu/nextflow_params.yaml | 12 + .../htseq_count_to_h5mu/nextflow_schema.json | 127 + .../mapping/multi_star/.config.vsh.yaml | 3080 ++++++++ target/nextflow/mapping/multi_star/main.nf | 6497 +++++++++++++++++ .../mapping/multi_star/nextflow.config | 108 + .../mapping/multi_star/nextflow_params.yaml | 16 + .../mapping/multi_star/nextflow_schema.json | 168 + .../multi_star_to_h5mu/.config.vsh.yaml | 179 + .../mapping/multi_star_to_h5mu/main.nf | 2625 +++++++ .../multi_star_to_h5mu/nextflow.config | 108 + .../multi_star_to_h5mu/nextflow_params.yaml | 8 + .../multi_star_to_h5mu/nextflow_schema.json | 93 + .../mapping/samtools_sort/.config.vsh.yaml | 270 + target/nextflow/mapping/samtools_sort/main.nf | 2740 +++++++ .../mapping/samtools_sort/nextflow.config | 108 + .../samtools_sort/nextflow_params.yaml | 19 + .../samtools_sort/nextflow_schema.json | 195 + .../mapping/star_align/.config.vsh.yaml | 2535 +++++++ target/nextflow/mapping/star_align/main.nf | 5287 ++++++++++++++ .../mapping/star_align/nextflow.config | 108 + .../mapping/star_align/nextflow_params.yaml | 8 + .../mapping/star_align/nextflow_schema.json | 91 + .../mapping/star_align/setup_logger.py | 12 + .../mapping/star_align_v273a/.config.vsh.yaml | 2535 +++++++ .../nextflow/mapping/star_align_v273a/main.nf | 5287 ++++++++++++++ .../mapping/star_align_v273a/nextflow.config | 108 + .../star_align_v273a/nextflow_params.yaml | 8 + .../star_align_v273a/nextflow_schema.json | 91 + .../mapping/star_align_v273a/setup_logger.py | 12 + .../star_build_reference/.config.vsh.yaml | 190 + .../mapping/star_build_reference/main.nf | 2686 +++++++ .../star_build_reference/nextflow.config | 108 + .../star_build_reference/nextflow_params.yaml | 11 + .../star_build_reference/nextflow_schema.json | 116 + 
.../nextflow/metadata/add_id/.config.vsh.yaml | 197 + target/nextflow/metadata/add_id/main.nf | 2631 +++++++ .../nextflow/metadata/add_id/nextflow.config | 108 + .../metadata/add_id/nextflow_params.yaml | 11 + .../metadata/add_id/nextflow_schema.json | 125 + .../nextflow/metadata/add_id/setup_logger.py | 12 + .../grep_annotation_column/.config.vsh.yaml | 244 + .../metadata/grep_annotation_column/main.nf | 2700 +++++++ .../grep_annotation_column/nextflow.config | 108 + .../nextflow_params.yaml | 18 + .../nextflow_schema.json | 183 + .../metadata/join_csv/.config.vsh.yaml | 229 + target/nextflow/metadata/join_csv/main.nf | 2670 +++++++ .../metadata/join_csv/nextflow.config | 108 + .../metadata/join_csv/nextflow_params.yaml | 17 + .../metadata/join_csv/nextflow_schema.json | 173 + .../metadata/join_csv/setup_logger.py | 12 + .../metadata/join_uns_to_obs/.config.vsh.yaml | 171 + .../nextflow/metadata/join_uns_to_obs/main.nf | 2577 +++++++ .../metadata/join_uns_to_obs/nextflow.config | 107 + .../join_uns_to_obs/nextflow_params.yaml | 10 + .../join_uns_to_obs/nextflow_schema.json | 114 + .../metadata/join_uns_to_obs/setup_logger.py | 12 + .../move_obsm_to_obs/.config.vsh.yaml | 192 + .../metadata/move_obsm_to_obs/main.nf | 2626 +++++++ .../metadata/move_obsm_to_obs/nextflow.config | 108 + .../move_obsm_to_obs/nextflow_params.yaml | 12 + .../move_obsm_to_obs/nextflow_schema.json | 128 + .../metadata/move_obsm_to_obs/setup_logger.py | 12 + .../nextflow/neighbors/bbknn/.config.vsh.yaml | 289 + target/nextflow/neighbors/bbknn/main.nf | 2706 +++++++ .../nextflow/neighbors/bbknn/nextflow.config | 108 + .../neighbors/bbknn/nextflow_params.yaml | 17 + .../neighbors/bbknn/nextflow_schema.json | 191 + .../neighbors/find_neighbors/.config.vsh.yaml | 309 + .../nextflow/neighbors/find_neighbors/main.nf | 2759 +++++++ .../neighbors/find_neighbors/nextflow.config | 108 + .../find_neighbors/nextflow_params.yaml | 16 + .../find_neighbors/nextflow_schema.json | 183 + 
.../neighbors/find_neighbors/setup_logger.py | 12 + .../filter_10xh5/.config.vsh.yaml | 195 + .../process_10xh5/filter_10xh5/main.nf | 2642 +++++++ .../filter_10xh5/nextflow.config | 108 + .../filter_10xh5/nextflow_params.yaml | 11 + .../filter_10xh5/nextflow_schema.json | 124 + .../qc/calculate_qc_metrics/.config.vsh.yaml | 235 + .../nextflow/qc/calculate_qc_metrics/main.nf | 2739 +++++++ .../qc/calculate_qc_metrics/nextflow.config | 108 + .../calculate_qc_metrics/nextflow_params.yaml | 15 + .../calculate_qc_metrics/nextflow_schema.json | 158 + .../qc/calculate_qc_metrics/setup_logger.py | 12 + target/nextflow/qc/fastqc/.config.vsh.yaml | 156 + target/nextflow/qc/fastqc/main.nf | 2512 +++++++ target/nextflow/qc/fastqc/nextflow.config | 107 + .../nextflow/qc/fastqc/nextflow_params.yaml | 9 + .../nextflow/qc/fastqc/nextflow_schema.json | 104 + target/nextflow/qc/multiqc/.config.vsh.yaml | 140 + target/nextflow/qc/multiqc/main.nf | 2493 +++++++ target/nextflow/qc/multiqc/nextflow.config | 107 + .../nextflow/qc/multiqc/nextflow_params.yaml | 7 + .../nextflow/qc/multiqc/nextflow_schema.json | 81 + .../query/cellxgene_census/.config.vsh.yaml | 260 + .../nextflow/query/cellxgene_census/main.nf | 2803 +++++++ .../query/cellxgene_census/nextflow.config | 108 + .../cellxgene_census/nextflow_params.yaml | 18 + .../cellxgene_census/nextflow_schema.json | 187 + .../query/cellxgene_census/setup_logger.py | 12 + .../build_bdrhap_reference/.config.vsh.yaml | 186 + .../reference/build_bdrhap_reference/main.nf | 2597 +++++++ .../build_bdrhap_reference/nextflow.config | 108 + .../nextflow_params.yaml | 8 + .../nextflow_schema.json | 91 + .../.config.vsh.yaml | 187 + .../build_cellranger_reference/main.nf | 2602 +++++++ .../nextflow.config | 108 + .../nextflow_params.yaml | 8 + .../nextflow_schema.json | 91 + .../reference/make_reference/.config.vsh.yaml | 212 + .../nextflow/reference/make_reference/main.nf | 2635 +++++++ .../reference/make_reference/nextflow.config | 108 + 
.../make_reference/nextflow_params.yaml | 11 + .../make_reference/nextflow_schema.json | 122 + .../nextflow/report/mermaid/.config.vsh.yaml | 185 + target/nextflow/report/mermaid/main.nf | 2554 +++++++ .../nextflow/report/mermaid/nextflow.config | 108 + .../report/mermaid/nextflow_params.yaml | 11 + .../report/mermaid/nextflow_schema.json | 126 + .../report/mermaid/puppeteer-config.json | 6 + .../transfer/publish/.config.vsh.yaml | 125 + target/nextflow/transfer/publish/main.nf | 2474 +++++++ .../nextflow/transfer/publish/nextflow.config | 108 + .../transfer/publish/nextflow_params.yaml | 7 + .../transfer/publish/nextflow_schema.json | 81 + .../nextflow/transform/clr/.config.vsh.yaml | 188 + target/nextflow/transform/clr/main.nf | 2577 +++++++ target/nextflow/transform/clr/nextflow.config | 108 + .../transform/clr/nextflow_params.yaml | 10 + .../transform/clr/nextflow_schema.json | 114 + .../transform/delete_layer/.config.vsh.yaml | 196 + .../transform/delete_layer/compress_h5mu.py | 49 + .../nextflow/transform/delete_layer/main.nf | 2681 +++++++ .../transform/delete_layer/nextflow.config | 108 + .../delete_layer/nextflow_params.yaml | 11 + .../delete_layer/nextflow_schema.json | 125 + .../transform/delete_layer/setup_logger.py | 12 + .../nextflow/transform/log1p/.config.vsh.yaml | 225 + target/nextflow/transform/log1p/main.nf | 2668 +++++++ .../nextflow/transform/log1p/nextflow.config | 108 + .../transform/log1p/nextflow_params.yaml | 12 + .../transform/log1p/nextflow_schema.json | 134 + .../nextflow/transform/log1p/setup_logger.py | 12 + .../normalize_total/.config.vsh.yaml | 242 + .../transform/normalize_total/main.nf | 2669 +++++++ .../transform/normalize_total/nextflow.config | 108 + .../normalize_total/nextflow_params.yaml | 13 + .../normalize_total/nextflow_schema.json | 146 + .../transform/normalize_total/setup_logger.py | 12 + .../transform/regress_out/.config.vsh.yaml | 195 + target/nextflow/transform/regress_out/main.nf | 2613 +++++++ 
.../transform/regress_out/nextflow.config | 108 + .../regress_out/nextflow_params.yaml | 10 + .../regress_out/nextflow_schema.json | 114 + .../transform/regress_out/setup_logger.py | 12 + .../nextflow/transform/scale/.config.vsh.yaml | 205 + target/nextflow/transform/scale/main.nf | 2625 +++++++ .../nextflow/transform/scale/nextflow.config | 108 + .../transform/scale/nextflow_params.yaml | 11 + .../transform/scale/nextflow_schema.json | 125 + .../nextflow/transform/scale/setup_logger.py | 12 + .../nextflow/velocity/scvelo/.config.vsh.yaml | 276 + target/nextflow/velocity/scvelo/main.nf | 2761 +++++++ .../nextflow/velocity/scvelo/nextflow.config | 107 + .../velocity/scvelo/nextflow_params.yaml | 24 + .../velocity/scvelo/nextflow_schema.json | 237 + .../nextflow/velocity/scvelo/setup_logger.py | 12 + .../velocity/velocyto/.config.vsh.yaml | 225 + target/nextflow/velocity/velocyto/main.nf | 2650 +++++++ .../velocity/velocyto/nextflow.config | 108 + .../velocity/velocyto/nextflow_params.yaml | 11 + .../velocity/velocyto/nextflow_schema.json | 125 + 683 files changed, 436300 insertions(+) create mode 100644 target/docker/annotate/popv/.config.vsh.yaml create mode 100755 target/docker/annotate/popv/popv create mode 100644 target/docker/annotate/popv/setup_logger.py create mode 100644 target/docker/cluster/leiden/.config.vsh.yaml create mode 100755 target/docker/cluster/leiden/leiden create mode 100644 target/docker/cluster/leiden/setup_logger.py create mode 100644 target/docker/compression/compress_h5mu/.config.vsh.yaml create mode 100755 target/docker/compression/compress_h5mu/compress_h5mu create mode 100644 target/docker/compression/compress_h5mu/compress_h5mu.py create mode 100644 target/docker/compression/tar_extract/.config.vsh.yaml create mode 100755 target/docker/compression/tar_extract/tar_extract create mode 100644 target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu create mode 
100644 target/docker/convert/from_10xh5_to_h5mu/setup_logger.py create mode 100644 target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu create mode 100644 target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py create mode 100644 target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml create mode 100755 target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags create mode 100644 target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu create mode 100644 target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu create mode 100644 target/docker/convert/from_cellranger_multi_to_h5mu/setup_logger.py create mode 100644 target/docker/convert/from_h5ad_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu create mode 100644 target/docker/convert/from_h5ad_to_h5mu/setup_logger.py create mode 100644 target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml create mode 100755 target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad create mode 100644 target/docker/convert/from_h5mu_to_h5ad/setup_logger.py create mode 100644 target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu create mode 100644 target/docker/correction/cellbender_remove_background/.config.vsh.yaml create mode 100755 target/docker/correction/cellbender_remove_background/cellbender_remove_background create mode 100644 target/docker/correction/cellbender_remove_background/setup_logger.py create mode 100644 target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml create mode 100755 
target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 create mode 100644 target/docker/correction/cellbender_remove_background_v0_2/helper.py create mode 100644 target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py create mode 100644 target/docker/dataflow/concat/.config.vsh.yaml create mode 100755 target/docker/dataflow/concat/concat create mode 100644 target/docker/dataflow/concat/setup_logger.py create mode 100644 target/docker/dataflow/merge/.config.vsh.yaml create mode 100755 target/docker/dataflow/merge/merge create mode 100644 target/docker/dataflow/merge/setup_logger.py create mode 100644 target/docker/dataflow/split_modalities/.config.vsh.yaml create mode 100644 target/docker/dataflow/split_modalities/setup_logger.py create mode 100755 target/docker/dataflow/split_modalities/split_modalities create mode 100644 target/docker/demux/bcl2fastq/.config.vsh.yaml create mode 100755 target/docker/demux/bcl2fastq/bcl2fastq create mode 100644 target/docker/demux/bcl_convert/.config.vsh.yaml create mode 100755 target/docker/demux/bcl_convert/bcl_convert create mode 100644 target/docker/demux/cellranger_mkfastq/.config.vsh.yaml create mode 100755 target/docker/demux/cellranger_mkfastq/cellranger_mkfastq create mode 100644 target/docker/demux/cellranger_mkfastq/setup_logger.py create mode 100644 target/docker/dimred/pca/.config.vsh.yaml create mode 100755 target/docker/dimred/pca/pca create mode 100644 target/docker/dimred/pca/setup_logger.py create mode 100644 target/docker/dimred/umap/.config.vsh.yaml create mode 100644 target/docker/dimred/umap/setup_logger.py create mode 100755 target/docker/dimred/umap/umap create mode 100644 target/docker/download/download_file/.config.vsh.yaml create mode 100755 target/docker/download/download_file/download_file create mode 100644 target/docker/download/sync_test_resources/.config.vsh.yaml create mode 100755 target/docker/download/sync_test_resources/sync_test_resources 
create mode 100644 target/docker/files/make_params/.config.vsh.yaml create mode 100755 target/docker/files/make_params/make_params create mode 100644 target/docker/filter/delimit_fraction/.config.vsh.yaml create mode 100755 target/docker/filter/delimit_fraction/delimit_fraction create mode 100644 target/docker/filter/delimit_fraction/setup_logger.py create mode 100644 target/docker/filter/do_filter/.config.vsh.yaml create mode 100755 target/docker/filter/do_filter/do_filter create mode 100644 target/docker/filter/do_filter/setup_logger.py create mode 100644 target/docker/filter/filter_with_counts/.config.vsh.yaml create mode 100755 target/docker/filter/filter_with_counts/filter_with_counts create mode 100644 target/docker/filter/filter_with_counts/setup_logger.py create mode 100644 target/docker/filter/filter_with_hvg/.config.vsh.yaml create mode 100755 target/docker/filter/filter_with_hvg/filter_with_hvg create mode 100644 target/docker/filter/filter_with_hvg/setup_logger.py create mode 100644 target/docker/filter/filter_with_scrublet/.config.vsh.yaml create mode 100755 target/docker/filter/filter_with_scrublet/filter_with_scrublet create mode 100644 target/docker/filter/filter_with_scrublet/setup_logger.py create mode 100644 target/docker/filter/remove_modality/.config.vsh.yaml create mode 100755 target/docker/filter/remove_modality/remove_modality create mode 100644 target/docker/filter/subset_h5mu/.config.vsh.yaml create mode 100644 target/docker/filter/subset_h5mu/setup_logger.py create mode 100755 target/docker/filter/subset_h5mu/subset_h5mu create mode 100644 target/docker/integrate/harmonypy/.config.vsh.yaml create mode 100755 target/docker/integrate/harmonypy/harmonypy create mode 100644 target/docker/integrate/scanorama/.config.vsh.yaml create mode 100755 target/docker/integrate/scanorama/scanorama create mode 100644 target/docker/integrate/scarches/.config.vsh.yaml create mode 100755 target/docker/integrate/scarches/scarches create mode 100644 
target/docker/integrate/scarches/setup_logger.py create mode 100644 target/docker/integrate/scvi/.config.vsh.yaml create mode 100755 target/docker/integrate/scvi/scvi create mode 100644 target/docker/integrate/scvi/subset_vars.py create mode 100644 target/docker/integrate/totalvi/.config.vsh.yaml create mode 100644 target/docker/integrate/totalvi/setup_logger.py create mode 100755 target/docker/integrate/totalvi/totalvi create mode 100644 target/docker/interactive/run_cellxgene/.config.vsh.yaml create mode 100755 target/docker/interactive/run_cellxgene/run_cellxgene create mode 100644 target/docker/interactive/run_cirrocumulus/.config.vsh.yaml create mode 100755 target/docker/interactive/run_cirrocumulus/run_cirrocumulus create mode 100644 target/docker/interpret/lianapy/.config.vsh.yaml create mode 100755 target/docker/interpret/lianapy/lianapy create mode 100644 target/docker/labels_transfer/knn/.config.vsh.yaml create mode 100644 target/docker/labels_transfer/knn/helper.py create mode 100755 target/docker/labels_transfer/knn/knn create mode 100644 target/docker/labels_transfer/knn/setup_logger.py create mode 100644 target/docker/labels_transfer/xgboost/.config.vsh.yaml create mode 100644 target/docker/labels_transfer/xgboost/helper.py create mode 100644 target/docker/labels_transfer/xgboost/setup_logger.py create mode 100755 target/docker/labels_transfer/xgboost/xgboost create mode 100644 target/docker/mapping/bd_rhapsody/.config.vsh.yaml create mode 100755 target/docker/mapping/bd_rhapsody/bd_rhapsody create mode 100755 target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl create mode 100755 target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl create mode 100644 target/docker/mapping/bd_rhapsody/setup_logger.py create mode 100644 target/docker/mapping/cellranger_count/.config.vsh.yaml create mode 100755 target/docker/mapping/cellranger_count/cellranger_count create mode 100644 
target/docker/mapping/cellranger_count_split/.config.vsh.yaml create mode 100755 target/docker/mapping/cellranger_count_split/cellranger_count_split create mode 100644 target/docker/mapping/cellranger_multi/.config.vsh.yaml create mode 100755 target/docker/mapping/cellranger_multi/cellranger_multi create mode 100644 target/docker/mapping/cellranger_multi/setup_logger.py create mode 100644 target/docker/mapping/htseq_count/.config.vsh.yaml create mode 100755 target/docker/mapping/htseq_count/htseq_count create mode 100644 target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu create mode 100644 target/docker/mapping/multi_star/.config.vsh.yaml create mode 100755 target/docker/mapping/multi_star/multi_star create mode 100644 target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml create mode 100755 target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu create mode 100644 target/docker/mapping/samtools_sort/.config.vsh.yaml create mode 100755 target/docker/mapping/samtools_sort/samtools_sort create mode 100644 target/docker/mapping/star_align/.config.vsh.yaml create mode 100644 target/docker/mapping/star_align/setup_logger.py create mode 100755 target/docker/mapping/star_align/star_align create mode 100644 target/docker/mapping/star_align_v273a/.config.vsh.yaml create mode 100644 target/docker/mapping/star_align_v273a/setup_logger.py create mode 100755 target/docker/mapping/star_align_v273a/star_align_v273a create mode 100644 target/docker/mapping/star_build_reference/.config.vsh.yaml create mode 100755 target/docker/mapping/star_build_reference/star_build_reference create mode 100644 target/docker/metadata/add_id/.config.vsh.yaml create mode 100755 target/docker/metadata/add_id/add_id create mode 100644 target/docker/metadata/add_id/setup_logger.py create mode 100644 target/docker/metadata/grep_annotation_column/.config.vsh.yaml create mode 100755 
target/docker/metadata/grep_annotation_column/grep_annotation_column create mode 100644 target/docker/metadata/join_csv/.config.vsh.yaml create mode 100755 target/docker/metadata/join_csv/join_csv create mode 100644 target/docker/metadata/join_csv/setup_logger.py create mode 100644 target/docker/metadata/join_uns_to_obs/.config.vsh.yaml create mode 100755 target/docker/metadata/join_uns_to_obs/join_uns_to_obs create mode 100644 target/docker/metadata/join_uns_to_obs/setup_logger.py create mode 100644 target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml create mode 100755 target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs create mode 100644 target/docker/metadata/move_obsm_to_obs/setup_logger.py create mode 100644 target/docker/neighbors/bbknn/.config.vsh.yaml create mode 100755 target/docker/neighbors/bbknn/bbknn create mode 100644 target/docker/neighbors/find_neighbors/.config.vsh.yaml create mode 100755 target/docker/neighbors/find_neighbors/find_neighbors create mode 100644 target/docker/neighbors/find_neighbors/setup_logger.py create mode 100644 target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml create mode 100755 target/docker/process_10xh5/filter_10xh5/filter_10xh5 create mode 100644 target/docker/qc/calculate_qc_metrics/.config.vsh.yaml create mode 100755 target/docker/qc/calculate_qc_metrics/calculate_qc_metrics create mode 100644 target/docker/qc/calculate_qc_metrics/setup_logger.py create mode 100644 target/docker/qc/fastqc/.config.vsh.yaml create mode 100755 target/docker/qc/fastqc/fastqc create mode 100644 target/docker/qc/multiqc/.config.vsh.yaml create mode 100755 target/docker/qc/multiqc/multiqc create mode 100644 target/docker/query/cellxgene_census/.config.vsh.yaml create mode 100755 target/docker/query/cellxgene_census/cellxgene_census create mode 100644 target/docker/query/cellxgene_census/setup_logger.py create mode 100644 target/docker/reference/build_bdrhap_reference/.config.vsh.yaml create mode 100755 
target/docker/reference/build_bdrhap_reference/build_bdrhap_reference create mode 100644 target/docker/reference/build_cellranger_reference/.config.vsh.yaml create mode 100755 target/docker/reference/build_cellranger_reference/build_cellranger_reference create mode 100644 target/docker/reference/make_reference/.config.vsh.yaml create mode 100755 target/docker/reference/make_reference/make_reference create mode 100644 target/docker/report/mermaid/.config.vsh.yaml create mode 100755 target/docker/report/mermaid/mermaid create mode 100644 target/docker/report/mermaid/puppeteer-config.json create mode 100644 target/docker/transfer/publish/.config.vsh.yaml create mode 100755 target/docker/transfer/publish/publish create mode 100644 target/docker/transform/clr/.config.vsh.yaml create mode 100755 target/docker/transform/clr/clr create mode 100644 target/docker/transform/delete_layer/.config.vsh.yaml create mode 100644 target/docker/transform/delete_layer/compress_h5mu.py create mode 100755 target/docker/transform/delete_layer/delete_layer create mode 100644 target/docker/transform/delete_layer/setup_logger.py create mode 100644 target/docker/transform/log1p/.config.vsh.yaml create mode 100755 target/docker/transform/log1p/log1p create mode 100644 target/docker/transform/log1p/setup_logger.py create mode 100644 target/docker/transform/normalize_total/.config.vsh.yaml create mode 100755 target/docker/transform/normalize_total/normalize_total create mode 100644 target/docker/transform/normalize_total/setup_logger.py create mode 100644 target/docker/transform/regress_out/.config.vsh.yaml create mode 100755 target/docker/transform/regress_out/regress_out create mode 100644 target/docker/transform/regress_out/setup_logger.py create mode 100644 target/docker/transform/scale/.config.vsh.yaml create mode 100755 target/docker/transform/scale/scale create mode 100644 target/docker/transform/scale/setup_logger.py create mode 100644 target/docker/velocity/scvelo/.config.vsh.yaml 
create mode 100755 target/docker/velocity/scvelo/scvelo create mode 100644 target/docker/velocity/scvelo/setup_logger.py create mode 100644 target/docker/velocity/velocyto/.config.vsh.yaml create mode 100755 target/docker/velocity/velocyto/velocyto create mode 100644 target/native/compression/compress_h5mu/.config.vsh.yaml create mode 100755 target/native/compression/compress_h5mu/compress_h5mu create mode 100644 target/native/compression/compress_h5mu/compress_h5mu.py create mode 100644 target/native/compression/tar_extract/.config.vsh.yaml create mode 100755 target/native/compression/tar_extract/tar_extract create mode 100644 target/native/dataflow/concat/.config.vsh.yaml create mode 100755 target/native/dataflow/concat/concat create mode 100644 target/native/dataflow/concat/setup_logger.py create mode 100644 target/native/dataflow/merge/.config.vsh.yaml create mode 100755 target/native/dataflow/merge/merge create mode 100644 target/native/dataflow/merge/setup_logger.py create mode 100644 target/native/dataflow/split_modalities/.config.vsh.yaml create mode 100644 target/native/dataflow/split_modalities/setup_logger.py create mode 100755 target/native/dataflow/split_modalities/split_modalities create mode 100644 target/native/download/sync_test_resources/.config.vsh.yaml create mode 100755 target/native/download/sync_test_resources/sync_test_resources create mode 100644 target/native/integrate/scarches/.config.vsh.yaml create mode 100755 target/native/integrate/scarches/scarches create mode 100644 target/native/integrate/scarches/setup_logger.py create mode 100644 target/native/integrate/totalvi/.config.vsh.yaml create mode 100644 target/native/integrate/totalvi/setup_logger.py create mode 100755 target/native/integrate/totalvi/totalvi create mode 100644 target/native/labels_transfer/knn/.config.vsh.yaml create mode 100644 target/native/labels_transfer/knn/helper.py create mode 100755 target/native/labels_transfer/knn/knn create mode 100644 
target/native/labels_transfer/knn/setup_logger.py create mode 100644 target/native/labels_transfer/xgboost/.config.vsh.yaml create mode 100644 target/native/labels_transfer/xgboost/helper.py create mode 100644 target/native/labels_transfer/xgboost/setup_logger.py create mode 100755 target/native/labels_transfer/xgboost/xgboost create mode 100644 target/native/metadata/add_id/.config.vsh.yaml create mode 100755 target/native/metadata/add_id/add_id create mode 100644 target/native/metadata/add_id/setup_logger.py create mode 100644 target/native/metadata/grep_annotation_column/.config.vsh.yaml create mode 100755 target/native/metadata/grep_annotation_column/grep_annotation_column create mode 100644 target/native/transform/scale/.config.vsh.yaml create mode 100755 target/native/transform/scale/scale create mode 100644 target/native/transform/scale/setup_logger.py create mode 100644 target/native/velocity/scvelo/.config.vsh.yaml create mode 100755 target/native/velocity/scvelo/scvelo create mode 100644 target/native/velocity/scvelo/setup_logger.py create mode 100644 target/native/velocity/velocyto/.config.vsh.yaml create mode 100755 target/native/velocity/velocyto/velocyto create mode 100644 target/nextflow/annotate/popv/.config.vsh.yaml create mode 100644 target/nextflow/annotate/popv/main.nf create mode 100644 target/nextflow/annotate/popv/nextflow.config create mode 100644 target/nextflow/annotate/popv/nextflow_params.yaml create mode 100644 target/nextflow/annotate/popv/nextflow_schema.json create mode 100644 target/nextflow/annotate/popv/setup_logger.py create mode 100644 target/nextflow/cluster/leiden/.config.vsh.yaml create mode 100644 target/nextflow/cluster/leiden/main.nf create mode 100644 target/nextflow/cluster/leiden/nextflow.config create mode 100644 target/nextflow/cluster/leiden/nextflow_params.yaml create mode 100644 target/nextflow/cluster/leiden/nextflow_schema.json create mode 100644 target/nextflow/cluster/leiden/setup_logger.py create mode 100644 
target/nextflow/compression/compress_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/compression/compress_h5mu/compress_h5mu.py create mode 100644 target/nextflow/compression/compress_h5mu/main.nf create mode 100644 target/nextflow/compression/compress_h5mu/nextflow.config create mode 100644 target/nextflow/compression/compress_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/compression/compress_h5mu/nextflow_schema.json create mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/main.nf create mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config create mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py create mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/main.nf create mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config create mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py create mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml create mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf create mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config create mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml create mode 100644 target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json create mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml create mode 100644 
target/nextflow/convert/from_bdrhap_to_h5mu/main.nf create mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config create mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf create mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config create mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py create mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/main.nf create mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config create mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py create mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml create mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/main.nf create mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config create mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml create mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json create mode 100644 target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py create mode 100644 target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/convert/velocyto_to_h5mu/main.nf create mode 100644 target/nextflow/convert/velocyto_to_h5mu/nextflow.config create mode 
100644 target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml create mode 100644 target/nextflow/correction/cellbender_remove_background/main.nf create mode 100644 target/nextflow/correction/cellbender_remove_background/nextflow.config create mode 100644 target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml create mode 100644 target/nextflow/correction/cellbender_remove_background/nextflow_schema.json create mode 100644 target/nextflow/correction/cellbender_remove_background/setup_logger.py create mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml create mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/helper.py create mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/main.nf create mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config create mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml create mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json create mode 100644 target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py create mode 100644 target/nextflow/dataflow/concat/.config.vsh.yaml create mode 100644 target/nextflow/dataflow/concat/main.nf create mode 100644 target/nextflow/dataflow/concat/nextflow.config create mode 100644 target/nextflow/dataflow/concat/nextflow_params.yaml create mode 100644 target/nextflow/dataflow/concat/nextflow_schema.json create mode 100644 target/nextflow/dataflow/concat/setup_logger.py create mode 100644 target/nextflow/dataflow/merge/.config.vsh.yaml create mode 100644 target/nextflow/dataflow/merge/main.nf create mode 100644 target/nextflow/dataflow/merge/nextflow.config create mode 100644 
target/nextflow/dataflow/merge/nextflow_params.yaml create mode 100644 target/nextflow/dataflow/merge/nextflow_schema.json create mode 100644 target/nextflow/dataflow/merge/setup_logger.py create mode 100644 target/nextflow/dataflow/split_modalities/.config.vsh.yaml create mode 100644 target/nextflow/dataflow/split_modalities/main.nf create mode 100644 target/nextflow/dataflow/split_modalities/nextflow.config create mode 100644 target/nextflow/dataflow/split_modalities/nextflow_params.yaml create mode 100644 target/nextflow/dataflow/split_modalities/nextflow_schema.json create mode 100644 target/nextflow/dataflow/split_modalities/setup_logger.py create mode 100644 target/nextflow/demux/bcl2fastq/.config.vsh.yaml create mode 100644 target/nextflow/demux/bcl2fastq/main.nf create mode 100644 target/nextflow/demux/bcl2fastq/nextflow.config create mode 100644 target/nextflow/demux/bcl2fastq/nextflow_params.yaml create mode 100644 target/nextflow/demux/bcl2fastq/nextflow_schema.json create mode 100644 target/nextflow/demux/bcl_convert/.config.vsh.yaml create mode 100644 target/nextflow/demux/bcl_convert/main.nf create mode 100644 target/nextflow/demux/bcl_convert/nextflow.config create mode 100644 target/nextflow/demux/bcl_convert/nextflow_params.yaml create mode 100644 target/nextflow/demux/bcl_convert/nextflow_schema.json create mode 100644 target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml create mode 100644 target/nextflow/demux/cellranger_mkfastq/main.nf create mode 100644 target/nextflow/demux/cellranger_mkfastq/nextflow.config create mode 100644 target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml create mode 100644 target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json create mode 100644 target/nextflow/demux/cellranger_mkfastq/setup_logger.py create mode 100644 target/nextflow/dimred/pca/.config.vsh.yaml create mode 100644 target/nextflow/dimred/pca/main.nf create mode 100644 target/nextflow/dimred/pca/nextflow.config create mode 100644 
target/nextflow/dimred/pca/nextflow_params.yaml create mode 100644 target/nextflow/dimred/pca/nextflow_schema.json create mode 100644 target/nextflow/dimred/pca/setup_logger.py create mode 100644 target/nextflow/dimred/umap/.config.vsh.yaml create mode 100644 target/nextflow/dimred/umap/main.nf create mode 100644 target/nextflow/dimred/umap/nextflow.config create mode 100644 target/nextflow/dimred/umap/nextflow_params.yaml create mode 100644 target/nextflow/dimred/umap/nextflow_schema.json create mode 100644 target/nextflow/dimred/umap/setup_logger.py create mode 100644 target/nextflow/download/download_file/.config.vsh.yaml create mode 100644 target/nextflow/download/download_file/main.nf create mode 100644 target/nextflow/download/download_file/nextflow.config create mode 100644 target/nextflow/download/download_file/nextflow_params.yaml create mode 100644 target/nextflow/download/download_file/nextflow_schema.json create mode 100644 target/nextflow/download/sync_test_resources/.config.vsh.yaml create mode 100644 target/nextflow/download/sync_test_resources/main.nf create mode 100644 target/nextflow/download/sync_test_resources/nextflow.config create mode 100644 target/nextflow/download/sync_test_resources/nextflow_params.yaml create mode 100644 target/nextflow/download/sync_test_resources/nextflow_schema.json create mode 100644 target/nextflow/files/make_params/.config.vsh.yaml create mode 100644 target/nextflow/files/make_params/main.nf create mode 100644 target/nextflow/files/make_params/nextflow.config create mode 100644 target/nextflow/files/make_params/nextflow_params.yaml create mode 100644 target/nextflow/files/make_params/nextflow_schema.json create mode 100644 target/nextflow/filter/delimit_fraction/.config.vsh.yaml create mode 100644 target/nextflow/filter/delimit_fraction/main.nf create mode 100644 target/nextflow/filter/delimit_fraction/nextflow.config create mode 100644 target/nextflow/filter/delimit_fraction/nextflow_params.yaml create mode 100644 
target/nextflow/filter/delimit_fraction/nextflow_schema.json create mode 100644 target/nextflow/filter/delimit_fraction/setup_logger.py create mode 100644 target/nextflow/filter/do_filter/.config.vsh.yaml create mode 100644 target/nextflow/filter/do_filter/main.nf create mode 100644 target/nextflow/filter/do_filter/nextflow.config create mode 100644 target/nextflow/filter/do_filter/nextflow_params.yaml create mode 100644 target/nextflow/filter/do_filter/nextflow_schema.json create mode 100644 target/nextflow/filter/do_filter/setup_logger.py create mode 100644 target/nextflow/filter/filter_with_counts/.config.vsh.yaml create mode 100644 target/nextflow/filter/filter_with_counts/main.nf create mode 100644 target/nextflow/filter/filter_with_counts/nextflow.config create mode 100644 target/nextflow/filter/filter_with_counts/nextflow_params.yaml create mode 100644 target/nextflow/filter/filter_with_counts/nextflow_schema.json create mode 100644 target/nextflow/filter/filter_with_counts/setup_logger.py create mode 100644 target/nextflow/filter/filter_with_hvg/.config.vsh.yaml create mode 100644 target/nextflow/filter/filter_with_hvg/main.nf create mode 100644 target/nextflow/filter/filter_with_hvg/nextflow.config create mode 100644 target/nextflow/filter/filter_with_hvg/nextflow_params.yaml create mode 100644 target/nextflow/filter/filter_with_hvg/nextflow_schema.json create mode 100644 target/nextflow/filter/filter_with_hvg/setup_logger.py create mode 100644 target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml create mode 100644 target/nextflow/filter/filter_with_scrublet/main.nf create mode 100644 target/nextflow/filter/filter_with_scrublet/nextflow.config create mode 100644 target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml create mode 100644 target/nextflow/filter/filter_with_scrublet/nextflow_schema.json create mode 100644 target/nextflow/filter/filter_with_scrublet/setup_logger.py create mode 100644 
target/nextflow/filter/remove_modality/.config.vsh.yaml create mode 100644 target/nextflow/filter/remove_modality/main.nf create mode 100644 target/nextflow/filter/remove_modality/nextflow.config create mode 100644 target/nextflow/filter/remove_modality/nextflow_params.yaml create mode 100644 target/nextflow/filter/remove_modality/nextflow_schema.json create mode 100644 target/nextflow/filter/subset_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/filter/subset_h5mu/main.nf create mode 100644 target/nextflow/filter/subset_h5mu/nextflow.config create mode 100644 target/nextflow/filter/subset_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/filter/subset_h5mu/nextflow_schema.json create mode 100644 target/nextflow/filter/subset_h5mu/setup_logger.py create mode 100644 target/nextflow/integrate/harmonypy/.config.vsh.yaml create mode 100644 target/nextflow/integrate/harmonypy/main.nf create mode 100644 target/nextflow/integrate/harmonypy/nextflow.config create mode 100644 target/nextflow/integrate/harmonypy/nextflow_params.yaml create mode 100644 target/nextflow/integrate/harmonypy/nextflow_schema.json create mode 100644 target/nextflow/integrate/scanorama/.config.vsh.yaml create mode 100644 target/nextflow/integrate/scanorama/main.nf create mode 100644 target/nextflow/integrate/scanorama/nextflow.config create mode 100644 target/nextflow/integrate/scanorama/nextflow_params.yaml create mode 100644 target/nextflow/integrate/scanorama/nextflow_schema.json create mode 100644 target/nextflow/integrate/scarches/.config.vsh.yaml create mode 100644 target/nextflow/integrate/scarches/main.nf create mode 100644 target/nextflow/integrate/scarches/nextflow.config create mode 100644 target/nextflow/integrate/scarches/nextflow_params.yaml create mode 100644 target/nextflow/integrate/scarches/nextflow_schema.json create mode 100644 target/nextflow/integrate/scarches/setup_logger.py create mode 100644 target/nextflow/integrate/scvi/.config.vsh.yaml create mode 
100644 target/nextflow/integrate/scvi/main.nf create mode 100644 target/nextflow/integrate/scvi/nextflow.config create mode 100644 target/nextflow/integrate/scvi/nextflow_params.yaml create mode 100644 target/nextflow/integrate/scvi/nextflow_schema.json create mode 100644 target/nextflow/integrate/scvi/subset_vars.py create mode 100644 target/nextflow/integrate/totalvi/.config.vsh.yaml create mode 100644 target/nextflow/integrate/totalvi/main.nf create mode 100644 target/nextflow/integrate/totalvi/nextflow.config create mode 100644 target/nextflow/integrate/totalvi/nextflow_params.yaml create mode 100644 target/nextflow/integrate/totalvi/nextflow_schema.json create mode 100644 target/nextflow/integrate/totalvi/setup_logger.py create mode 100644 target/nextflow/interpret/lianapy/.config.vsh.yaml create mode 100644 target/nextflow/interpret/lianapy/main.nf create mode 100644 target/nextflow/interpret/lianapy/nextflow.config create mode 100644 target/nextflow/interpret/lianapy/nextflow_params.yaml create mode 100644 target/nextflow/interpret/lianapy/nextflow_schema.json create mode 100644 target/nextflow/labels_transfer/knn/.config.vsh.yaml create mode 100644 target/nextflow/labels_transfer/knn/helper.py create mode 100644 target/nextflow/labels_transfer/knn/main.nf create mode 100644 target/nextflow/labels_transfer/knn/nextflow.config create mode 100644 target/nextflow/labels_transfer/knn/nextflow_params.yaml create mode 100644 target/nextflow/labels_transfer/knn/nextflow_schema.json create mode 100644 target/nextflow/labels_transfer/knn/setup_logger.py create mode 100644 target/nextflow/labels_transfer/xgboost/.config.vsh.yaml create mode 100644 target/nextflow/labels_transfer/xgboost/helper.py create mode 100644 target/nextflow/labels_transfer/xgboost/main.nf create mode 100644 target/nextflow/labels_transfer/xgboost/nextflow.config create mode 100644 target/nextflow/labels_transfer/xgboost/nextflow_params.yaml create mode 100644 
target/nextflow/labels_transfer/xgboost/nextflow_schema.json create mode 100644 target/nextflow/labels_transfer/xgboost/setup_logger.py create mode 100644 target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml create mode 100644 target/nextflow/mapping/bd_rhapsody/main.nf create mode 100644 target/nextflow/mapping/bd_rhapsody/nextflow.config create mode 100644 target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml create mode 100644 target/nextflow/mapping/bd_rhapsody/nextflow_schema.json create mode 100755 target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl create mode 100755 target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl create mode 100644 target/nextflow/mapping/bd_rhapsody/setup_logger.py create mode 100644 target/nextflow/mapping/cellranger_count/.config.vsh.yaml create mode 100644 target/nextflow/mapping/cellranger_count/main.nf create mode 100644 target/nextflow/mapping/cellranger_count/nextflow.config create mode 100644 target/nextflow/mapping/cellranger_count/nextflow_params.yaml create mode 100644 target/nextflow/mapping/cellranger_count/nextflow_schema.json create mode 100644 target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml create mode 100644 target/nextflow/mapping/cellranger_count_split/main.nf create mode 100644 target/nextflow/mapping/cellranger_count_split/nextflow.config create mode 100644 target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml create mode 100644 target/nextflow/mapping/cellranger_count_split/nextflow_schema.json create mode 100644 target/nextflow/mapping/cellranger_multi/.config.vsh.yaml create mode 100644 target/nextflow/mapping/cellranger_multi/main.nf create mode 100644 target/nextflow/mapping/cellranger_multi/nextflow.config create mode 100644 target/nextflow/mapping/cellranger_multi/nextflow_params.yaml create mode 100644 target/nextflow/mapping/cellranger_multi/nextflow_schema.json create mode 100644 
target/nextflow/mapping/cellranger_multi/setup_logger.py create mode 100644 target/nextflow/mapping/htseq_count/.config.vsh.yaml create mode 100644 target/nextflow/mapping/htseq_count/main.nf create mode 100644 target/nextflow/mapping/htseq_count/nextflow.config create mode 100644 target/nextflow/mapping/htseq_count/nextflow_params.yaml create mode 100644 target/nextflow/mapping/htseq_count/nextflow_schema.json create mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/main.nf create mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/nextflow.config create mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/mapping/multi_star/.config.vsh.yaml create mode 100644 target/nextflow/mapping/multi_star/main.nf create mode 100644 target/nextflow/mapping/multi_star/nextflow.config create mode 100644 target/nextflow/mapping/multi_star/nextflow_params.yaml create mode 100644 target/nextflow/mapping/multi_star/nextflow_schema.json create mode 100644 target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml create mode 100644 target/nextflow/mapping/multi_star_to_h5mu/main.nf create mode 100644 target/nextflow/mapping/multi_star_to_h5mu/nextflow.config create mode 100644 target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml create mode 100644 target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json create mode 100644 target/nextflow/mapping/samtools_sort/.config.vsh.yaml create mode 100644 target/nextflow/mapping/samtools_sort/main.nf create mode 100644 target/nextflow/mapping/samtools_sort/nextflow.config create mode 100644 target/nextflow/mapping/samtools_sort/nextflow_params.yaml create mode 100644 target/nextflow/mapping/samtools_sort/nextflow_schema.json create mode 100644 target/nextflow/mapping/star_align/.config.vsh.yaml 
create mode 100644 target/nextflow/mapping/star_align/main.nf create mode 100644 target/nextflow/mapping/star_align/nextflow.config create mode 100644 target/nextflow/mapping/star_align/nextflow_params.yaml create mode 100644 target/nextflow/mapping/star_align/nextflow_schema.json create mode 100644 target/nextflow/mapping/star_align/setup_logger.py create mode 100644 target/nextflow/mapping/star_align_v273a/.config.vsh.yaml create mode 100644 target/nextflow/mapping/star_align_v273a/main.nf create mode 100644 target/nextflow/mapping/star_align_v273a/nextflow.config create mode 100644 target/nextflow/mapping/star_align_v273a/nextflow_params.yaml create mode 100644 target/nextflow/mapping/star_align_v273a/nextflow_schema.json create mode 100644 target/nextflow/mapping/star_align_v273a/setup_logger.py create mode 100644 target/nextflow/mapping/star_build_reference/.config.vsh.yaml create mode 100644 target/nextflow/mapping/star_build_reference/main.nf create mode 100644 target/nextflow/mapping/star_build_reference/nextflow.config create mode 100644 target/nextflow/mapping/star_build_reference/nextflow_params.yaml create mode 100644 target/nextflow/mapping/star_build_reference/nextflow_schema.json create mode 100644 target/nextflow/metadata/add_id/.config.vsh.yaml create mode 100644 target/nextflow/metadata/add_id/main.nf create mode 100644 target/nextflow/metadata/add_id/nextflow.config create mode 100644 target/nextflow/metadata/add_id/nextflow_params.yaml create mode 100644 target/nextflow/metadata/add_id/nextflow_schema.json create mode 100644 target/nextflow/metadata/add_id/setup_logger.py create mode 100644 target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml create mode 100644 target/nextflow/metadata/grep_annotation_column/main.nf create mode 100644 target/nextflow/metadata/grep_annotation_column/nextflow.config create mode 100644 target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml create mode 100644 
target/nextflow/metadata/grep_annotation_column/nextflow_schema.json create mode 100644 target/nextflow/metadata/join_csv/.config.vsh.yaml create mode 100644 target/nextflow/metadata/join_csv/main.nf create mode 100644 target/nextflow/metadata/join_csv/nextflow.config create mode 100644 target/nextflow/metadata/join_csv/nextflow_params.yaml create mode 100644 target/nextflow/metadata/join_csv/nextflow_schema.json create mode 100644 target/nextflow/metadata/join_csv/setup_logger.py create mode 100644 target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml create mode 100644 target/nextflow/metadata/join_uns_to_obs/main.nf create mode 100644 target/nextflow/metadata/join_uns_to_obs/nextflow.config create mode 100644 target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml create mode 100644 target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json create mode 100644 target/nextflow/metadata/join_uns_to_obs/setup_logger.py create mode 100644 target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml create mode 100644 target/nextflow/metadata/move_obsm_to_obs/main.nf create mode 100644 target/nextflow/metadata/move_obsm_to_obs/nextflow.config create mode 100644 target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml create mode 100644 target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json create mode 100644 target/nextflow/metadata/move_obsm_to_obs/setup_logger.py create mode 100644 target/nextflow/neighbors/bbknn/.config.vsh.yaml create mode 100644 target/nextflow/neighbors/bbknn/main.nf create mode 100644 target/nextflow/neighbors/bbknn/nextflow.config create mode 100644 target/nextflow/neighbors/bbknn/nextflow_params.yaml create mode 100644 target/nextflow/neighbors/bbknn/nextflow_schema.json create mode 100644 target/nextflow/neighbors/find_neighbors/.config.vsh.yaml create mode 100644 target/nextflow/neighbors/find_neighbors/main.nf create mode 100644 target/nextflow/neighbors/find_neighbors/nextflow.config create mode 100644 
target/nextflow/neighbors/find_neighbors/nextflow_params.yaml create mode 100644 target/nextflow/neighbors/find_neighbors/nextflow_schema.json create mode 100644 target/nextflow/neighbors/find_neighbors/setup_logger.py create mode 100644 target/nextflow/process_10xh5/filter_10xh5/.config.vsh.yaml create mode 100644 target/nextflow/process_10xh5/filter_10xh5/main.nf create mode 100644 target/nextflow/process_10xh5/filter_10xh5/nextflow.config create mode 100644 target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml create mode 100644 target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json create mode 100644 target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml create mode 100644 target/nextflow/qc/calculate_qc_metrics/main.nf create mode 100644 target/nextflow/qc/calculate_qc_metrics/nextflow.config create mode 100644 target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml create mode 100644 target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json create mode 100644 target/nextflow/qc/calculate_qc_metrics/setup_logger.py create mode 100644 target/nextflow/qc/fastqc/.config.vsh.yaml create mode 100644 target/nextflow/qc/fastqc/main.nf create mode 100644 target/nextflow/qc/fastqc/nextflow.config create mode 100644 target/nextflow/qc/fastqc/nextflow_params.yaml create mode 100644 target/nextflow/qc/fastqc/nextflow_schema.json create mode 100644 target/nextflow/qc/multiqc/.config.vsh.yaml create mode 100644 target/nextflow/qc/multiqc/main.nf create mode 100644 target/nextflow/qc/multiqc/nextflow.config create mode 100644 target/nextflow/qc/multiqc/nextflow_params.yaml create mode 100644 target/nextflow/qc/multiqc/nextflow_schema.json create mode 100644 target/nextflow/query/cellxgene_census/.config.vsh.yaml create mode 100644 target/nextflow/query/cellxgene_census/main.nf create mode 100644 target/nextflow/query/cellxgene_census/nextflow.config create mode 100644 target/nextflow/query/cellxgene_census/nextflow_params.yaml create mode 100644 
target/nextflow/query/cellxgene_census/nextflow_schema.json create mode 100644 target/nextflow/query/cellxgene_census/setup_logger.py create mode 100644 target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml create mode 100644 target/nextflow/reference/build_bdrhap_reference/main.nf create mode 100644 target/nextflow/reference/build_bdrhap_reference/nextflow.config create mode 100644 target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml create mode 100644 target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json create mode 100644 target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml create mode 100644 target/nextflow/reference/build_cellranger_reference/main.nf create mode 100644 target/nextflow/reference/build_cellranger_reference/nextflow.config create mode 100644 target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml create mode 100644 target/nextflow/reference/build_cellranger_reference/nextflow_schema.json create mode 100644 target/nextflow/reference/make_reference/.config.vsh.yaml create mode 100644 target/nextflow/reference/make_reference/main.nf create mode 100644 target/nextflow/reference/make_reference/nextflow.config create mode 100644 target/nextflow/reference/make_reference/nextflow_params.yaml create mode 100644 target/nextflow/reference/make_reference/nextflow_schema.json create mode 100644 target/nextflow/report/mermaid/.config.vsh.yaml create mode 100644 target/nextflow/report/mermaid/main.nf create mode 100644 target/nextflow/report/mermaid/nextflow.config create mode 100644 target/nextflow/report/mermaid/nextflow_params.yaml create mode 100644 target/nextflow/report/mermaid/nextflow_schema.json create mode 100644 target/nextflow/report/mermaid/puppeteer-config.json create mode 100644 target/nextflow/transfer/publish/.config.vsh.yaml create mode 100644 target/nextflow/transfer/publish/main.nf create mode 100644 target/nextflow/transfer/publish/nextflow.config create mode 
100644 target/nextflow/transfer/publish/nextflow_params.yaml create mode 100644 target/nextflow/transfer/publish/nextflow_schema.json create mode 100644 target/nextflow/transform/clr/.config.vsh.yaml create mode 100644 target/nextflow/transform/clr/main.nf create mode 100644 target/nextflow/transform/clr/nextflow.config create mode 100644 target/nextflow/transform/clr/nextflow_params.yaml create mode 100644 target/nextflow/transform/clr/nextflow_schema.json create mode 100644 target/nextflow/transform/delete_layer/.config.vsh.yaml create mode 100644 target/nextflow/transform/delete_layer/compress_h5mu.py create mode 100644 target/nextflow/transform/delete_layer/main.nf create mode 100644 target/nextflow/transform/delete_layer/nextflow.config create mode 100644 target/nextflow/transform/delete_layer/nextflow_params.yaml create mode 100644 target/nextflow/transform/delete_layer/nextflow_schema.json create mode 100644 target/nextflow/transform/delete_layer/setup_logger.py create mode 100644 target/nextflow/transform/log1p/.config.vsh.yaml create mode 100644 target/nextflow/transform/log1p/main.nf create mode 100644 target/nextflow/transform/log1p/nextflow.config create mode 100644 target/nextflow/transform/log1p/nextflow_params.yaml create mode 100644 target/nextflow/transform/log1p/nextflow_schema.json create mode 100644 target/nextflow/transform/log1p/setup_logger.py create mode 100644 target/nextflow/transform/normalize_total/.config.vsh.yaml create mode 100644 target/nextflow/transform/normalize_total/main.nf create mode 100644 target/nextflow/transform/normalize_total/nextflow.config create mode 100644 target/nextflow/transform/normalize_total/nextflow_params.yaml create mode 100644 target/nextflow/transform/normalize_total/nextflow_schema.json create mode 100644 target/nextflow/transform/normalize_total/setup_logger.py create mode 100644 target/nextflow/transform/regress_out/.config.vsh.yaml create mode 100644 target/nextflow/transform/regress_out/main.nf create 
mode 100644 target/nextflow/transform/regress_out/nextflow.config create mode 100644 target/nextflow/transform/regress_out/nextflow_params.yaml create mode 100644 target/nextflow/transform/regress_out/nextflow_schema.json create mode 100644 target/nextflow/transform/regress_out/setup_logger.py create mode 100644 target/nextflow/transform/scale/.config.vsh.yaml create mode 100644 target/nextflow/transform/scale/main.nf create mode 100644 target/nextflow/transform/scale/nextflow.config create mode 100644 target/nextflow/transform/scale/nextflow_params.yaml create mode 100644 target/nextflow/transform/scale/nextflow_schema.json create mode 100644 target/nextflow/transform/scale/setup_logger.py create mode 100644 target/nextflow/velocity/scvelo/.config.vsh.yaml create mode 100644 target/nextflow/velocity/scvelo/main.nf create mode 100644 target/nextflow/velocity/scvelo/nextflow.config create mode 100644 target/nextflow/velocity/scvelo/nextflow_params.yaml create mode 100644 target/nextflow/velocity/scvelo/nextflow_schema.json create mode 100644 target/nextflow/velocity/scvelo/setup_logger.py create mode 100644 target/nextflow/velocity/velocyto/.config.vsh.yaml create mode 100644 target/nextflow/velocity/velocyto/main.nf create mode 100644 target/nextflow/velocity/velocyto/nextflow.config create mode 100644 target/nextflow/velocity/velocyto/nextflow_params.yaml create mode 100644 target/nextflow/velocity/velocyto/nextflow_schema.json diff --git a/target/docker/annotate/popv/.config.vsh.yaml b/target/docker/annotate/popv/.config.vsh.yaml new file mode 100644 index 00000000000..15eff3b4fd6 --- /dev/null +++ b/target/docker/annotate/popv/.config.vsh.yaml @@ -0,0 +1,346 @@ +functionality: + name: "popv" + namespace: "annotate" + version: "0.12.4" + authors: + - name: "Matthias Beyens" + roles: + - "author" + info: + role: "Contributor" + links: + github: "MatthiasBeyens" + orcid: "0000-0003-3304-0706" + email: "matthias.beyens@gmail.com" + linkedin: "mbeyens" + 
organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + description: "Arguments related to the input (aka query) dataset." + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file." + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Which layer to use. If no value is provided, the counts are assumed\ + \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obs_batch" + description: "Key in obs field of input adata for batch information. If no value\ + \ is provided, batch label is assumed to be unknown." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_var_subset" + description: "Subset the input object with this column." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obs_label" + description: "Key in obs field of input adata for label information. This is\ + \ only used for training scANVI. Unlabelled cells should be set to `\"unknown_celltype_label\"\ + `." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--unknown_celltype_label" + description: "If `input_obs_label` is specified, cells with this value will\ + \ be treated as unknown and will be predicted by the model." + info: null + default: + - "unknown" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference" + description: "Arguments related to the reference dataset." + arguments: + - type: "file" + name: "--reference" + description: "User-provided reference tissue. The data that will be used as\ + \ reference to call cell types." + info: null + example: + - "TS_Bladder_filtered.h5ad" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_layer" + description: "Which layer to use. If no value is provided, the counts are assumed\ + \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_label" + description: "Key in obs field of reference AnnData with cell-type information." + info: null + default: + - "cell_ontology_class" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_batch" + description: "Key in obs field of input adata for batch information." 
+ info: null + default: + - "donor_assay" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + description: "Output arguments." + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + description: "Other arguments." + arguments: + - type: "string" + name: "--methods" + description: "Methods to call cell types. By default, runs to knn_on_scvi and\ + \ scanvi." + info: null + example: + - "knn_on_scvi" + - "scanvi" + required: true + choices: + - "celltypist" + - "knn_on_bbknn" + - "knn_on_scanorama" + - "knn_on_scvi" + - "onclass" + - "rf" + - "scanvi" + - "svm" + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs popular major vote cell typing on single cell sequence data\ + \ using multiple algorithms. Note that this is a one-shot version of PopV." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + - "build-essential" + - "wget" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "scvi-tools~=1.0.3" + - "popv~=0.3.2" + - "jax==0.4.10" + - "jaxlib==0.4.10" + - "ml-dtypes<0.3.0" + upgrade: true + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "docker" + run: + - "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \\\n\ + \ cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2\n" + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 
16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/annotate/popv/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/annotate/popv/popv" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/annotate/popv/popv b/target/docker/annotate/popv/popv new file mode 100755 index 00000000000..8c038fd27da --- /dev/null +++ b/target/docker/annotate/popv/popv @@ -0,0 +1,1401 @@ +#!/usr/bin/env bash + +# popv 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Matthias Beyens (author) +# * Robrecht Cannoodt (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity 
level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="popv" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "popv 0.12.4" + echo "" + echo "Performs popular major vote cell typing on single cell sequence data using" + echo "multiple algorithms. Note that this is a one-shot version of PopV." + echo "" + echo "Inputs:" + echo " Arguments related to the input (aka query) dataset." + echo "" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " Which modality to process." + echo "" + echo " --input_layer" + echo " type: string" + echo " Which layer to use. If no value is provided, the counts are assumed to" + echo " be in the \`.X\` slot. Otherwise, count data is expected to be in" + echo " \`.layers[input_layer]\`." + echo "" + echo " --input_obs_batch" + echo " type: string" + echo " Key in obs field of input adata for batch information. If no value is" + echo " provided, batch label is assumed to be unknown." + echo "" + echo " --input_var_subset" + echo " type: string" + echo " Subset the input object with this column." + echo "" + echo " --input_obs_label" + echo " type: string" + echo " Key in obs field of input adata for label information. This is only used" + echo " for training scANVI. 
Unlabelled cells should be set to" + echo " \`\"unknown_celltype_label\"\`." + echo "" + echo " --unknown_celltype_label" + echo " type: string" + echo " default: unknown" + echo " If \`input_obs_label\` is specified, cells with this value will be treated" + echo " as unknown and will be predicted by the model." + echo "" + echo "Reference:" + echo " Arguments related to the reference dataset." + echo "" + echo " --reference" + echo " type: file, required parameter, file must exist" + echo " example: TS_Bladder_filtered.h5ad" + echo " User-provided reference tissue. The data that will be used as reference" + echo " to call cell types." + echo "" + echo " --reference_layer" + echo " type: string" + echo " Which layer to use. If no value is provided, the counts are assumed to" + echo " be in the \`.X\` slot. Otherwise, count data is expected to be in" + echo " \`.layers[reference_layer]\`." + echo "" + echo " --reference_obs_label" + echo " type: string" + echo " default: cell_ontology_class" + echo " Key in obs field of reference AnnData with cell-type information." + echo "" + echo " --reference_obs_batch" + echo " type: string" + echo " default: donor_assay" + echo " Key in obs field of input adata for batch information." + echo "" + echo "Outputs:" + echo " Output arguments." + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo "" + echo "Arguments:" + echo " Other arguments." + echo "" + echo " --methods" + echo " type: string, required parameter, multiple values allowed" + echo " example: knn_on_scvi:scanvi" + echo " choices: [ celltypist, knn_on_bbknn, knn_on_scanorama, knn_on_scvi," + echo "onclass, rf, scanvi, svm ]" + echo " Methods to call cell types. By default, runs to knn_on_scvi and scanvi." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps git build-essential wget && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "scanpy~=1.9.5" "scvi-tools~=1.0.3" "popv~=0.3.2" "jax==0.4.10" "jaxlib==0.4.10" "ml-dtypes<0.3.0" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +RUN cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \ + cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2 + +LABEL org.opencontainers.image.authors="Matthias Beyens, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component annotate popv" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-popv-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "popv 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_layer) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_INPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_layer=*) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_obs_batch) + [ -n "$VIASH_PAR_INPUT_OBS_BATCH" ] && ViashError Bad arguments for option \'--input_obs_batch\': \'$VIASH_PAR_INPUT_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBS_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obs_batch. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_obs_batch=*) + [ -n "$VIASH_PAR_INPUT_OBS_BATCH" ] && ViashError Bad arguments for option \'--input_obs_batch=*\': \'$VIASH_PAR_INPUT_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBS_BATCH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_var_subset) + [ -n "$VIASH_PAR_INPUT_VAR_SUBSET" ] && ViashError Bad arguments for option \'--input_var_subset\': \'$VIASH_PAR_INPUT_VAR_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_VAR_SUBSET="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_var_subset. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_var_subset=*) + [ -n "$VIASH_PAR_INPUT_VAR_SUBSET" ] && ViashError Bad arguments for option \'--input_var_subset=*\': \'$VIASH_PAR_INPUT_VAR_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_INPUT_VAR_SUBSET=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_obs_label) + [ -n "$VIASH_PAR_INPUT_OBS_LABEL" ] && ViashError Bad arguments for option \'--input_obs_label\': \'$VIASH_PAR_INPUT_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBS_LABEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obs_label. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_obs_label=*) + [ -n "$VIASH_PAR_INPUT_OBS_LABEL" ] && ViashError Bad arguments for option \'--input_obs_label=*\': \'$VIASH_PAR_INPUT_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBS_LABEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --unknown_celltype_label) + [ -n "$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL" ] && ViashError Bad arguments for option \'--unknown_celltype_label\': \'$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNKNOWN_CELLTYPE_LABEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --unknown_celltype_label. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --unknown_celltype_label=*) + [ -n "$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL" ] && ViashError Bad arguments for option \'--unknown_celltype_label=*\': \'$VIASH_PAR_UNKNOWN_CELLTYPE_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNKNOWN_CELLTYPE_LABEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_layer) + [ -n "$VIASH_PAR_REFERENCE_LAYER" ] && ViashError Bad arguments for option \'--reference_layer\': \'$VIASH_PAR_REFERENCE_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_layer=*) + [ -n "$VIASH_PAR_REFERENCE_LAYER" ] && ViashError Bad arguments for option \'--reference_layer=*\': \'$VIASH_PAR_REFERENCE_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obs_label) + [ -n "$VIASH_PAR_REFERENCE_OBS_LABEL" ] && ViashError Bad arguments for option \'--reference_obs_label\': \'$VIASH_PAR_REFERENCE_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBS_LABEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_label. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obs_label=*) + [ -n "$VIASH_PAR_REFERENCE_OBS_LABEL" ] && ViashError Bad arguments for option \'--reference_obs_label=*\': \'$VIASH_PAR_REFERENCE_OBS_LABEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE_OBS_LABEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obs_batch) + [ -n "$VIASH_PAR_REFERENCE_OBS_BATCH" ] && ViashError Bad arguments for option \'--reference_obs_batch\': \'$VIASH_PAR_REFERENCE_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBS_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_batch. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obs_batch=*) + [ -n "$VIASH_PAR_REFERENCE_OBS_BATCH" ] && ViashError Bad arguments for option \'--reference_obs_batch=*\': \'$VIASH_PAR_REFERENCE_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBS_BATCH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --methods) + if [ -z "$VIASH_PAR_METHODS" ]; then + VIASH_PAR_METHODS="$2" + else + VIASH_PAR_METHODS="$VIASH_PAR_METHODS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --methods. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --methods=*) + if [ -z "$VIASH_PAR_METHODS" ]; then + VIASH_PAR_METHODS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_METHODS="$VIASH_PAR_METHODS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/annotate_popv:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE+x} ]; then + ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_METHODS+x} ]; then + ViashError '--methods' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL+x} ]; then + VIASH_PAR_UNKNOWN_CELLTYPE_LABEL="unknown" +fi +if [ -z ${VIASH_PAR_REFERENCE_OBS_LABEL+x} ]; then + VIASH_PAR_REFERENCE_OBS_LABEL="cell_ontology_class" +fi +if [ -z ${VIASH_PAR_REFERENCE_OBS_BATCH+x} ]; then + VIASH_PAR_REFERENCE_OBS_BATCH="donor_assay" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_METHODS" ]; then + VIASH_PAR_METHODS_CHOICES=("celltypist:knn_on_bbknn:knn_on_scanorama:knn_on_scvi:onclass:rf:scanvi:svm") + IFS=':' + set -f + for val in $VIASH_PAR_METHODS; do + if ! [[ ":${VIASH_PAR_METHODS_CHOICES[*]}:" =~ ":${val}:" ]]; then + ViashError '--methods' specified value of \'${val}\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/annotate_popv:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/annotate_popv:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/annotate_popv:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-popv-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import re +import tempfile +import typing +import numpy as np +import mudata as mu +import anndata as ad +import popv + +# todo: is this still needed? +from torch.cuda import is_available as cuda_is_available +try: + from torch.backends.mps import is_available as mps_is_available +except ModuleNotFoundError: + # Older pytorch versions + # MacOS GPUs + def mps_is_available(): + return False + +# where to find the obo files +cl_obo_folder = "/opt/PopV/ontology/" + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_obs_batch': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_var_subset': $( if [ ! 
-z ${VIASH_PAR_INPUT_VAR_SUBSET+x} ]; then echo "r'${VIASH_PAR_INPUT_VAR_SUBSET//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_obs_label': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_LABEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'unknown_celltype_label': $( if [ ! -z ${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL+x} ]; then echo "r'${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_layer': $( if [ ! -z ${VIASH_PAR_REFERENCE_LAYER+x} ]; then echo "r'${VIASH_PAR_REFERENCE_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obs_label': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_LABEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obs_batch': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'methods': $( if [ ! -z ${VIASH_PAR_METHODS+x} ]; then echo "r'${VIASH_PAR_METHODS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +use_gpu = cuda_is_available() or mps_is_available() +logger.info("GPU enabled? 
%s", use_gpu) + +# Helper functions +def get_X(adata: ad.AnnData, layer: typing.Optional[str], var_index: typing.Optional[str]): + """Fetch the counts data from X or a layer. Subset columns by var_index if so desired.""" + if var_index: + adata = adata[:, var_index] + if layer: + return adata.layers[layer] + else: + return adata.X +def get_obs(adata: ad.AnnData, obs_par_names): + """Subset the obs dataframe to just the columns defined by the obs_label and obs_batch.""" + obs_columns = [par[x] for x in obs_par_names if par[x]] + return adata.obs[obs_columns] +def get_var(adata: ad.AnnData, var_index: list[str]): + """Fetch the var dataframe. Subset rows by var_index if so desired.""" + return adata.var.loc[var_index] + +def main(par, meta): + assert len(par["methods"]) >= 1, "Please, specify at least one method for cell typing." + logger.info("Cell typing methods: {}".format(par["methods"])) + + ### PREPROCESSING REFERENCE ### + logger.info("### PREPROCESSING REFERENCE ###") + + # take a look at reference data + logger.info("Reading reference data '%s'", par["reference"]) + reference = ad.read_h5ad(par["reference"]) + + logger.info("Setting reference var index to Ensembl IDs") + reference.var["gene_symbol"] = list(reference.var.index) + reference.var.index = [re.sub("\\\\.[0-9]+\$", "", s) for s in reference.var["ensemblid"]] + + logger.info("Detect number of samples per label") + min_celltype_size = np.min(reference.obs.groupby(par["reference_obs_batch"]).size()) + n_samples_per_label = np.max((min_celltype_size, 100)) + + ### PREPROCESSING INPUT ### + logger.info("### PREPROCESSING INPUT ###") + logger.info("Reading '%s'", par["input"]) + input = mu.read_h5mu(par["input"]) + input_modality = input.mod[par["modality"]] + + # subset with var column + if par["input_var_subset"]: + logger.info("Subset input with .var['%s']", par["input_var_subset"]) + assert par["input_var_subset"] in input_modality.var, f"--input_var_subset='{par['input_var_subset']}' needs to be a 
column in .var" + input_modality = input_modality[:,input_modality.var[par["input_var_subset"]]] + + ### ALIGN REFERENCE AND INPUT ### + logger.info("### ALIGN REFERENCE AND INPUT ###") + + logger.info("Detecting common vars based on ensembl ids") + common_ens_ids = list(set(reference.var.index).intersection(set(input_modality.var.index))) + + logger.info(" reference n_vars: %i", reference.n_vars) + logger.info(" input n_vars: %i", input_modality.n_vars) + logger.info(" intersect n_vars: %i", len(common_ens_ids)) + assert len(common_ens_ids) >= 100, "The intersection of genes is too small." + + # subset input objects to make sure popv is using the data we expect + input_modality = ad.AnnData( + X = get_X(input_modality, par["input_layer"], common_ens_ids), + obs = get_obs(input_modality, ["input_obs_label", "input_obs_batch"]), + var = get_var(input_modality, common_ens_ids) + ) + reference = ad.AnnData( + X = get_X(reference, par["reference_layer"], common_ens_ids), + obs = get_obs(reference, ["reference_obs_label", "reference_obs_batch"]), + var = get_var(reference, common_ens_ids) + ) + + # remove layers that + + ### ALIGN REFERENCE AND INPUT ### + logger.info("### ALIGN REFERENCE AND INPUT ###") + + with tempfile.TemporaryDirectory(prefix="popv-", dir=meta["temp_dir"]) as temp_dir: + logger.info("Run PopV processing") + pq = popv.preprocessing.Process_Query( + # input + query_adata=input_modality, + query_labels_key=par["input_obs_label"], + query_batch_key=par["input_obs_batch"], + query_layers_key=None, # this is taken care of by subset + # reference + ref_adata=reference, + ref_labels_key=par["reference_obs_label"], + ref_batch_key=par["reference_obs_batch"], + # options + unknown_celltype_label=par["unknown_celltype_label"], + n_samples_per_label=n_samples_per_label, + # pretrained model + # Might need to be parameterized at some point + prediction_mode="retrain", + pretrained_scvi_path=None, + # outputs + # Might need to be parameterized at some point + 
save_path_trained_models=temp_dir, + # hardcoded values + cl_obo_folder=cl_obo_folder, + use_gpu=use_gpu + ) + method_kwargs = {} + if 'scanorama' in par['methods']: + method_kwargs['scanorama'] = {'approx': False} + logger.info("Annotate data") + popv.annotation.annotate_data( + adata=pq.adata, + methods=par["methods"], + methods_kwargs=method_kwargs + ) + + popv_input = pq.adata[input_modality.obs_names] + + # select columns starting with "popv_" + popv_obs_cols = popv_input.obs.columns[popv_input.obs.columns.str.startswith("popv_")] + + # create new data frame with selected columns + df_popv = popv_input.obs[popv_obs_cols] + + # remove prefix from column names + df_popv.columns = df_popv.columns.str.replace("popv_", "") + + # store output in mudata .obsm + input.mod[par["modality"]].obsm["popv_output"] = df_popv + + # copy important output in mudata .obs + for col in ["popv_prediction"]: + if col in popv_input.obs.columns: + input.mod[par["modality"]].obs[col] = popv_input.obs[col] + + # code to explore how the output differs from the original + # for attr in ["obs", "var", "uns", "obsm", "layers", "obsp"]: + # old_keys = set(getattr(pq_adata_orig, attr).keys()) + # new_keys = set(getattr(pq.adata, attr).keys()) + # diff_keys = list(new_keys.difference(old_keys)) + # diff_keys.sort() + # print(f"{attr}:", flush=True) + # for key in diff_keys: + # print(f" {key}", flush=True) + + # write output + logger.info("Writing %s", par["output"]) + input.write_h5mu(par["output"], compression=par["output_compression"]) + +if __name__ == "__main__": + main(par, meta) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/annotate/popv/setup_logger.py b/target/docker/annotate/popv/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/annotate/popv/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/cluster/leiden/.config.vsh.yaml b/target/docker/cluster/leiden/.config.vsh.yaml new file mode 100644 index 00000000000..220615dbe8b --- /dev/null +++ b/target/docker/cluster/leiden/.config.vsh.yaml @@ -0,0 +1,219 @@ +functionality: + name: "leiden" + namespace: "cluster" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + 
role: "Principal Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input file." + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_connectivities" + description: "In which .obsp slot the neighbor connectivities can be found." + info: null + default: + - "connectivities" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_name" + description: "Name of the .obsm key under which to add the cluster labels.\nThe\ + \ name of the columns in the matrix will correspond to the resolutions.\n" + info: null + default: + - "leiden" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--resolution" + description: "A parameter value controlling the coarseness of the clustering.\ + \ Higher values lead to more clusters.\nMultiple values will result in clustering\ + \ being performed multiple times.\n" + info: null + default: + - 1.0 + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: 
"file" + path: "src/utils/setup_logger.py" + description: "Cluster cells using the Leiden algorithm [Traag18] implemented in\ + \ the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain\ + \ algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15].\ + \ \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\ + \nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks,\ + \ J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection\ + \ of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n\ + Traag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected\ + \ communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell\ + \ gene expression data analysis, Genome Biology. \n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.8-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "cmake" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "leidenalg~=0.8.9" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 
2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/cluster/leiden/leiden" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/cluster/leiden/leiden b/target/docker/cluster/leiden/leiden new file mode 100755 index 00000000000..e3767f80c35 --- /dev/null +++ b/target/docker/cluster/leiden/leiden @@ -0,0 +1,1115 @@ +#!/usr/bin/env bash + +# leiden 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="leiden" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "leiden 0.12.4" + echo "" + echo "Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy" + echo "framework [Wolf18]." + echo "Leiden is an improved version of the Louvain algorithm [Blondel08]." + echo "It has been proposed for single-cell analysis by [Levine15]." + echo "This requires having ran \`neighbors/find_neighbors\` or \`neighbors/bbknn\` first." + echo "" + echo "Blondel08: Blondel et al. (2008), Fast unfolding of communities in large" + echo "networks, J. Stat. Mech." + echo "Levine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals" + echo "Progenitor-like Cells that Correlate with Prognosis, Cell." + echo "Traag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing" + echo "well-connected communities arXiv." + echo "Wolf18: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data" + echo "analysis, Genome Biology." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input file." 
+ echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --obsp_connectivities" + echo " type: string" + echo " default: connectivities" + echo " In which .obsp slot the neighbor connectivities can be found." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo "" + echo " --obsm_name" + echo " type: string" + echo " default: leiden" + echo " Name of the .obsm key under which to add the cluster labels." + echo " The name of the columns in the matrix will correspond to the" + echo " resolutions." + echo "" + echo " --resolution" + echo " type: double, required parameter, multiple values allowed" + echo " default: 1.0" + echo " A parameter value controlling the coarseness of the clustering. Higher" + echo " values lead to more clusters." + echo " Multiple values will result in clustering being performed multiple" + echo " times." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.8-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y cmake procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "leidenalg~=0.8.9" + +LABEL org.opencontainers.image.authors="Dries De Maeyer" +LABEL org.opencontainers.image.description="Companion container for running component cluster leiden" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-leiden-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "leiden 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsp_connectivities) + [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSP_CONNECTIVITIES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_connectivities. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsp_connectivities=*) + [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities=*\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_CONNECTIVITIES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_name) + [ -n "$VIASH_PAR_OBSM_NAME" ] && ViashError Bad arguments for option \'--obsm_name\': \'$VIASH_PAR_OBSM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_name=*) + [ -n "$VIASH_PAR_OBSM_NAME" ] && ViashError Bad arguments for option \'--obsm_name=*\': \'$VIASH_PAR_OBSM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --resolution) + if [ -z "$VIASH_PAR_RESOLUTION" ]; then + VIASH_PAR_RESOLUTION="$2" + else + VIASH_PAR_RESOLUTION="$VIASH_PAR_RESOLUTION:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --resolution. Use "--help" to get more information on the parameters. 
&& exit 1
+      shift 2
+      ;;
+    --resolution=*)
+      if [ -z "$VIASH_PAR_RESOLUTION" ]; then
+        VIASH_PAR_RESOLUTION=$(ViashRemoveFlags "$1")
+      else
+        VIASH_PAR_RESOLUTION="$VIASH_PAR_RESOLUTION:"$(ViashRemoveFlags "$1")
+      fi
+      shift 1
+      ;;
+    ---setup)
+      [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+      VIASH_MODE='docker_setup'; VIASH_DOCKER_SETUP_STRATEGY="$2"
+      shift 2 # consume the flag AND its strategy value (was 'shift 1', which left the strategy to be parsed as a positional argument)
+      ;;
+    ---setup=*)
+      VIASH_MODE='docker_setup'
+      VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+      shift 1 # flag and value are a single word (was 'shift 2', which fails under 'set -e' when this is the last argument)
+      ;;
+    ---dockerfile)
+      ViashDockerfile
+      exit 0
+      ;;
+    ---v|---volume) # NOTE: '---v' is matched earlier in this case statement by the ---verbose branch, so only '---volume' reaches here
+      VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+      shift 2
+      ;;
+    ---volume=*)
+      VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the value is embedded in $1 (was "$2", i.e. the next, unrelated argument)
+      shift 1
+      ;;
+    ---debug)
+      VIASH_MODE='docker_debug'
+      shift 1
+      ;;
+    ---cpus)
+      [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+      VIASH_META_CPUS="$2"
+      [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+      shift 2
+      ;;
+    ---cpus=*)
+      [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+      VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+      shift 1
+      ;;
+    ---memory)
+      [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+      VIASH_META_MEMORY="$2"
+      [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+      shift 2
+      ;;
+    ---memory=*)
+      [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/cluster_leiden:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    unset VIASH_META_MEMORY_B # pass the variable NAME; 'unset $VAR' expanded the empty value and unset nothing
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS # pass the variable NAME; 'unset $VAR' expanded the empty value and unset nothing
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_RESOLUTION+x} ]; then
+  ViashError '--resolution' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then + VIASH_PAR_OBSP_CONNECTIVITIES="connectivities" +fi +if [ -z ${VIASH_PAR_OBSM_NAME+x} ]; then + VIASH_PAR_OBSM_NAME="leiden" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [ -n "$VIASH_PAR_RESOLUTION" ]; then + IFS=':' + set -f + for val in $VIASH_PAR_RESOLUTION; do + if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--resolution' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/cluster_leiden:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/cluster_leiden:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/cluster_leiden:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-leiden-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import mudata as mu +import pandas as pd +import scanpy as sc + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_name': $( if [ ! -z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +mdata = mu.read_h5mu(par["input"]) + + +def run_single_resolution(adata, resolution): + adata_out = sc.tl.leiden( + adata, + resolution=resolution, + key_added=str(resolution), + obsp=par['obsp_connectivities'], + copy=True + ) + return adata_out.obs[str(resolution)] + +logger.info("Processing modality '%s'.", par['modality']) +data = mdata.mod[par['modality']] +results = 
{str(resolution): run_single_resolution(data, resolution) for resolution in par["resolution"]} +data.obsm[par["obsm_name"]] = pd.DataFrame(results) +logger.info("Writing to %s.", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +logger.info("Finished.") +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/cluster/leiden/setup_logger.py b/target/docker/cluster/leiden/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/cluster/leiden/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/compression/compress_h5mu/.config.vsh.yaml b/target/docker/compression/compress_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..b08d577be0d --- /dev/null +++ b/target/docker/compression/compress_h5mu/.config.vsh.yaml @@ -0,0 +1,167 @@ +functionality: + name: "compress_h5mu" + namespace: "compression" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "location of output file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--compression" + description: "Compression type." 
+ info: null + default: + - "gzip" + required: false + choices: + - "lzf" + - "gzip" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../../utils/compress_h5mu.py" + description: "Compress a MuData file. \n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory 
= 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/compress_h5mu/compress_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/compression/compress_h5mu/compress_h5mu b/target/docker/compression/compress_h5mu/compress_h5mu new file mode 100755 index 00000000000..82235ebb9cd --- /dev/null +++ b/target/docker/compression/compress_h5mu/compress_h5mu @@ -0,0 +1,1008 @@ +#!/usr/bin/env bash + +# compress_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="compress_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "compress_h5mu 0.12.4" + echo "" + echo "Compress a MuData file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: sample_path" + echo " Path to the input .h5mu." + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " location of output file." + echo "" + echo " --compression" + echo " type: string" + echo " default: gzip" + echo " choices: [ lzf, gzip ]" + echo " Compression type." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component compression compress_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + 
+VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-compress_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "compress_h5mu 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --compression) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_COMPRESSION="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --compression=*)
+            [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            if [ $# -ge 2 ]; then shift 2; else shift 1; fi  # drop the flag and, when present, its separate strategy word
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1  # flag and strategy are one word here; shifting 2 would swallow the next argument
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")  # the mount spec is part of $1, not the next argument
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters.
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty (unset needs the variable NAME; the expanded empty value would unset nothing)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty (same rule: pass the name so the later ${VAR+x} checks see it as undefined)
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then + VIASH_PAR_COMPRESSION="gzip" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_COMPRESSION" ]; then + VIASH_PAR_COMPRESSION_CHOICES=("lzf:gzip") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_COMPRESSION:" ]]; then + ViashError '--compression' specified value of \'$VIASH_PAR_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_compress_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-compress_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion +# from compress_h5mu import compress_h5mu +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + 
starting_metadata = input_bytes.read(512)  # read the full 512-byte header area; reading only 100 would cut longer headers short
+        # The metadata is padded with extra null bytes up until 512 bytes.
+        # partition() keeps the whole header when no null byte was read; find() would return -1 and the slice would drop the last byte
+        starting_metadata = starting_metadata.partition(b"\\x00")[0]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\\0" * (512 - nbytes))
+# END TEMPORARY WORKAROUND compress_h5mu
+
+if __name__ == "__main__":
+    compress_h5mu(par["input"], par["output"], compression=par["compression"])
+VIASHMAIN
+python -B "\$tempscript" &
+wait "\$!"
+
+VIASHEOF
+
+
+# strip viash automount from file paths
+if [ ! -z "$VIASH_PAR_INPUT" ]; then
+  VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT")
+fi
+if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
+  VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT")
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG")
+fi
+if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR")
+fi
+
+
+# check whether required files exist
+if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then
+  ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/compression/compress_h5mu/compress_h5mu.py b/target/docker/compression/compress_h5mu/compress_h5mu.py new file mode 100644 index 00000000000..9d92395a573 --- /dev/null +++ b/target/docker/compression/compress_h5mu/compress_h5mu.py @@ -0,0 +1,49 @@ +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = 
input_bytes.read(512)  # read the full 512-byte header area; reading only 100 would cut longer headers short
+        # The metadata is padded with extra null bytes up until 512 bytes.
+        # partition() keeps the whole header when no null byte was read; find() would return -1 and the slice would drop the last byte
+        starting_metadata = starting_metadata.partition(b"\x00")[0]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
diff --git a/target/docker/compression/tar_extract/.config.vsh.yaml b/target/docker/compression/tar_extract/.config.vsh.yaml
new file mode 100644
index 00000000000..c8c59aad973
--- /dev/null
+++ b/target/docker/compression/tar_extract/.config.vsh.yaml
@@ -0,0 +1,106 @@
+functionality:
+  name: "tar_extract"
+  namespace: "compression"
+  version: "0.12.4"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input file"
+    info: null
+    example:
+    - "input.tar.gz"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ":"
+    dest: "par"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Folder to restore file(s) to."
+    info: null
+    example:
+    - "output_folder"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ":"
+    dest: "par"
+  - type: "integer"
+    name: "--strip_components"
+    alternatives:
+    - "-s"
+    description: "Strip this amount of leading components from file names on extraction.\
+      \ For example, to extract only 'myfile.txt' from an archive containing the structure\
+      \ `this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'."
+    info: null
+    example:
+    - 1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ":"
+    dest: "par"
+  - type: "string"
+    name: "--exclude"
+    alternatives:
+    - "-e"
+    description: "Prevents any file or member whose name matches the shell wildcard\
+      \ (pattern) from being extracted."
+ info: null + example: + - "docs/figures" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Extract files from a tar archive" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "../../../LICENSE" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "native" + id: "native" +- type: "docker" + id: "docker" + image: "ubuntu:latest" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +info: + config: "/home/runner/work/openpipeline/openpipeline/src/compression/tar_extract/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/compression/tar_extract/tar_extract" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/compression/tar_extract/tar_extract b/target/docker/compression/tar_extract/tar_extract new file mode 100755 index 00000000000..1cdb1af3d8f --- /dev/null +++ b/target/docker/compression/tar_extract/tar_extract @@ -0,0 +1,978 @@ +#!/usr/bin/env bash + +# tar_extract 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. 
The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] ' (only when VIASH_VERBOSITY >= $1).
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="tar_extract"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "tar_extract 0.12.4"
+  echo ""
+  echo "Extract files from a tar archive"
+  echo ""
+  echo "Arguments:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " example: input.tar.gz"
+  echo " Input file"
+  echo ""
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " example: output_folder"
+  echo " Folder to restore file(s) to."
+  echo ""
+  echo " -s, --strip_components"
+  echo " type: integer"
+  echo " example: 1"
+  echo " Strip this amount of leading components from file names on extraction."
+  echo " For example, to extract only 'myfile.txt' from an archive containing the"
+  echo " structure \`this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'."
+  echo ""
+  echo " -e, --exclude"
+  echo " type: string"
+  echo " example: docs/figures"
+  echo " Prevents any file or member whose name matches the shell wildcard"
+  echo " (pattern) from being extracted."
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# (exits with status 1 when the docker binary is missing or 'docker version' fails)
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then  # run 'command -v' itself; inside '[ ]' it is an invalid test that is never true
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $?
# returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q $1)" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerPull python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull $1 && return 0 || return 1
+  else
+    save=$-; set +e
+    docker pull $1 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : exit status of 'docker push' (0 when the push succeeded)
+# examples:
+#   ViashDockerPush python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push $1
+    out=$?
+  else
+    docker push $1 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."  # report on the image that was pushed, not the global VSHD_ID
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull $1
+  out=$?
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ubuntu:latest + +ENTRYPOINT [] + + +RUN : +LABEL org.opencontainers.image.description="Companion container for running component compression tar_extract" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-tar_extract-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "tar_extract 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strip_components) + [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRIP_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strip_components. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strip_components=*) + [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components=*\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRIP_COMPONENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + -s) + [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_STRIP_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --exclude) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --exclude=*) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude=*\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -e) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'-e\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -e. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_STRIP_COMPONENTS" ]]; then + if ! [[ "$VIASH_PAR_STRIP_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--strip_components' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! 
-z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/compression_tar_extract:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-tar_extract-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/usr/bin/env bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_STRIP_COMPONENTS+x} ]; then echo "${VIASH_PAR_STRIP_COMPONENTS}" | sed "s#'#'\"'\"'#g;s#.*#par_strip_components='&'#" ; else echo "# par_strip_components="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=() +mkdir -p \$par_output # Create output directory if it doesn't exist already + +if [ "\$par_strip_components" != "" ]; then + extra_params+=("--strip-components=\$par_strip_components") +fi + +if [ "\$par_exclude" != "" ]; then + extra_params+=("--exclude=\$par_exclude") +fi + +echo "Extracting \$par_input to \$par_output..." 
+echo "" +tar "\${extra_params[@]}" -xvf "\$par_input" -C "\$par_output" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..dd2b34f4f53 --- /dev/null +++ b/target/docker/convert/from_10xh5_to_h5mu/.config.vsh.yaml @@ -0,0 +1,272 @@ +functionality: + name: "from_10xh5_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "A 10x h5 file as generated by Cell Ranger." + info: null + example: + - "raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input_metrics_summary" + description: "A metrics summary csv file as generated by Cell Ranger." + info: null + example: + - "metrics_cellranger.h5" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: + slots: + mod: + - name: "rna" + required: true + description: "Gene expression counts." + slots: + var: + - name: "gene_symbol" + type: "string" + description: "Identification of the gene." + required: true + - name: "feature_types" + type: "string" + description: "The full name of the modality." 
+ required: true + - name: "genome" + type: "string" + description: "Reference that was used to generate the data." + required: true + - name: "prot" + required: false + description: "Protein abundancy" + slots: + var: + - name: "gene_symbol" + type: "string" + description: "Identification of the gene." + required: true + - name: "feature_types" + type: "string" + description: "The full name of the modality." + required: true + - name: "genome" + type: "string" + description: "Reference that was used to generate the data." + required: true + - name: "vdj" + required: false + description: "VDJ transcript counts" + slots: + var: + - name: "gene_symbol" + type: "string" + description: "Identification of the gene." + required: true + - name: "feature_types" + type: "string" + description: "The full name of the modality." + required: true + - name: "genome" + type: "string" + description: "Reference that was used to generate the data." + required: true + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_metrics" + description: "Name of the .uns slot under which to QC metrics (if any)." + info: null + default: + - "metrics_cellranger" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--min_genes" + description: "Minimum number of counts required for a cell to pass filtering." 
+ info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_counts" + description: "Minimum number of genes expressed required for a cell to pass\ + \ filtering." + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a 10x h5 into an h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 
8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xh5_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu b/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu new file mode 100755 index 00000000000..550e88b3314 --- /dev/null +++ b/target/docker/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu @@ -0,0 +1,1117 @@ +#!/usr/bin/env bash + +# from_10xh5_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 
1 alert Oh no something went wrong!
#   $1: required verbosity level
#   $2: display tag
#   $3+: messages to display
# stderr: Your input, prepended by '[$2] '.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  # Fall back to the notice level when VIASH_VERBOSITY is empty; an unquoted,
  # empty operand would make the test itself fail and silence every message,
  # errors included.
  if [ "${VIASH_VERBOSITY:-$VIASH_LOGCODE_NOTICE}" -ge "$required_level" ]; then
    # log output goes to stderr so that stdout stays free for real output
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog $VIASH_LOGCODE_ERROR error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="from_10xh5_to_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_10xh5_to_h5mu 0.12.4" + echo "" + echo "Converts a 10x h5 into an h5mu file." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: raw_feature_bc_matrix.h5" + echo " A 10x h5 file as generated by Cell Ranger." + echo "" + echo " --input_metrics_summary" + echo " type: file, file must exist" + echo " example: metrics_cellranger.h5" + echo " A metrics summary csv file as generated by Cell Ranger." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo "" + echo " --uns_metrics" + echo " type: string" + echo " default: metrics_cellranger" + echo " Name of the .uns slot under which to QC metrics (if any)." + echo "" + echo "Arguments:" + echo " --min_genes" + echo " type: integer" + echo " example: 100" + echo " Minimum number of counts required for a cell to pass filtering." + echo "" + echo " --min_counts" + echo " type: integer" + echo " example: 1000" + echo " Minimum number of genes expressed required for a cell to pass filtering." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component convert from_10xh5_to_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_10xh5_to_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "from_10xh5_to_h5mu 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_metrics_summary) + [ -n "$VIASH_PAR_INPUT_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--input_metrics_summary\': \'$VIASH_PAR_INPUT_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_METRICS_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_metrics_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_metrics_summary=*) + [ -n "$VIASH_PAR_INPUT_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--input_metrics_summary=*\': \'$VIASH_PAR_INPUT_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_INPUT_METRICS_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --uns_metrics) + [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_METRICS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_metrics. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --uns_metrics=*) + [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics=*\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_METRICS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_genes) + [ -n "$VIASH_PAR_MIN_GENES" ] && ViashError Bad arguments for option \'--min_genes\': \'$VIASH_PAR_MIN_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_GENES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_genes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_genes=*) + [ -n "$VIASH_PAR_MIN_GENES" ] && ViashError Bad arguments for option \'--min_genes=*\': \'$VIASH_PAR_MIN_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_GENES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_counts) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. 
&& exit 1
            shift 2
            ;;
        --min_counts=*)
            [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---setup)
            # space-separated form: the strategy is the NEXT argument, so two
            # words have to be consumed
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$2"
            shift 2
            ;;
        ---setup=*)
            # `=` form: flag and strategy are one word; shifting by two here
            # would swallow the following argument, or abort under `set -e`
            # when this is the last argument
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
            shift 1
            ;;
        ---dockerfile)
            ViashDockerfile
            exit 0
            ;;
        ---volume)
            # NOTE: the former `---v` alias was unreachable here because `---v`
            # is already claimed by `---v|---verbose` earlier in this case
            # statement
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---volume. Use "--help" to get more information on the parameters. && exit 1
            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
            shift 2
            ;;
        ---volume=*)
            # the mount spec is embedded in "$1" (after the `=`), not in "$2"
            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
            shift 1
            ;;
        ---debug)
            VIASH_MODE='docker_debug'
            shift 1
            ;;
        ---cpus)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---cpus=*)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---memory)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---memory=*)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
  # quoted so that a value containing whitespace reaches the helper as one argument
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # (`unset` takes the variable NAME; with a leading `$` the empty value is
    # expanded and nothing gets unset)
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  # pass the name, not the (empty) value, so the variable really becomes unset
  unset VIASH_META_CPUS
fi


# check whether required parameters exist
# (`${VAR+x}` expands to "x" only when VAR is set, so these tests detect
# unset variables while accepting empty ones)
if [ -z ${VIASH_PAR_INPUT+x} ]; then
  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_CONFIG+x} ]; then
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi

# filling in defaults
if [ -z ${VIASH_PAR_UNS_METRICS+x} ]; then
  VIASH_PAR_UNS_METRICS="metrics_cellranger"
fi

# check whether required files exist
if [ ! -z "$VIASH_PAR_INPUT" ] && [ !
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_METRICS_SUMMARY" ] && [ ! -e "$VIASH_PAR_INPUT_METRICS_SUMMARY" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_METRICS_SUMMARY' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MIN_GENES" ]]; then + if ! [[ "$VIASH_PAR_MIN_GENES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_genes' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_INPUT_METRICS_SUMMARY" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_METRICS_SUMMARY")" ) + VIASH_PAR_INPUT_METRICS_SUMMARY=$(ViashAutodetectMount "$VIASH_PAR_INPUT_METRICS_SUMMARY") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xh5_to_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_10xh5_to_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata +import scanpy as sc +import sys +import pandas as pd + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_metrics_summary': $( if [ ! -z ${VIASH_PAR_INPUT_METRICS_SUMMARY+x} ]; then echo "r'${VIASH_PAR_INPUT_METRICS_SUMMARY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'min_genes': $( if [ ! -z ${VIASH_PAR_MIN_GENES+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +adata = sc.read_10x_h5(par["input"], gex_only=False) + +# set the gene ids as var_names +logger.info("Renaming var columns") +adata.var = adata.var\\ + .rename_axis("gene_symbol")\\ + .reset_index()\\ + .set_index("gene_ids") + +# parse metrics summary file and store in .obsm or .obs +if par["input_metrics_summary"] and par["uns_metrics"]: + logger.info("Reading metrics summary file '%s'", par['input_metrics_summary']) + + def read_percentage(val): + try: + return float(val.strip('%')) / 100 + except AttributeError: + return val + + metrics_summary = pd.read_csv(par["input_metrics_summary"], decimal=".", quotechar='"', thousands=",").applymap(read_percentage) + + logger.info("Storing metrics summary in .uns['%s']", par['uns_metrics']) + adata.uns[par["uns_metrics"]] = metrics_summary +else: + is_none = "input_metrics_summary" if not par["input_metrics_summary"] else "uns_metrics" + logger.info("Not storing metrics summary because par['%s'] is None", is_none) + +# might perform basic filtering to get rid of some data +# applicable when starting from the raw counts +if par["min_genes"]: + logger.info("Filtering with min_genes=%d", par['min_genes']) + sc.pp.filter_cells(adata, 
min_genes=par["min_genes"]) + +if par["min_counts"]: + logger.info("Filtering with min_counts=%d", par['min_counts']) + sc.pp.filter_cells(adata, min_counts=par["min_counts"]) + +# generate output +logger.info("Convert to mudata") +mdata = mudata.MuData(adata) + +# override root .obs +mdata.obs = adata.obs + +# write output +logger.info("Writing %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_INPUT_METRICS_SUMMARY" ]; then + VIASH_PAR_INPUT_METRICS_SUMMARY=$(ViashStripAutomount "$VIASH_PAR_INPUT_METRICS_SUMMARY") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/convert/from_10xh5_to_h5mu/setup_logger.py b/target/docker/convert/from_10xh5_to_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/convert/from_10xh5_to_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..385c749938f --- /dev/null +++ b/target/docker/convert/from_10xmtx_to_h5mu/.config.vsh.yaml @@ -0,0 +1,166 @@ +functionality: + name: "from_10xmtx_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input mtx folder" + info: null + example: + - "input_dir_containing_gz_files" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a 10x mtx into an h5mu file.\n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.8-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 
8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xmtx_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu b/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu new file mode 100755 index 00000000000..9fe75cf4e9a --- /dev/null +++ b/target/docker/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu @@ -0,0 +1,989 @@ +#!/usr/bin/env bash + +# from_10xmtx_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 
1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="from_10xmtx_to_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_10xmtx_to_h5mu 0.12.4" + echo "" + echo "Converts a 10x mtx into an h5mu file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input_dir_containing_gz_files" + echo " Input mtx folder" + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.8-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component convert from_10xmtx_to_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:31Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_10xmtx_to_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "from_10xmtx_to_h5mu 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + # space-separated form: the strategy is the next argument, so consume both words + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup=*) + # equals form: flag and strategy share one argument, so consume exactly one word + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + # NOTE: the ---v alternative is shadowed by the earlier ---v|---verbose arm and never matches here + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # equals form: the mount spec is embedded in the current argument, not in the next one + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # (unset takes the variable NAME; expanding the empty value would unset nothing + # and the later set-ness checks would still see the variable as set) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # pass the name, not the (empty) value, so the variable really becomes unset + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer.
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! 
-z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_10xmtx_to_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_10xmtx_to_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +import scanpy as sc +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +adata = sc.read_10x_mtx(par["input"], gex_only=False) + +logger.info("Renaming keys.") +adata.var = adata.var\\ + .rename_axis("gene_symbol")\\ + .reset_index()\\ + .set_index("gene_ids") + +# generate output +logger.info("Convert to mudata") +mdata = mu.MuData(adata) + +# override root .obs +mdata.obs = adata.obs + +# write output +logger.info("Writing %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py b/target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/convert/from_10xmtx_to_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml b/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml new file mode 100644 index 00000000000..023da9c9d56 --- /dev/null +++ b/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml @@ -0,0 +1,159 @@ +functionality: + name: "from_bd_to_10x_molecular_barcode_tags" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input SAM or BAM file." 
+ info: null + example: + - "input.bam" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output alignment file." + info: null + example: + - "output.sam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--bam" + description: "Output a BAM file." + info: null + direction: "input" + dest: "par" + - type: "integer" + name: "--threads" + alternatives: + - "-t" + description: "Number of threads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Convert the molecular barcode sequence SAM tag from BD format (MA)\ + \ to 10X format (UB).\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + - type: "file" + path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:latest" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "samtools" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + 
mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bd_to_10x_molecular_barcode_tags" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags b/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags new file mode 100755 index 00000000000..908fb18b338 --- /dev/null +++ b/target/docker/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags @@ -0,0 +1,1030 @@ +#!/usr/bin/env bash + +# from_bd_to_10x_molecular_barcode_tags 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. 
+# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 
+VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. 
+function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="from_bd_to_10x_molecular_barcode_tags" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_bd_to_10x_molecular_barcode_tags 0.12.4" + echo "" + echo "Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format" + echo "(UB)." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.bam" + echo " Input SAM or BAM file." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.sam" + echo " Output alignment file." + echo "" + echo " --bam" + echo " type: boolean_true" + echo " Output a BAM file." + echo "" + echo " -t, --threads" + echo " type: integer" + echo " Number of threads" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# Exits with status 1 when the docker executable is missing or when 'docker version' fails. +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + # run 'command -v' directly; wrapping it in [ ... ] only tests the words and never looks up docker + if !
command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + # temporarily disable errexit so a failing 'docker version' can be reported instead of aborting + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  # both values are kept as globals; other helpers in this script read VSHD_ID
  VSHD_ID="$1"
  VSHD_STRAT="$2"

  case "$VSHD_STRAT" in
    alwaysbuild|build|b)
      ViashDockerBuild $VSHD_ID --no-cache
      ;;
    alwayspull|pull|p)
      ViashDockerPull $VSHD_ID
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild $VSHD_ID --no-cache
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild $VSHD_ID
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild $VSHD_ID
      ;;
    ifneedbe*)
      # the "ifneedbe" family only acts when the image is not available
      # locally; the check runs with errexit off so its exit code can be read
      save=$-; set +e
      ViashDockerLocalTagCheck $VSHD_ID
      outCheck=$?
      if [[ $save =~ e ]]; then set -e; fi
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $VSHD_ID already exists"
      else
        case "$VSHD_STRAT" in
          ifneedbebuild)
            ViashDockerBuild $VSHD_ID --no-cache
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild $VSHD_ID
            ;;
          ifneedbepull)
            ViashDockerPull $VSHD_ID
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild $VSHD_ID --no-cache
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild $VSHD_ID
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$VSHD_ID"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      # only push when the remote tag check reports a non-zero exit code
      save=$-; set +e
      ViashDockerRemoteTagCheck $VSHD_ID
      outCheck=$?
      if [[ $save =~ e ]]; then set -e; fi
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$VSHD_ID' exists, doing nothing."
      else
        ViashNotice "Container '$VSHD_ID' does not yet exist."
        ViashDockerPush "$VSHD_ID"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
      ;;
  esac
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# exits with code 1 (after logging the first missing command) when the check fails
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  tag=$1
  shift 1
  commands="$@"
  # snippet run by `sh` inside the container: it prints the first command
  # that `command -v` cannot resolve and stops with exit code 1
  local probe="for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done"
  save=$-; set +e
  missing=$(docker run --rm --entrypoint=sh $tag -c "$probe")
  outCheck=$?
  if [[ $save =~ e ]]; then set -e; fi
  # nothing to report when the probe succeeded
  [ $outCheck -eq 0 ] && return 0
  ViashError "Docker container '$tag' does not contain command '$missing'."
  exit 1
}


######## End of helper functions for setting up Docker images for viash ########

# ViashDockerfile: print the dockerfile to stdout
# return : dockerfile required to run this component
# examples:
#   ViashDockerfile
function ViashDockerfile {
  # quoted delimiter: the Dockerfile text is emitted verbatim, without expansion
  cat << 'VIASHDOCKER'
FROM ubuntu:latest

ENTRYPOINT []


RUN apt-get update && \
  DEBIAN_FRONTEND=noninteractive apt-get install -y samtools && \
  rm -rf /var/lib/apt/lists/*

LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component convert from_bd_to_10x_molecular_barcode_tags"
LABEL org.opencontainers.image.created="2024-01-31T09:08:31Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
LABEL org.opencontainers.image.version="0.12.4"

VIASHDOCKER
}

# ViashDockerBuild: build a docker container
# $1                  : image identifier with format
`[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_bd_to_10x_molecular_barcode_tags-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "from_bd_to_10x_molecular_barcode_tags 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM=true + shift 1 + ;; + --threads) + [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_THREADS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --threads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --threads=*) + [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads=*\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_THREADS=$(ViashRemoveFlags "$1") + shift 1 + ;; + -t) + [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_THREADS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo 
"$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_BAM+x} ]; then + VIASH_PAR_BAM="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_BAM" ]]; then + if ! [[ "$VIASH_PAR_BAM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--bam' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_THREADS" ]]; then + if ! [[ "$VIASH_PAR_THREADS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--threads' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_bd_to_10x_molecular_barcode_tags-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\"'\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +# Sam tags added by BD Rhapsody Pipeline +# From: https://www.bd.com/documents/guides/user-guides/GMX_BD-Rhapsody-genomics-informatics_UG_EN.pdf +# +# ========================================================================================= +# | | Definition | +# ========================================================================================= +# | CB | A number between 1 and 96 3 (884,736) representing a unique cell label sequence | +# | | (CB = 0 when no cell label sequence is detected) | +# ----------------------------------------------------------------------------------------- +# | MR | Raw molecular identifier sequence | +# ----------------------------------------------------------------------------------------- +# | MA | RSEC-adjusted molecular identifier sequence. If not a true cell, the raw UMI is | +# | | repeated in this tag. 
| +# ----------------------------------------------------------------------------------------- +# | PT | T if a poly(T) tail was found in the expected position on R1, or F if poly(T) | +# | | was not found | +# ----------------------------------------------------------------------------------------- +# | CN | Indicates if a sequence is derived from a putative cell, as determined by the | +# | | cell label filtering algorithm (T: putative cell; x: invalid cell label or noise | +# | | cell) Note: You can distinguish between an invalid cell label and a noise cell | +# | | with the CB tag (invalid cell labels are 0). | +# ----------------------------------------------------------------------------------------- +# | ST | The value is 1-12, indicating the Sample Tag of the called putative cell, or M | +# | | for multiplet, or x for undetermined. | +# ========================================================================================= + + +# SAM tags added by 10X +# https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/bam +# ========================================================================================= +# | | Definition | +# ========================================================================================= +# | CB | Chromium cellular barcode sequence that is error-corrected and confirmed against | +# | | a list of known-good barcode sequences. For multiplex Fixed RNA Profiling, the | +# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | +# | | sequences. | +# ----------------------------------------------------------------------------------------- +# | CR | Chromium cellular barcode sequence as reported by the sequencer. For multiplex | +# | | Fixed RNA Profiling, the cellular barcode is a combination of the 10x GEM | +# | | Barcode and Probe Barcode sequences. 
| +# ----------------------------------------------------------------------------------------- +# | CY | Chromium cellular barcode read quality. For multiplex Fixed RNA Profiling, the | +# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | +# | | sequences. Phred scores as reported by sequencer. | +# ----------------------------------------------------------------------------------------- +# | UB | Chromium molecular barcode sequence that is error-corrected among other | +# | | molecular barcodes with the same cellular barcode and gene alignment. | +# ----------------------------------------------------------------------------------------- +# | UR | Chromium molecular barcode sequence as reported by the sequencer. | +# ----------------------------------------------------------------------------------------- +# | UY | Chromium molecular barcode read quality. Phred scores as reported by sequencer. | +# ----------------------------------------------------------------------------------------- +# | TR | Trimmed sequence. For the Single Cell 3' v1 chemistry, this is trailing sequence | +# | | following the UMI on Read 2. For the Single Cell 3' v2 chemistry, this is | +# | | trailing sequence following the cell and molecular barcodes on Read 1. | +# ========================================================================================= + +extra_params=() + +if [ "\$par_bam" == "true" ]; then + extra_params+=("--bam") +fi + +cat \\ + <(samtools view -SH "\$par_input") \\ + <(samtools view "\$par_input" | grep "MA:Z:*" | sed "s/MA:Z:/UB:Z:/" ) | \\ +samtools view -Sh "\${extra_params[@]}" -@"\$par_threads" - > "\$par_output" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..27665fa8042 --- /dev/null +++ b/target/docker/convert/from_bdrhap_to_h5mu/.config.vsh.yaml @@ -0,0 +1,181 @@ +functionality: + name: "from_bdrhap_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "string" + name: "--id" + description: "A sample ID." + info: null + example: + - "my_id" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "The output of a BD Rhapsody workflow." 
+ info: null + example: + - "input_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "r_script" + path: "script.R" + is_executable: true + description: "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/randpy:r4.2_py3.9" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "r" + cran: + - "anndata" + bioc_force_install: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + 
mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bdrhap_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu b/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu new file mode 100755 index 00000000000..f2dad083d85 --- /dev/null +++ b/target/docker/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu @@ -0,0 +1,1215 @@ +#!/usr/bin/env bash + +# from_bdrhap_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. 
The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 
+VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. 
+function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="from_bdrhap_to_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_bdrhap_to_h5mu 0.12.4" + echo "" + echo "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file." + echo "" + echo "Inputs:" + echo " --id" + echo " type: string, required parameter" + echo " example: my_id" + echo " A sample ID." + echo "" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input_dir" + echo " The output of a BD Rhapsody workflow." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! 
command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ghcr.io/data-intuitive/randpy:r4.2_py3.9 + +ENTRYPOINT [] + + +RUN apt update && apt upgrade -y +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ + Rscript -e 'remotes::install_cran(c("anndata"), repos = "https://cran.rstudio.com")' + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component convert from_bdrhap_to_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL 
org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_bdrhap_to_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! 
"$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + 
ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "from_bdrhap_to_h5mu 0.12.4" + exit + ;; + --id) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id=*) + [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_ID+x} ]; then + ViashError '--id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_bdrhap_to_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_bdrhap_to_h5mu-XXXXXX").R +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +cat("Loading libraries\\n") +options(tidyverse.quiet = TRUE) +library(tidyverse) +requireNamespace("anndata", quietly = TRUE) +requireNamespace("reticulate", quietly = TRUE) +library(assertthat) +mudata <- reticulate::import("mudata") + +## VIASH START +# The following code has been auto-generated by Viash. 
+# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "id" = $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "input" = $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "output_compression" = $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT_COMPRESSION" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +read_metrics <- function(file) { + metric_lines <- readr::read_lines(file) + metric_lines_no_header <- metric_lines[!grepl("^##", metric_lines)] + + # parse sub data frames + group_title_regex <- "^#([^#]*)#" + group_title_ix <- grep(group_title_regex, metric_lines_no_header) + group_titles <- gsub(group_title_regex, "\\\\1", metric_lines_no_header[group_title_ix]) + group_ix_from <- group_title_ix+1 + group_ix_to <- c(group_title_ix[-1]-1, length(metric_lines_no_header)) + metric_dfs <- pmap( + list( + from = group_ix_from, + to = group_ix_to + ), + function(from, to) { + lines <- metric_lines_no_header[from:to] + lines <- lines[lines != ""] + readr::read_csv(paste0(lines, collapse = "\\n")) %>% + mutate(run_id = par\$id) %>% + select(run_id, everything()) + } + ) + names(metric_dfs) <- gsub(" ", "_", tolower(group_titles)) + metric_dfs +} +cat("Reading in metric summaries\\n") +metrics_file <- list.files(par\$input, pattern = "_Metrics_Summary.csv\$", full.names = TRUE) 
+assert_that( + length(metrics_file) == 1, + msg = paste0("Exactly one *_Metrics_Summary.csv should be found, found ", length(metrics_file), " files instead.") +) +metric_dfs <- read_metrics(metrics_file) + +cat("Reading in count data\\n") +counts_file <- list.files(par\$input, pattern = "_DBEC_MolsPerCell.csv\$", full.names = TRUE) +if (length(counts_file) == 0) { + cat("Warning: could not find DBEC file, looking for RSEC file instead.\\n") + counts_file <- list.files(par\$input, pattern = "_RSEC_MolsPerCell.csv\$", full.names = TRUE) +} +assert_that( + length(counts_file) == 1, + msg = paste0("Exactly one *_(RSEC|DBEC)_MolsPerCell.csv should be found, found ", length(counts_file), " files instead.") +) +counts <- + readr::read_csv( + counts_file, + col_types = cols(.default = col_integer()), + comment = "#" + ) %>% + tibble::column_to_rownames("Cell_Index") %>% + as.matrix %>% + Matrix::Matrix(sparse = TRUE) + +# processing VDJ data +vdj_file <- list.files(par\$input, pattern = "_VDJ_perCell.csv\$", full.names = TRUE) +vdj_data <- + if (length(vdj_file) == 1) { + cat("Reading in VDJ data\\n") + readr::read_csv( + vdj_file, + comment = "#" + ) + } else { + NULL + } + +cat("Reading in VDJ metric summaries\\n") +vdj_metrics_file <- list.files(par\$input, pattern = "_VDJ_metrics.csv\$", full.names = TRUE) +vdj_metric_dfs <- + if (length(vdj_metrics_file) == 1) { + read_metrics(vdj_metrics_file) + } else { + list() + } + +# processing SMK data +smk_file <- list.files(par\$input, pattern = "_Sample_Tag_Calls.csv\$", full.names = TRUE) +smk_calls <- + if (length(smk_file) == 1) { + cat("Processing sample tags\\n") + readr::read_csv( + smk_file, + comment = "#" + ) + } else { + NULL + } +smk_metrics_file <- list.files(par\$input, pattern = "_Sample_Tag_Metrics.csv\$", full.names = TRUE) +smk_metrics <- + if (length(smk_metrics_file) == 1) { + readr::read_csv( + smk_metrics_file, + comment = "#" + ) + } else { + NULL + } + +cat("Constructing obs\\n") +library_id <- 
metric_dfs[["sequencing_quality"]]\$Library +if (length(library_id) > 1) { + library_id <- paste(library_id[library_id != "Combined_stats"], collapse = " & ") +} + +obs <- tibble( + cell_id = rownames(counts), + run_id = par\$id, + library_id = library_id +) + +if (!is.null(smk_calls)) { + obs <- left_join( + obs, + smk_calls %>% transmute( + cell_id = as.character(Cell_Index), + sample_tag = Sample_Tag, + sample_id = Sample_Name + ), + by = "cell_id" + ) +} else { + obs <- obs %>% mutate(sample_id = library_id) +} + +obs <- obs %>% + mutate(sample_id = ifelse(!is.na(sample_id), sample_id, run_id)) %>% + as.data.frame() %>% + column_to_rownames("cell_id") + +cat("Constructing var\\n") +# determine feature types of genes +var0 <- tryCatch({ + feature_types_file <- list.files(par\$input, pattern = "feature_types.tsv\$", full.names = TRUE) + + # abseq fasta reference has trailing info which apparently gets stripped off by the bd rhapsody pipeline + readr::read_tsv(feature_types_file) %>% + mutate( + trimmed_feature_id = gsub(" .*", "", feature_id), + i = match(feature_id, colnames(counts)), + j = match(trimmed_feature_id, colnames(counts)), + ij = ifelse(is.na(i), j, i), + final_feature_id = ifelse(!is.na(i), feature_id, trimmed_feature_id) + ) %>% + filter(!is.na(ij)) %>% + select(feature_id = final_feature_id, feature_type, reference_file) +}, error = function(e) { + cat("Feature matching error: ", e\$message, "\\n", sep = "") + tibble( + feature_id = character() + ) +}) + +# in case the feature types are missing +missing_features <- tibble( + feature_id = setdiff(colnames(counts), var0\$feature_id), + feature_type = "Gene Expression", + reference_file = NA_character_, + note = "Feature annotation file missing, assuming type is Gene Expression" +) + +var1 <- + if (nrow(missing_features) > 0) { + cat("Feature annotation file missing, assuming type is Gene Expression\\n") + bind_rows(var0, missing_features) %>% + slice(match(colnames(counts), feature_id)) + # Avoid 
nullable string columnns https://github.com/scverse/anndata/issues/679 + missing_features %>% mutate(across(reference_file, as.factor)) + } else { + var0 + } + +# create var +var <- var1 %>% + transmute(gene_ids = feature_id, gene_name = feature_id, feature_types = feature_type, reference_file) %>% + as.data.frame() %>% + column_to_rownames("gene_ids") + +cat("Constructing uns\\n") +names(metric_dfs) <- paste0("mapping_qc_", names(metric_dfs)) +smk_metric_dfs <- + if (!is.null(smk_metrics)) { + list(mapping_qc_smk_metrics = smk_metrics) + } else { + NULL + } +uns <- c(metric_dfs, smk_metric_dfs) + +cat("Constructing RNA (&ABC?) AnnData") +adata <- anndata::AnnData( + X = counts, + obs = obs, + var = var, + uns = uns +) + +adata_prot <- adata[, adata\$var\$feature_types == "Antibody Capture"] +if (ncol(adata_prot) == 0) { + adata_prot <- NULL +} +adata_rna <- adata[, adata\$var\$feature_types != "Antibody Capture"] + +adata_vdj <- + if (!is.null(vdj_data)) { + cat("Constructing VDJ AnnData\\n") + names(vdj_metric_dfs) <- paste0("mapping_qc_", names(vdj_metric_dfs)) + anndata::AnnData( + obs = vdj_data, + uns = vdj_metric_dfs, + shape = c(nrow(vdj_data), 0L) + ) + } else { + NULL + } + +cat("Constructing MuData object\\n") +modalities <- + list( + rna = adata_rna, + prot = adata_prot, + vdj = adata_vdj + ) +mdata <- mudata\$MuData(modalities[!sapply(modalities, is.null)]) + +cat("Writing to h5mu file\\n") +mdata\$write(par\$output, compression=par\$output_compression) +VIASHMAIN +Rscript "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml b/target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..64a11fc3936 --- /dev/null +++ b/target/docker/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml @@ -0,0 +1,190 @@ +functionality: + name: "from_cellranger_multi_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input folder. Must contain the output from a cellranger multi run." + info: null + example: + - "input_dir_containing_modalities" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_metrics" + description: "Name of the .uns slot under which to QC metrics (if any)." + info: null + default: + - "metrics_cellranger" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts the output from cellranger multi to a single .h5mu file.\n\ + By default, will map the following library type names to modality names:\n -\ + \ Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ:\ + \ vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n\ + \ - Multiplexing Capture: hashing\n\nOther library types have their whitepace\ + \ removed and dashes replaced by\nunderscores to generate the modality name.\n\ + \nCurrently does not allow parsing the output from cell barcode demultiplexing.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/10x_5k_anticmv" + - type: "file" + path: "resources_test/10x_5k_lung_crispr" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scirpy~=0.11.1" + - "pandas~=2.0.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_cellranger_multi_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: 
"0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu b/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu new file mode 100755 index 00000000000..40b520ff768 --- /dev/null +++ b/target/docker/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu @@ -0,0 +1,1166 @@ +#!/usr/bin/env bash + +# from_cellranger_multi_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# 
ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. 
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="from_cellranger_multi_to_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_cellranger_multi_to_h5mu 0.12.4" + echo "" + echo "Converts the output from cellranger multi to a single .h5mu file." 
+ echo "By default, will map the following library type names to modality names:" + echo " - Gene Expression: rna" + echo " - Peaks: atac" + echo " - Antibody Capture: prot" + echo " - VDJ: vdj" + echo " - VDJ-T: vdj_t" + echo " - VDJ-B: vdj_b" + echo " - CRISPR Guide Capture: crispr" + echo " - Multiplexing Capture: hashing" + echo "" + echo "Other library types have their whitepace removed and dashes replaced by" + echo "underscores to generate the modality name." + echo "" + echo "Currently does not allow parsing the output from cell barcode demultiplexing." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input_dir_containing_modalities" + echo " Input folder. Must contain the output from a cellranger multi run." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo "" + echo " --uns_metrics" + echo " type: string" + echo " default: metrics_cellranger" + echo " Name of the .uns slot under which to QC metrics (if any)." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scirpy~=0.11.1" "pandas~=2.0.0" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component convert from_cellranger_multi_to_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" 
# ViashDockerfile: print the dockerfile to stdout
# return : dockerfile required to run this component
# examples:
#   ViashDockerfile
function ViashDockerfile {
  cat << 'VIASHDOCKER'
FROM python:3.10-slim

ENTRYPOINT []


RUN apt-get update && \
  DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \
  rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip && \
  pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scirpy~=0.11.1" "pandas~=2.0.0"

LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component convert from_cellranger_multi_to_h5mu"
LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
LABEL org.opencontainers.image.version="0.12.4"

VIASHDOCKER
}

# ViashDockerBuild: build a docker container
# $1              : image identifier with format `[registry/]image[:tag]`
# $2...           : extra arguments handed to `docker build` (e.g. --no-cache)
# exit code $?    : whether or not the image was built
function ViashDockerBuild {
  # create temporary directory to store dockerfile & optional resources in
  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_cellranger_multi_to_h5mu-XXXXXX")
  dockerfile="$tmpdir/Dockerfile"
  function clean_up {
    rm -rf "$tmpdir"
  }
  # NOTE: this replaces any EXIT trap installed earlier, and is itself
  # replaced by a later `trap ... EXIT`.
  trap clean_up EXIT

  # store dockerfile and resources
  ViashDockerfile > "$dockerfile"

  # Build the container
  ViashNotice "Building container '$1' with Dockerfile"
  ViashInfo "Running 'docker build -t $* $VIASH_META_RESOURCES_DIR -f $dockerfile'"
  save=$-; set +e
  # Paths are quoted so temp/resource directories containing spaces work;
  # "$@" keeps the image id and each extra flag as separate words.
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile"
  else
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log"
  fi
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashError "Error occurred while building container '$1'"
    # The transcript was only captured to a file in the quiet branch above.
    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
      ViashError "Transcript: --------------------------------"
      cat "$tmpdir/docker_build.log"
      ViashError "End of transcript --------------------------"
    fi
    exit 1
  fi
  ViashDockerCheckCommands "$1" 'ps' 'bash'
}
# ViashAbsolutePath: generate absolute path from relative path
# borrowed from https://stackoverflow.com/a/21951256
# $1     : relative filename
# return : absolute path
# examples:
#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
#   ViashAbsolutePath /foo/bar/..     # returns /foo
function ViashAbsolutePath {
  local thePath
  if [[ ! "$1" =~ ^/ ]]; then
    thePath="$PWD/$1"
  else
    thePath="$1"
  fi
  # The subshell keeps the IFS change and the scratch arrays local.
  echo "$thePath" | (
    IFS=/
    # -r: keep backslashes in path components literal
    read -r -a parr
    declare -a outp
    for i in "${parr[@]}"; do
      case "$i" in
      ''|.) continue ;;
      ..)
        # drop the previous component, if there is one
        len=${#outp[@]}
        if ((len==0)); then
          continue
        else
          unset "outp[$((len-1))]"
        fi
        ;;
      *)
        len=${#outp[@]}
        outp[$len]="$i"
        ;;
      esac
    done
    # IFS is "/", so "${outp[*]}" joins the components with slashes
    echo /"${outp[*]}"
  )
}
# ViashAutodetectMount: auto configuring docker mounts from parameters
# $1                             : The parameter value
# returns                        : New parameter
# $VIASH_EXTRA_MOUNTS            : Added another parameter to be passed to docker
# examples:
#   ViashAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
#   ViashAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
function ViashAutodetectMount {
  abs_path=$(ViashAbsolutePath "$1")
  # a directory is mounted as-is; for anything else its parent is mounted
  if [ -d "$abs_path" ]; then
    mount_source="$abs_path"
    base_name=""
  else
    mount_source=$(dirname "$abs_path")
    base_name=$(basename "$abs_path")
  fi
  mount_target="/viash_automount$mount_source"
  echo "$mount_target/$base_name"
}
function ViashAutodetectMountArg {
  abs_path=$(ViashAbsolutePath "$1")
  if [ -d "$abs_path" ]; then
    mount_source="$abs_path"
    base_name=""
  else
    mount_source=$(dirname "$abs_path")
    base_name=$(basename "$abs_path")
  fi
  mount_target="/viash_automount$mount_source"
  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
  # the embedded double quotes are literal: the result is later passed
  # through `eval docker run ...`
  echo "--volume=\"$mount_source:$mount_target\""
}
# ViashStripAutomount: remove the /viash_automount prefix from a path
# $1     : path, possibly prefixed with /viash_automount
# return : absolute path without the prefix
function ViashStripAutomount {
  abs_path=$(ViashAbsolutePath "$1")
  echo "${abs_path#/viash_automount}"
}
# ViashExtractFlags: Retain leading flag
# $1     : string with a possible leading flag
# return : leading flag
# examples:
#   ViashExtractFlags --foo=bar  # returns --foo
function ViashExtractFlags {
  # quoted so the argument is not word-split or glob-expanded before sed sees it
  echo "$1" | sed 's/=.*//'
}
# initialise variables
VIASH_EXTRA_MOUNTS=()

# initialise array
VIASH_POSITIONAL_ARGS=''
VIASH_MODE='run'

# Parse the command line. Options starting with three dashes configure the
# wrapper itself; everything that is not recognised is collected as a
# positional argument.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            ViashHelp
            exit
            ;;
        ---v|---verbose)
            let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
            shift 1
            ;;
        ---verbosity)
            VIASH_VERBOSITY="$2"
            shift 2
            ;;
        ---verbosity=*)
            VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
            shift 1
            ;;
        --version)
            echo "from_cellranger_multi_to_h5mu 0.12.4"
            exit
            ;;
        --input)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --input=*)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        -i)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output=*)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        -o)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output_compression)
            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT_COMPRESSION="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output_compression=*)
            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        --uns_metrics)
            [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_UNS_METRICS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_metrics. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --uns_metrics=*)
            [ -n "$VIASH_PAR_UNS_METRICS" ] && ViashError Bad arguments for option \'--uns_metrics=*\': \'$VIASH_PAR_UNS_METRICS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_UNS_METRICS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---setup)
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
            # flag and value are two words: consume both (was `shift 1`,
            # which left the strategy behind as a positional argument)
            shift 2
            ;;
        ---setup=*)
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
            # flag and value are one word: consume one (was `shift 2`, which
            # fails under `set -e` when this is the last argument)
            shift 1
            ;;
        ---dockerfile)
            ViashDockerfile
            exit 0
            ;;
        # NOTE: `---v` is already matched by the verbose arm above, so only
        # `---volume` can reach this arm.
        ---v|---volume)
            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
            shift 2
            ;;
        ---volume=*)
            # the value is part of "$1" here (was read from "$2")
            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
            shift 1
            ;;
        ---debug)
            VIASH_MODE='docker_debug'
            shift 1
            ;;
        ---cpus)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---cpus=*)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---memory)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---memory=*)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        *)  # positional arg or unknown option
            # The collected string is eval'd below, so every argument is
            # shell-escaped with printf %q; plain '...' wrapping breaks (and
            # allows command injection) when the argument contains a quote.
            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS $(printf '%q' "$1")"
            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
            shift # past argument
            ;;
    esac
done

# parse positional parameters
# (quoted so the escaped words reach eval without word splitting or globbing)
eval set -- "$VIASH_POSITIONAL_ARGS"


ViashDockerInstallationCheck

if [ $VIASH_MODE == "docker_setup" ]; then
  ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY"
  exit 0
fi
ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0' ifneedbepullelsecachedbuild

if [ $VIASH_MODE == "docker_debug" ]; then
  ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0'"
  docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0'
  exit 0
fi

# setting computational defaults

# helper function for parsing memory strings
# $1     : memory string such as "2GB", "512m" or "100 kb" (case-insensitive)
# return : the amount in bytes (units are powers of 1024); prints nothing
#          when the string does not match
function ViashMemoryAsBytes {
  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
  if [[ $memory =~ $memory_regex ]]; then
    # split "<digits><unit>" into its two parts
    local number=${memory/[^0-9]*/}
    local symbol=${memory/*[0-9]/}

    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( $number * 1024 )) ;;
      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}
# compute memory in different units
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  VIASH_META_MEMORY_B=$(ViashMemoryAsBytes "$VIASH_META_MEMORY")
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # `unset` takes the variable NAME; `unset $VAR` expanded the (empty)
    # value and left the variable set, which the ${VAR+x} tests used when
    # templating the script then treated as "provided".
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi


# check whether required parameters exist
if [ -z ${VIASH_PAR_INPUT+x} ]; then
  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_CONFIG+x} ]; then
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi

# filling in defaults
if [ -z ${VIASH_PAR_UNS_METRICS+x} ]; then
  VIASH_PAR_UNS_METRICS="metrics_cellranger"
fi
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
# change file ownership
# ViashPerformChown: hand ownership of the recorded output paths to the
# calling user by running `chown` inside the component's container.
# Reads  : VIASH_CHOWN_VARS (paths to chown), VIASH_UNIQUE_MOUNTS (volume args)
# Does nothing when VIASH_CHOWN_VARS is empty.
function ViashPerformChown {
  if (( ${#VIASH_CHOWN_VARS[@]} )); then
    # best effort: a failing chown must not change the script's outcome.
    # errexit is switched back on unconditionally afterwards.
    set +e
    # `eval` is required here: the mount arguments carry literal embedded
    # double quotes (--volume="src:target") that need a second round of parsing.
    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_cellranger_multi_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
    set -e
  fi
}
# run the ownership fix whenever the script exits (replaces any earlier EXIT trap)
trap ViashPerformChown EXIT
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! 
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    """Configure the root logger to emit INFO and above on stdout.

    Calling this more than once does not attach a second handler for the
    same stdout stream, so messages are not printed twice.

    Returns:
        The configured root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()

# Names of the sub-directories that may appear in the 'multi' folder.
POSSIBLE_LIBRARY_TYPES = ('vdj_t', 'vdj_b', 'vdj_t_gd', 'count')

# Feature type (as written in the input files) -> modality name.
FEATURE_TYPES_NAMES = {
    "Gene Expression": "rna",
    "Peaks": "atac",
    "Antibody Capture": "prot",
    "VDJ": "vdj",
    "VDJ-T": "vdj_t",
    "VDJ-B": "vdj_b",
    "CRISPR Guide Capture": "gdo",
    "Multiplexing Capture": "hto"
}


def gather_input_data(dir: Path):
    """Locate the inputs of a cellranger multi output folder.

    Args:
        dir: Folder that must contain the 'multi' and 'per_sample_outs'
            sub-directories.

    Returns:
        A dict mapping every entry of POSSIBLE_LIBRARY_TYPES, plus
        'metrics_summary', 'feature_reference',
        'perturbation_efficiencies_by_feature' and
        'perturbation_efficiencies_by_target', to the path that was found,
        or to None when it is absent.

    Raises:
        ValueError: if `dir` is not a directory, a required sub-directory is
            missing, 'multi' holds an unknown directory, or more than one
            sample provides the same file.
    """
    if not dir.is_dir():
        raise ValueError("Specified input is not a directory.")
    if not (dir / 'config.csv').exists():
        logger.warning('Config.csv not found in input directory, this folder might not be a valid cellranger multi output.')

    found_input = {key_: None for key_ in POSSIBLE_LIBRARY_TYPES + ('metrics_summary',)}
    for subfolder_name in ('multi', 'per_sample_outs'):
        required_subfolder = dir / subfolder_name
        # is_dir() instead of a name lookup: a regular file called 'multi'
        # must be rejected here rather than crash in iterdir() below.
        if not required_subfolder.is_dir():
            raise ValueError(f"Input folder must contain the subfolder {required_subfolder} please make "
                             "sure that the specified input folder is a valid cellranger multi output.")

    multi_dir = dir / 'multi'
    for library_type in multi_dir.iterdir():
        if not library_type.is_dir():
            logger.warning("%s is not a directory. Contents of the multi folder "
                           "must be directories to be recognized as valid input data",
                           library_type)
            continue
        if library_type.name not in POSSIBLE_LIBRARY_TYPES:
            raise ValueError("Contents of the 'multi' folder must be one of the following: "
                             f"{','.join(POSSIBLE_LIBRARY_TYPES)}; found '{library_type.name}'.")

        found_input[library_type.name] = library_type

    per_sample_outs_dir = dir / 'per_sample_outs'
    for file_glob in ('*/metrics_summary.csv', '*/count/feature_reference.csv',
                      '*/count/crispr_analysis/perturbation_efficiencies_by_feature.csv',
                      '*/count/crispr_analysis/perturbation_efficiencies_by_target.csv'):
        found_files = list(per_sample_outs_dir.glob(file_glob))
        if len(found_files) > 1:
            raise ValueError(f"Found more than one file for glob '{file_glob}' file. "
                             "This component currently only supports parsing cellranger multi output for one sample.")
        # the key is the file name without its '.csv' extension
        file_name = Path(file_glob).name.removesuffix('.csv')
        found_input[file_name] = found_files[0] if found_files else None

    return found_input


def proces_perturbation(key_name: str, mudata: "mudata.MuData", efficiency_file: Path):
    """Store a perturbation efficiency table in the 'gdo' modality.

    Args:
        key_name: Key under which the table is stored in `.uns`.
        mudata: Object that must already hold a 'gdo' modality.
        efficiency_file: CSV file with a 'Perturbation' column, used as index.

    Returns:
        The same `mudata` object, modified in place.
    """
    assert 'gdo' in mudata.mod
    eff_df = pd.read_csv(efficiency_file, index_col="Perturbation", sep=",", decimal=".", quotechar='"')
    mudata.mod['gdo'].uns[key_name] = eff_df
    return mudata


def process_feature_reference(mudata: "mudata.MuData", efficiency_file: Path):
    """Attach the rows of the feature reference to the matching modalities.

    Args:
        mudata: Object whose modalities receive a 'feature_reference' entry
            in `.uns`.
        efficiency_file: Path to the feature reference CSV; it needs an 'id'
            column (used as index) and a 'feature_type' column.

    Returns:
        The same `mudata` object, modified in place.

    Raises:
        ValueError: if the file lists a feature type that is not a key of
            FEATURE_TYPES_NAMES.
    """
    df = pd.read_csv(efficiency_file, index_col="id", sep=",", decimal=".", quotechar='"')
    assert 'feature_type' in df.columns, "Columns 'feature_type' should be present in features_reference file."
    feature_types = df['feature_type']
    if set(feature_types) - set(FEATURE_TYPES_NAMES):
        raise ValueError("Not all feature types present in the features_reference file are supported by this component.")
    # One pass per distinct feature type; looping over every row recomputed
    # and re-assigned the same subset once per feature.
    for feature_type in feature_types.unique():
        modality = FEATURE_TYPES_NAMES[feature_type]
        subset_df = df.loc[df['feature_type'] == feature_type]
        mudata.mod[modality].uns['feature_reference'] = subset_df
    return mudata
def process_counts(counts_folder: Path):
    """Read the raw count matrix and split it into modalities.

    Args:
        counts_folder: Folder that contains 'raw_feature_bc_matrix.h5'.

    Returns:
        A MuData object built from the count matrix, with `gene_ids` as the
        var index and the previous index kept as the 'gene_symbol' column.
    """
    counts_matrix_file = counts_folder / "raw_feature_bc_matrix.h5"
    logger.info("Reading %s.", counts_matrix_file)
    adata = scanpy.read_10x_h5(counts_matrix_file, gex_only=False)

    # set the gene ids as var_names
    logger.info("Renaming var columns")
    adata.var = (
        adata.var
        .rename_axis("gene_symbol")
        .reset_index()
        .set_index("gene_ids")
    )

    # generate output
    logger.info("Convert to mudata")

    class _ModalityNames(dict):
        """Known feature types, with a derived fallback name for unknown ones."""

        def __missing__(self, library_type):
            # e.g. "Custom-Type X" -> "custom_typex"
            return "".join(library_type.replace("-", "_").split()).lower()

    # A defaultdict cannot do this: its factory is called WITHOUT the missing
    # key, so the one-argument factory used before raised TypeError on lookup.
    # NOTE(review): whether MuData ever looks up an unknown feature type
    # depends on the installed mudata version - confirm.
    feature_types = _ModalityNames(FEATURE_TYPES_NAMES)
    return mudata.MuData(adata, feature_types_names=feature_types)


def process_metrics_summary(mudata: "mudata.MuData", metrics_file: Path):
    """Store the metrics summary table in `mudata.uns[par["uns_metrics"]]`.

    Values written as percentages ("85.3%") are converted to fractions
    (0.853); every column is then stored as a categorical of strings.

    Args:
        mudata: Object that receives the table.
        metrics_file: Path to the metrics summary CSV.

    Returns:
        The same `mudata` object, modified in place.
    """
    def read_percentage(val):
        # Only values that really carry a '%' sign are percentages; other
        # numeric-looking strings (e.g. "12") must not be divided by 100.
        if not isinstance(val, str):
            return val
        stripped = val.strip()
        if not stripped.endswith('%'):
            return val
        try:
            return float(stripped.strip('%')) / 100
        except ValueError:
            return val

    # applymap is kept because the container pins pandas~=2.0.0
    metrics_summary = pd.read_csv(metrics_file,
                                  decimal=".",
                                  quotechar='"',
                                  thousands=",").applymap(read_percentage)

    mudata.uns[par["uns_metrics"]] = metrics_summary
    # the frame stored above is the same object, so the conversions below
    # are visible through mudata.uns as well
    for colname, coldata in metrics_summary.items():
        try:
            metrics_summary[colname] = coldata.astype(str).astype("category")
        except (ValueError, TypeError):
            logger.warning(f"Could not store column {colname} from metrics.")
    return mudata


def process_vdj(mudata: "mudata.MuData", vdj_folder_path: Path):
    """Add a VDJ modality, named after the folder, to `mudata`.

    Args:
        mudata: Object that receives the modality.
        vdj_folder_path: Folder containing 'all_contig_annotations.json'.

    Returns:
        The same `mudata` object, modified in place.
    """
    # https://scverse.org/scirpy/latest/generated/scirpy.io.read_10x_vdj.html#scirpy-io-read-10x-vdj
    # According to docs, using the json is preferred as this file includes intron info.
    all_contig_json_file = vdj_folder_path / "all_contig_annotations.json"
    vdj_anndata = read_10x_vdj(all_contig_json_file)
    vdj_type = vdj_folder_path.name
    mudata.mod[vdj_type] = vdj_anndata
    return mudata


def get_modalities(input_data):
    """Build the output object from the inputs found on disk.

    Args:
        input_data: Mapping of input name to path (or None when absent), as
            returned by `gather_input_data`.

    Returns:
        The MuData object created from the counts, extended with every other
        input that is present.

    Raises:
        ValueError: if no 'count' folder was found, or an input is present
            for which no parser exists.
    """
    counts_folder = input_data.get('count')
    if not counts_folder:
        # fail with a clear message instead of a TypeError on `None / str`
        raise ValueError("No 'count' folder was found in the 'multi' folder of the input, "
                         "cannot create the output without counts.")
    dispatcher = {
        'vdj_t': process_vdj,
        'vdj_b': process_vdj,
        'vdj_t_gd': process_vdj,
        'metrics_summary': process_metrics_summary,
        'feature_reference': process_feature_reference,
        'perturbation_efficiencies_by_feature': partial(proces_perturbation, 'perturbation_efficiencies_by_feature'),
        'perturbation_efficiencies_by_target': partial(proces_perturbation, 'perturbation_efficiencies_by_target'),
    }
    mudata_file = process_counts(counts_folder)
    for modality_name, modality_data_path in input_data.items():
        if modality_name == "count" or not modality_data_path:
            continue
        try:
            parser_function = dispatcher[modality_name]
        except KeyError as e:
            raise ValueError("This component does not support the "
                             f"parsing of the '{modality_name}' yet.") from e
        mudata_file = parser_function(mudata_file, modality_data_path)
    return mudata_file


def main():
    """Convert the folder given by par["input"] and write par["output"]."""
    cellranger_multi_dir = Path(par["input"])
    input_data = gather_input_data(cellranger_multi_dir)
    result = get_modalities(input_data)
    logger.info("Writing %s", par["output"])
    result.write_h5mu(par["output"], compression=par["output_compression"])


if __name__ == "__main__":
    main()
def setup_logger():
    """Configure the root logger to emit INFO and above on stdout.

    Calling this more than once does not attach a second handler for the
    same stdout stream, so messages are not printed twice.

    Returns:
        The configured root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
"Input h5ad files" + info: null + default: + - "input.h5ad" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output MuData file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a single layer h5ad file into a single MuData object\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + 
directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5ad_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu b/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu new file mode 100755 index 00000000000..a0ebe55ebfb --- /dev/null +++ b/target/docker/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu @@ -0,0 +1,1056 @@ +#!/usr/bin/env bash + +# from_h5ad_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries De Maeyer (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see 
https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="from_h5ad_to_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_h5ad_to_h5mu 0.12.4" + echo "" + echo "Converts a single layer h5ad file into a single MuData object" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " default: input.h5ad" + echo " Input h5ad files" + echo "" + echo " --modality" + echo " type: string, multiple values allowed" + echo " default: rna" + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " default: output.h5mu" + echo " Output MuData file." 
+ echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries De Maeyer" +LABEL org.opencontainers.image.description="Companion container for running component convert from_h5ad_to_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_h5ad_to_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "from_h5ad_to_h5mu 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + if [ -z "$VIASH_PAR_MODALITY" ]; then + VIASH_PAR_MODALITY="$2" + else + VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + if [ -z "$VIASH_PAR_MODALITY" ]; then + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + else + VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="output.h5mu" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=':' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." 
+ exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=':' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=':' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5ad_to_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_h5ad_to_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +import anndata +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +assert len(par["input"]) == len(par["modality"]), "Number of input files should be the same length as the number of modalities" + +logger.info("Reading input files") +data = { key: anndata.read_h5ad(path) for key, path in zip(par["modality"], par["input"]) } + +try: + data.var_names_make_unique() +except: + pass + +logger.info("Converting to mudata") +mudata = mu.MuData(data) + +try: + mudata.var_names_make_unique() +except: + pass + +logger.info("Writing to %s.", par['output']) +mudata.write_h5mu(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=':' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT:""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/convert/from_h5ad_to_h5mu/setup_logger.py b/target/docker/convert/from_h5ad_to_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/convert/from_h5ad_to_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml b/target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml new file mode 100644 index 00000000000..9acb567c6cf --- /dev/null +++ b/target/docker/convert/from_h5mu_to_h5ad/.config.vsh.yaml @@ -0,0 +1,182 @@ +functionality: + name: "from_h5mu_to_h5ad" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: 
"https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input MuData file" + info: null + default: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output AnnData file." + info: null + default: + - "output.h5ad" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the final h5ad object." + info: null + default: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a h5mu file into a h5ad file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - 
"procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5mu_to_h5ad" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad b/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad new file mode 100755 index 00000000000..4fc2410420d --- /dev/null 
+++ b/target/docker/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad @@ -0,0 +1,1009 @@ +#!/usr/bin/env bash + +# from_h5mu_to_h5ad 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( 
dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. 
+# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="from_h5mu_to_h5ad" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_h5mu_to_h5ad 0.12.4" + echo "" + echo "Converts a h5mu file into a h5ad file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " default: input.h5mu" + echo " Input MuData file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " default: output.h5ad" + echo " Output AnnData file." + echo "" + echo " --output_compression" + echo " type: string" + echo " default: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the final h5ad object." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # run the lookup itself; wrapped in [ ] it was an invalid test expression, so a missing docker binary was never reported + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component convert from_h5mu_to_h5ad" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-from_h5mu_to_h5ad-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "from_h5mu_to_h5ad 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 # consume both the flag and the strategy value read from $2 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # the strategy is embedded in $1, so only one word is consumed + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # in the flag=value form the value is part of $1, not $2 + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="output.h5ad" +fi +if [ -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then + VIASH_PAR_OUTPUT_COMPRESSION="gzip" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_from_h5mu_to_h5ad:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_h5mu_to_h5ad-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +# TODO: Merge modalities into one layer + +logger.info("Reading input h5mu file") +dat = mu.read_h5mu(par["input"]) + +logger.info("Converting to h5ad") +adat = dat.mod[par["modality"]] + +logger.info("Writing to %s.", par['output']) +adat.write_h5ad(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/convert/from_h5mu_to_h5ad/setup_logger.py b/target/docker/convert/from_h5mu_to_h5ad/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/convert/from_h5mu_to_h5ad/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml b/target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..8e58dbe4439 --- /dev/null +++ b/target/docker/convert/velocyto_to_h5mu/.config.vsh.yaml @@ -0,0 +1,255 @@ +functionality: + name: "velocyto_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Robrecht Cannoodt" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "contributor" + info: + role: "Contributor" 
+ links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input_loom" + description: "Path to the input loom file." + info: null + example: + - "input.loom" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input_h5mu" + description: "If a MuData file is provided," + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "The name of the modality to operate on." + info: null + default: + - "rna_velocity" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Path to the output MuData file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_spliced" + description: "Output layer for the spliced reads." + info: null + default: + - "velo_spliced" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_unspliced" + description: "Output layer for the unspliced reads." 
+ info: null + default: + - "velo_unspliced" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_ambiguous" + description: "Output layer for the ambiguous reads." + info: null + default: + - "velo_ambiguous" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file\ + \ is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "loompy" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + 
mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/velocyto_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu b/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu new file mode 100755 index 00000000000..9aa87a4d355 --- /dev/null +++ b/target/docker/convert/velocyto_to_h5mu/velocyto_to_h5mu @@ -0,0 +1,1086 @@ +#!/usr/bin/env bash + +# velocyto_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer, author) +# * Robrecht Cannoodt (author) +# * Angela Oliveira Pisco (contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE 
+ +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="velocyto_to_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "velocyto_to_h5mu 0.12.4" + echo "" + echo "Convert a velocyto loom file to a h5mu file." + echo "" + echo "If an input h5mu file is also provided, the velocity" + echo "h5ad object will get added to that h5mu instead." + echo "" + echo "Inputs:" + echo " --input_loom" + echo " type: file, required parameter, file must exist" + echo " example: input.loom" + echo " Path to the input loom file." + echo "" + echo " --input_h5mu" + echo " type: file, file must exist" + echo " example: input.h5mu" + echo " If a MuData file is provided," + echo "" + echo " --modality" + echo " type: string" + echo " default: rna_velocity" + echo " The name of the modality to operate on." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Path to the output MuData file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --layer_spliced" + echo " type: string" + echo " default: velo_spliced" + echo " Output layer for the spliced reads." 
+ echo "" + echo " --layer_unspliced" + echo " type: string" + echo " default: velo_unspliced" + echo " Output layer for the unspliced reads." + echo "" + echo " --layer_ambiguous" + echo " type: string" + echo " default: velo_ambiguous" + echo " Output layer for the ambiguous reads." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." 
+ fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "loompy" + +LABEL org.opencontainers.image.authors="Dries Schaumont, Robrecht Cannoodt, Angela Oliveira Pisco" +LABEL org.opencontainers.image.description="Companion container for running component convert velocyto_to_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-velocyto_to_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "velocyto_to_h5mu 0.12.4" + exit + ;; + --input_loom) + [ -n "$VIASH_PAR_INPUT_LOOM" ] && ViashError Bad arguments for option \'--input_loom\': \'$VIASH_PAR_INPUT_LOOM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LOOM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_loom. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_loom=*) + [ -n "$VIASH_PAR_INPUT_LOOM" ] && ViashError Bad arguments for option \'--input_loom=*\': \'$VIASH_PAR_INPUT_LOOM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LOOM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_h5mu) + [ -n "$VIASH_PAR_INPUT_H5MU" ] && ViashError Bad arguments for option \'--input_h5mu\': \'$VIASH_PAR_INPUT_H5MU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_H5MU="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_h5mu. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_h5mu=*) + [ -n "$VIASH_PAR_INPUT_H5MU" ] && ViashError Bad arguments for option \'--input_h5mu=*\': \'$VIASH_PAR_INPUT_H5MU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_H5MU=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer_spliced) + [ -n "$VIASH_PAR_LAYER_SPLICED" ] && ViashError Bad arguments for option \'--layer_spliced\': \'$VIASH_PAR_LAYER_SPLICED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LAYER_SPLICED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_spliced. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer_spliced=*) + [ -n "$VIASH_PAR_LAYER_SPLICED" ] && ViashError Bad arguments for option \'--layer_spliced=*\': \'$VIASH_PAR_LAYER_SPLICED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_SPLICED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer_unspliced) + [ -n "$VIASH_PAR_LAYER_UNSPLICED" ] && ViashError Bad arguments for option \'--layer_unspliced\': \'$VIASH_PAR_LAYER_UNSPLICED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_UNSPLICED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_unspliced. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer_unspliced=*) + [ -n "$VIASH_PAR_LAYER_UNSPLICED" ] && ViashError Bad arguments for option \'--layer_unspliced=*\': \'$VIASH_PAR_LAYER_UNSPLICED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_UNSPLICED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer_ambiguous) + [ -n "$VIASH_PAR_LAYER_AMBIGUOUS" ] && ViashError Bad arguments for option \'--layer_ambiguous\': \'$VIASH_PAR_LAYER_AMBIGUOUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_AMBIGUOUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_ambiguous. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer_ambiguous=*) + [ -n "$VIASH_PAR_LAYER_AMBIGUOUS" ] && ViashError Bad arguments for option \'--layer_ambiguous=*\': \'$VIASH_PAR_LAYER_AMBIGUOUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_LAYER_AMBIGUOUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        # ---setup <strategy>: switch to docker setup mode. The strategy is a
+        # separate word, so both the flag and its value must be consumed.
+        # (A single shift left the strategy behind as a positional argument.)
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2
+            ;;
+        # ---setup=<strategy>: flag and value share one word, so consume one
+        # word only. (Shifting two either swallowed the next argument or, when
+        # this was the last argument, failed without consuming anything.)
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        # ---dockerfile: print the Dockerfile for this component and stop.
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        # ---volume <spec>: pass an extra volume to docker.
+        # NOTE: '---v' is already matched by the earlier '---v|---verbose' arm of
+        # this case statement, so only '---volume' can reach this arm.
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        # ---volume=<spec>: the value is part of the current word ($1), not the
+        # next one, so strip the flag from "$1".
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        # ---debug: open an interactive shell in the container instead of running.
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        # ---cpus / ---memory: computational requirements; each may be given once.
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
+  # Convert the user-supplied memory string to bytes. ViashMemoryAsBytes prints
+  # nothing when the string does not match its expected format.
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # derive each larger unit by dividing by 1024, rounding up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # 'unset' takes the variable NAME. Expanding the (empty) value passed no
+    # name at all, so the variable stayed set-but-empty and the generated
+    # script rendered it as int(r'') instead of None.
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # pass the name, not the expanded value (see the note on memory above)
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+# (${VAR+x} expands to 'x' only when VAR is set, so an empty-but-set value passes)
+if [ -z ${VIASH_PAR_INPUT_LOOM+x} ]; then
+  ViashError '--input_loom' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna_velocity" +fi +if [ -z ${VIASH_PAR_LAYER_SPLICED+x} ]; then + VIASH_PAR_LAYER_SPLICED="velo_spliced" +fi +if [ -z ${VIASH_PAR_LAYER_UNSPLICED+x} ]; then + VIASH_PAR_LAYER_UNSPLICED="velo_unspliced" +fi +if [ -z ${VIASH_PAR_LAYER_AMBIGUOUS+x} ]; then + VIASH_PAR_LAYER_AMBIGUOUS="velo_ambiguous" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT_LOOM" ] && [ ! -e "$VIASH_PAR_INPUT_LOOM" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_LOOM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_H5MU" ] && [ ! -e "$VIASH_PAR_INPUT_H5MU" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_H5MU' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_MEMORY_PB" ]]; then
+  if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+
+# check whether the value belongs to the set of allowed choices
+# The choices are stored as one ':'-separated string; the value is accepted
+# when ":value:" occurs literally inside ":choices:". IFS is set to ':' and
+# globbing is disabled (set -f) for the duration of the test, then restored.
+if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then
+  VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf")
+  IFS=':'
+  set -f
+  if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then
+    ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+  set +f
+  unset IFS
+fi
+
+# create parent directories of output files, if so desired
+if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then
+  mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")"
+fi
+
+# detect volumes from file arguments
+# For every non-empty file argument: add a --volume argument for it to
+# VIASH_EXTRA_MOUNTS, then rewrite the variable to the corresponding path
+# under /viash_automount. The output path is also recorded in
+# VIASH_CHOWN_VARS, which ViashPerformChown uses to chown it on exit.
+VIASH_CHOWN_VARS=()
+if [ ! -z "$VIASH_PAR_INPUT_LOOM" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_LOOM")" )
+  VIASH_PAR_INPUT_LOOM=$(ViashAutodetectMount "$VIASH_PAR_INPUT_LOOM")
+fi
+if [ ! -z "$VIASH_PAR_INPUT_H5MU" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_H5MU")" )
+  VIASH_PAR_INPUT_H5MU=$(ViashAutodetectMount "$VIASH_PAR_INPUT_H5MU")
+fi
+if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" )
+  VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT")
+  VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" )
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" )
+  VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ !
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/convert_velocyto_to_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-velocyto_to_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import anndata as ad +import mudata as mu + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_loom': $( if [ ! -z ${VIASH_PAR_INPUT_LOOM+x} ]; then echo "r'${VIASH_PAR_INPUT_LOOM//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_h5mu': $( if [ ! -z ${VIASH_PAR_INPUT_H5MU+x} ]; then echo "r'${VIASH_PAR_INPUT_H5MU//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer_spliced': $( if [ ! -z ${VIASH_PAR_LAYER_SPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_SPLICED//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer_unspliced': $( if [ ! -z ${VIASH_PAR_LAYER_UNSPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_UNSPLICED//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer_ambiguous': $( if [ ! -z ${VIASH_PAR_LAYER_AMBIGUOUS+x} ]; then echo "r'${VIASH_PAR_LAYER_AMBIGUOUS//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +print("Parameters:", par, flush=True) + +print("Reading AnnData from loom", flush=True) +adata_in = ad.read_loom(par["input_loom"]) +adata_in.var_names = adata_in.var["Accession"] + +print("Creating clean AnnData", flush=True) +adata = ad.AnnData( + obs=adata_in.obs[[]], + var=adata_in.var[[]], + layers={ + par["layer_spliced"]: adata_in.layers["spliced"], + par["layer_unspliced"]: adata_in.layers["unspliced"], + par["layer_ambiguous"]: adata_in.layers["ambiguous"] + } +) + +if par["input_h5mu"]: + print("Received input h5mu to read", flush=True) + mdata = mu.read_h5mu(par["input_h5mu"]) + + print(f"Storing AnnData in modality {par['modality']}", flush=True) + mdata.mod[par["modality"]] = adata +else: + print("Creating h5mu from scratch", flush=True) + mdata = mu.MuData({par["modality"]: adata}) + +print("Resulting mudata:", mdata, flush=True) + +print("Writing h5mu to file", flush=True) 
+mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT_LOOM" ]; then + VIASH_PAR_INPUT_LOOM=$(ViashStripAutomount "$VIASH_PAR_INPUT_LOOM") +fi +if [ ! -z "$VIASH_PAR_INPUT_H5MU" ]; then + VIASH_PAR_INPUT_H5MU=$(ViashStripAutomount "$VIASH_PAR_INPUT_H5MU") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/correction/cellbender_remove_background/.config.vsh.yaml b/target/docker/correction/cellbender_remove_background/.config.vsh.yaml new file mode 100644 index 00000000000..da33a50ad22 --- /dev/null +++ b/target/docker/correction/cellbender_remove_background/.config.vsh.yaml @@ -0,0 +1,637 @@ +functionality: + name: "cellbender_remove_background" + namespace: "correction" + version: "0.12.4" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file. Data file on which to run tool. Data must be\ + \ un-filtered: it should include empty droplets." 
+ info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "List of modalities to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Full count matrix as an h5mu file, with background RNA removed.\ + \ This file contains all the original droplet barcodes." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_output" + description: "Output layer" + info: null + default: + - "cellbender_corrected" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_background_fraction" + info: null + default: + - "cellbender_background_fraction" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_cell_probability" + info: null + default: + - "cellbender_cell_probability" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_cell_size" + info: null + default: + - "cellbender_cell_size" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_droplet_efficiency" + info: null + default: + - "cellbender_droplet_efficiency" + required: false + direction: "input" + multiple: false + multiple_sep: 
":" + dest: "par" + - type: "string" + name: "--obs_latent_scale" + info: null + default: + - "cellbender_latent_scale" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_ambient_expression" + info: null + default: + - "cellbender_ambient_expression" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_gene_expression_encoding" + info: null + default: + - "cellbender_gene_expression_encoding" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "boolean" + name: "--expected_cells_from_qc" + description: "Will use the Cell Ranger QC to determine the estimated number\ + \ of cells" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--expected_cells" + description: "Number of cells expected in the dataset (a rough estimate within\ + \ a factor of 2 is sufficient)." + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--total_droplets_included" + description: "The number of droplets from the rank-ordered UMI plot\nthat will\ + \ have their cell probabilities inferred as an\noutput. Include the droplets\ + \ which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should\ + \ be\n'surely empty' droplets.\n" + info: null + example: + - 25000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--force_cell_umi_prior" + description: "Ignore CellBender's heuristic prior estimation, and use this prior\ + \ for UMI counts in cells." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--force_empty_umi_prior" + description: "Ignore CellBender's heuristic prior estimation, and use this prior\ + \ for UMI counts in empty droplets." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--model" + description: "Which model is being used for count data.\n\n* 'naive' subtracts\ + \ the estimated ambient profile.\n* 'simple' does not model either ambient\ + \ RNA or random barcode swapping (for debugging purposes -- not recommended).\n\ + * 'ambient' assumes background RNA is incorporated into droplets.\n* 'swapping'\ + \ assumes background RNA comes from random barcode swapping (via PCR chimeras).\n\ + * 'full' uses a combined ambient and swapping model.\n" + info: null + default: + - "full" + required: false + choices: + - "naive" + - "simple" + - "ambient" + - "swapping" + - "full" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--epochs" + description: "Number of epochs to train." + info: null + default: + - 150 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--low_count_threshold" + description: "Droplets with UMI counts below this number are completely \nexcluded\ + \ from the analysis. 
This can help identify the correct \nprior for empty\ + \ droplet counts in the rare case where empty \ncounts are extremely high\ + \ (over 200).\n" + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_dim" + description: "Dimension of latent variable z.\n" + info: null + default: + - 64 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_layers" + description: "Dimension of hidden layers in the encoder for z.\n" + info: null + default: + - 512 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--training_fraction" + description: "Training detail: the fraction of the data used for training.\n\ + The rest is never seen by the inference algorithm. Speeds up learning.\n" + info: null + default: + - 0.9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--empty_drop_training_fraction" + description: "Training detail: the fraction of the training data each epoch\ + \ that \nis drawn (randomly sampled) from surely empty droplets.\n" + info: null + default: + - 0.2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--ignore_features" + description: "Integer indices of features to ignore entirely. In the output\n\ + count matrix, the counts for these features will be unchanged.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--fpr" + description: "Target 'delta' false positive rate in [0, 1). 
Use 0 for a cohort\n\ + of samples which will be jointly analyzed for differential expression.\nA\ + \ false positive is a true signal count that is erroneously removed.\nMore\ + \ background removal is accompanied by more signal removal at\nhigh values\ + \ of FPR. You can specify multiple values, which will\ncreate multiple output\ + \ files.\n" + info: null + default: + - 0.01 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--exclude_feature_types" + description: "Feature types to ignore during the analysis. These features will\n\ + be left unchanged in the output file.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--projected_ambient_count_threshold" + description: "Controls how many features are included in the analysis, which\n\ + can lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD\ + \ counts total in all cells\n(summed), then that gene is excluded, and it\ + \ will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD\ + \ = 0 will include all features\nwhich have even a single count in any empty\ + \ droplet.\n" + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--learning_rate" + description: "Training detail: lower learning rate for inference.\nA OneCycle\ + \ learning rate schedule is used, where the\nupper learning rate is ten times\ + \ this value. 
(For this\nvalue, probably do not exceed 1e-3).\n" + info: null + default: + - 1.0E-4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--final_elbo_fail_fraction" + description: "Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO\ + \ - initial_test_ELBO) > FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically\ + \ re-run if --num-training-tries > 1.\nBy default, will not fail training\ + \ based on final_training_ELBO.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--epoch_elbo_fail_fraction" + description: "Training is considered to have failed if \n(previous_epoch_test_ELBO\ + \ - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO)\ + \ > EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries\ + \ > 1.\nBy default, will not fail training based on epoch_training_ELBO.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_training_tries" + description: "Number of times to attempt to train the model. At each subsequent\ + \ attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--learning_rate_retry_mult" + description: "Learning rate is multiplied by this amount each time a new training\n\ + attempt is made. (This parameter is only used if training fails based\non\ + \ EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES\ + \ is > 1.) 
\n" + info: null + default: + - 0.2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--posterior_batch_size" + description: "Training detail: size of batches when creating the posterior.\n\ + Reduce this to avoid running out of GPU memory creating the posterior\n(will\ + \ be slower).\n" + info: null + default: + - 128 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--posterior_regulation" + description: "Posterior regularization method. (For experts: not required for\ + \ normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n\ + * PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n\ + * PRmu_gene is approximate mean-targeting per gene.\n" + info: null + required: false + choices: + - "PRq" + - "PRmu" + - "PRmu_gene" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alpha" + description: "Tunable parameter alpha for the PRq posterior regularization method\n\ + (not normally used: see documentation).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--q" + description: "Tunable parameter q for the CDF threshold estimation method (not\n\ + normally used: see documentation).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--estimator" + description: "Output denoised count estimation method. 
(For experts: not required\n\ + for normal usage, see documentation).\n" + info: null + default: + - "mckp" + required: false + choices: + - "map" + - "mean" + - "cdf" + - "sample" + - "mckp" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--estimator_multiple_cpu" + description: "Including the flag --estimator-multiple-cpu will use more than\ + \ one\nCPU to compute the MCKP output count estimator in parallel (does nothing\n\ + for other estimators).\n" + info: null + direction: "input" + dest: "par" + - type: "boolean" + name: "--constant_learning_rate" + description: "Including the flag --constant-learning-rate will use the ClippedAdam\n\ + optimizer instead of the OneCycleLR learning rate schedule, which is\nthe\ + \ default. Learning is faster with the OneCycleLR schedule.\nHowever, training\ + \ can easily be continued from a checkpoint for more\nepochs than the initial\ + \ command specified when using ClippedAdam. On\nthe other hand, if using the\ + \ OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick\ + \ up from that final checkpoint\nand continue training until 250 epochs.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--debug" + description: "Including the flag --debug will log extra messages useful for\ + \ debugging.\n" + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--cuda" + description: "Including the flag --cuda will run the inference on a\nGPU.\n" + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Eliminating technical artifacts from high-throughput single-cell RNA\ + \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ + \ and random barcode swapping from (raw) 
UMI-based scRNA-seq count matrices. \n\ + At the moment, only the count matrices produced by the CellRanger count pipeline\ + \ is supported. Support for additional tools and protocols \nwill be added in\ + \ the future. A quick start tutorial can be found here.\n\nFleming et al. 2022,\ + \ bioRxiv.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential\ + \ libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates\ + \ curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev\ + \ liblzma-dev mecab-ipadic-utf8 git \\\n&& curl https://pyenv.run | bash \\\n\ + && pyenv update \\\n&& pyenv install $PYTHON_VERSION \\\n&& pyenv global $PYTHON_VERSION\ + \ \\\n&& apt-get clean\n" + env: + - "PYENV_ROOT=\"/root/.pyenv\"" + - "PATH=\"$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH\"" + - "PYTHON_VERSION=3.7.16" + - type: "python" + user: false + packages: + - "mudata~=0.2.1" + - "cellbender~=0.3.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "muon" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + 
transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background/cellbender_remove_background" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/correction/cellbender_remove_background/cellbender_remove_background b/target/docker/correction/cellbender_remove_background/cellbender_remove_background new file mode 100755 index 00000000000..43e24f1f8aa --- /dev/null +++ b/target/docker/correction/cellbender_remove_background/cellbender_remove_background @@ -0,0 +1,2153 @@ +#!/usr/bin/env bash + +# cellbender_remove_background 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
+#
+# The component may contain files which fall under a different license. The
+# authors of this component should specify the license in the header of such
+# files, or include a separate license file detailing the licenses of all included
+# files.
+
+set -e
+
+# Resolve the temporary directory: keep VIASH_TEMP when it is already set,
+# otherwise take the first non-empty candidate below, ending at /tmp.
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1     : unquoted string
+# return : possibly quoted string
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  # printf is used instead of echo so that arguments which look like echo
+  # options (-n, -e, -E) are printed literally instead of being consumed.
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    # --flag=value: quote only the part after the first '='
+    printf '%s\n' "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    # bare flag: emit unchanged
+    printf '%s\n' "$1"
+  else
+    # plain value: wrap in single quotes
+    printf '%s\n' "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  printf '%s\n' "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the path of a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the bash file
+# NOTE: SOURCE and DIR are not declared local, so they stay set after the call.
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    # a relative link target is resolved against the directory of the link
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  # the message is emitted only when the current verbosity is at least the
+  # level required by the caller
+  if [ "$VIASH_VERBOSITY" -ge "$required_level" ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="cellbender_remove_background" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cellbender_remove_background 0.12.4" + echo "" + echo "Eliminating technical artifacts from high-throughput single-cell RNA sequencing" + echo "data." + echo "" + echo "This module removes counts due to ambient RNA molecules and random barcode" + echo "swapping from (raw) UMI-based scRNA-seq count matrices." + echo "At the moment, only the count matrices produced by the CellRanger count pipeline" + echo "is supported. Support for additional tools and protocols" + echo "will be added in the future. A quick start tutorial can be found here." + echo "" + echo "Fleming et al. 2022, bioRxiv." + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file. Data file on which to run tool. Data must be" + echo " un-filtered: it should include empty droplets." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " List of modalities to process." 
+ echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Full count matrix as an h5mu file, with background RNA removed. This" + echo " file contains all the original droplet barcodes." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo "" + echo " --layer_output" + echo " type: string" + echo " default: cellbender_corrected" + echo " Output layer" + echo "" + echo " --obs_background_fraction" + echo " type: string" + echo " default: cellbender_background_fraction" + echo "" + echo " --obs_cell_probability" + echo " type: string" + echo " default: cellbender_cell_probability" + echo "" + echo " --obs_cell_size" + echo " type: string" + echo " default: cellbender_cell_size" + echo "" + echo " --obs_droplet_efficiency" + echo " type: string" + echo " default: cellbender_droplet_efficiency" + echo "" + echo " --obs_latent_scale" + echo " type: string" + echo " default: cellbender_latent_scale" + echo "" + echo " --var_ambient_expression" + echo " type: string" + echo " default: cellbender_ambient_expression" + echo "" + echo " --obsm_gene_expression_encoding" + echo " type: string" + echo " default: cellbender_gene_expression_encoding" + echo "" + echo "Arguments:" + echo " --expected_cells_from_qc" + echo " type: boolean" + echo " default: false" + echo " Will use the Cell Ranger QC to determine the estimated number of cells" + echo "" + echo " --expected_cells" + echo " type: integer" + echo " example: 1000" + echo " Number of cells expected in the dataset (a rough estimate within a" + echo " factor of 2 is sufficient)." + echo "" + echo " --total_droplets_included" + echo " type: integer" + echo " example: 25000" + echo " The number of droplets from the rank-ordered UMI plot" + echo " that will have their cell probabilities inferred as an" + echo " output. 
Include the droplets which might contain cells." + echo " Droplets beyond TOTAL_DROPLETS_INCLUDED should be" + echo " 'surely empty' droplets." + echo "" + echo " --force_cell_umi_prior" + echo " type: integer" + echo " Ignore CellBender's heuristic prior estimation, and use this prior for" + echo " UMI counts in cells." + echo "" + echo " --force_empty_umi_prior" + echo " type: integer" + echo " Ignore CellBender's heuristic prior estimation, and use this prior for" + echo " UMI counts in empty droplets." + echo "" + echo " --model" + echo " type: string" + echo " default: full" + echo " choices: [ naive, simple, ambient, swapping, full ]" + echo " Which model is being used for count data." + echo " * 'naive' subtracts the estimated ambient profile." + echo " * 'simple' does not model either ambient RNA or random barcode swapping" + echo " (for debugging purposes -- not recommended)." + echo " * 'ambient' assumes background RNA is incorporated into droplets." + echo " * 'swapping' assumes background RNA comes from random barcode swapping" + echo " (via PCR chimeras)." + echo " * 'full' uses a combined ambient and swapping model." + echo "" + echo " --epochs" + echo " type: integer" + echo " default: 150" + echo " Number of epochs to train." + echo "" + echo " --low_count_threshold" + echo " type: integer" + echo " default: 5" + echo " Droplets with UMI counts below this number are completely" + echo " excluded from the analysis. This can help identify the correct" + echo " prior for empty droplet counts in the rare case where empty" + echo " counts are extremely high (over 200)." + echo "" + echo " --z_dim" + echo " type: integer" + echo " default: 64" + echo " Dimension of latent variable z." + echo "" + echo " --z_layers" + echo " type: integer, multiple values allowed" + echo " default: 512" + echo " Dimension of hidden layers in the encoder for z." 
+ echo "" + echo " --training_fraction" + echo " type: double" + echo " default: 0.9" + echo " Training detail: the fraction of the data used for training." + echo " The rest is never seen by the inference algorithm. Speeds up learning." + echo "" + echo " --empty_drop_training_fraction" + echo " type: double" + echo " default: 0.2" + echo " Training detail: the fraction of the training data each epoch that" + echo " is drawn (randomly sampled) from surely empty droplets." + echo "" + echo " --ignore_features" + echo " type: integer, multiple values allowed" + echo " Integer indices of features to ignore entirely. In the output" + echo " count matrix, the counts for these features will be unchanged." + echo "" + echo " --fpr" + echo " type: double, multiple values allowed" + echo " default: 0.01" + echo " Target 'delta' false positive rate in [0, 1). Use 0 for a cohort" + echo " of samples which will be jointly analyzed for differential expression." + echo " A false positive is a true signal count that is erroneously removed." + echo " More background removal is accompanied by more signal removal at" + echo " high values of FPR. You can specify multiple values, which will" + echo " create multiple output files." + echo "" + echo " --exclude_feature_types" + echo " type: string, multiple values allowed" + echo " Feature types to ignore during the analysis. These features will" + echo " be left unchanged in the output file." + echo "" + echo " --projected_ambient_count_threshold" + echo " type: double" + echo " default: 0.1" + echo " Controls how many features are included in the analysis, which" + echo " can lead to a large speedup. If a feature is expected to have less" + echo " than PROJECTED_AMBIENT_COUNT_THRESHOLD counts total in all cells" + echo " (summed), then that gene is excluded, and it will be unchanged" + echo " in the output count matrix. 
For example," + echo " PROJECTED_AMBIENT_COUNT_THRESHOLD = 0 will include all features" + echo " which have even a single count in any empty droplet." + echo "" + echo " --learning_rate" + echo " type: double" + echo " default: 1.0E-4" + echo " Training detail: lower learning rate for inference." + echo " A OneCycle learning rate schedule is used, where the" + echo " upper learning rate is ten times this value. (For this" + echo " value, probably do not exceed 1e-3)." + echo "" + echo " --final_elbo_fail_fraction" + echo " type: double" + echo " Training is considered to have failed if" + echo " (best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO)" + echo " > FINAL_ELBO_FAIL_FRACTION." + echo " Training will automatically re-run if --num-training-tries > 1." + echo " By default, will not fail training based on final_training_ELBO." + echo "" + echo " --epoch_elbo_fail_fraction" + echo " type: double" + echo " Training is considered to have failed if" + echo " (previous_epoch_test_ELBO -" + echo " current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO)" + echo " > EPOCH_ELBO_FAIL_FRACTION." + echo " Training will automatically re-run if --num-training-tries > 1." + echo " By default, will not fail training based on epoch_training_ELBO." + echo "" + echo " --num_training_tries" + echo " type: integer" + echo " default: 1" + echo " Number of times to attempt to train the model. At each subsequent" + echo " attempt," + echo " the learning rate is multiplied by LEARNING_RATE_RETRY_MULT." + echo "" + echo " --learning_rate_retry_mult" + echo " type: double" + echo " default: 0.2" + echo " Learning rate is multiplied by this amount each time a new training" + echo " attempt is made. 
(This parameter is only used if training fails based" + echo " on EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and" + echo " NUM_TRAINING_TRIES is > 1.)" + echo "" + echo " --posterior_batch_size" + echo " type: integer" + echo " default: 128" + echo " Training detail: size of batches when creating the posterior." + echo " Reduce this to avoid running out of GPU memory creating the posterior" + echo " (will be slower)." + echo "" + echo " --posterior_regulation" + echo " type: string" + echo " choices: [ PRq, PRmu, PRmu_gene ]" + echo " Posterior regularization method. (For experts: not required for normal" + echo " usage," + echo " see documentation)." + echo " * PRq is approximate quantile-targeting." + echo " * PRmu is approximate mean-targeting aggregated over genes (behavior of" + echo " v0.2.0)." + echo " * PRmu_gene is approximate mean-targeting per gene." + echo "" + echo " --alpha" + echo " type: double" + echo " Tunable parameter alpha for the PRq posterior regularization method" + echo " (not normally used: see documentation)." + echo "" + echo " --q" + echo " type: double" + echo " Tunable parameter q for the CDF threshold estimation method (not" + echo " normally used: see documentation)." + echo "" + echo " --estimator" + echo " type: string" + echo " default: mckp" + echo " choices: [ map, mean, cdf, sample, mckp ]" + echo " Output denoised count estimation method. (For experts: not required" + echo " for normal usage, see documentation)." + echo "" + echo " --estimator_multiple_cpu" + echo " type: boolean_true" + echo " Including the flag --estimator-multiple-cpu will use more than one" + echo " CPU to compute the MCKP output count estimator in parallel (does nothing" + echo " for other estimators)." 
+ echo "" + echo " --constant_learning_rate" + echo " type: boolean" + echo " Including the flag --constant-learning-rate will use the ClippedAdam" + echo " optimizer instead of the OneCycleLR learning rate schedule, which is" + echo " the default. Learning is faster with the OneCycleLR schedule." + echo " However, training can easily be continued from a checkpoint for more" + echo " epochs than the initial command specified when using ClippedAdam. On" + echo " the other hand, if using the OneCycleLR schedule with 150 epochs" + echo " specified, it is not possible to pick up from that final checkpoint" + echo " and continue training until 250 epochs." + echo "" + echo " --debug" + echo " type: boolean_true" + echo " Including the flag --debug will log extra messages useful for debugging." + echo "" + echo " --cuda" + echo " type: boolean_true" + echo " Including the flag --cuda will run the inference on a" + echo " GPU." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. 
Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 + +ENTRYPOINT [] + + +ENV PYENV_ROOT="/root/.pyenv" +ENV PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" +ENV PYTHON_VERSION=3.7.16 +RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev mecab-ipadic-utf8 git \ +&& curl https://pyenv.run | bash \ +&& pyenv update \ +&& pyenv install $PYTHON_VERSION \ +&& pyenv global $PYTHON_VERSION \ +&& apt-get clean + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.1" "cellbender~=0.3.0" + +LABEL 
org.opencontainers.image.description="Companion container for running component correction cellbender_remove_background" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellbender_remove_background-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "cellbender_remove_background 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer_output) + [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer_output=*) + [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output=*\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_background_fraction) + [ -n "$VIASH_PAR_OBS_BACKGROUND_FRACTION" ] && ViashError Bad arguments for option \'--obs_background_fraction\': \'$VIASH_PAR_OBS_BACKGROUND_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BACKGROUND_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_background_fraction. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_background_fraction=*) + [ -n "$VIASH_PAR_OBS_BACKGROUND_FRACTION" ] && ViashError Bad arguments for option \'--obs_background_fraction=*\': \'$VIASH_PAR_OBS_BACKGROUND_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBS_BACKGROUND_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_cell_probability) + [ -n "$VIASH_PAR_OBS_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_cell_probability\': \'$VIASH_PAR_OBS_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_CELL_PROBABILITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_cell_probability. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_cell_probability=*) + [ -n "$VIASH_PAR_OBS_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_cell_probability=*\': \'$VIASH_PAR_OBS_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_CELL_PROBABILITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_cell_size) + [ -n "$VIASH_PAR_OBS_CELL_SIZE" ] && ViashError Bad arguments for option \'--obs_cell_size\': \'$VIASH_PAR_OBS_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_CELL_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_cell_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_cell_size=*) + [ -n "$VIASH_PAR_OBS_CELL_SIZE" ] && ViashError Bad arguments for option \'--obs_cell_size=*\': \'$VIASH_PAR_OBS_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_CELL_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_droplet_efficiency) + [ -n "$VIASH_PAR_OBS_DROPLET_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_droplet_efficiency\': \'$VIASH_PAR_OBS_DROPLET_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_DROPLET_EFFICIENCY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_droplet_efficiency. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_droplet_efficiency=*) + [ -n "$VIASH_PAR_OBS_DROPLET_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_droplet_efficiency=*\': \'$VIASH_PAR_OBS_DROPLET_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_DROPLET_EFFICIENCY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_latent_scale) + [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LATENT_SCALE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_scale. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_latent_scale=*) + [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale=*\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LATENT_SCALE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_ambient_expression) + [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_AMBIENT_EXPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_ambient_expression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_ambient_expression=*) + [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression=*\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_VAR_AMBIENT_EXPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_gene_expression_encoding) + [ -n "$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING" ] && ViashError Bad arguments for option \'--obsm_gene_expression_encoding\': \'$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_gene_expression_encoding. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_gene_expression_encoding=*) + [ -n "$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING" ] && ViashError Bad arguments for option \'--obsm_gene_expression_encoding=*\': \'$VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --expected_cells_from_qc) + [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPECTED_CELLS_FROM_QC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells_from_qc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --expected_cells_from_qc=*) + [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc=*\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPECTED_CELLS_FROM_QC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --expected_cells) + [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_EXPECTED_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --expected_cells=*) + [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells=*\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPECTED_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --total_droplets_included) + [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TOTAL_DROPLETS_INCLUDED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --total_droplets_included. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --total_droplets_included=*) + [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included=*\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TOTAL_DROPLETS_INCLUDED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --force_cell_umi_prior) + [ -n "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_cell_umi_prior\': \'$VIASH_PAR_FORCE_CELL_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_CELL_UMI_PRIOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --force_cell_umi_prior. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --force_cell_umi_prior=*) + [ -n "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_cell_umi_prior=*\': \'$VIASH_PAR_FORCE_CELL_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_CELL_UMI_PRIOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --force_empty_umi_prior) + [ -n "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_empty_umi_prior\': \'$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_EMPTY_UMI_PRIOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --force_empty_umi_prior. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --force_empty_umi_prior=*) + [ -n "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" ] && ViashError Bad arguments for option \'--force_empty_umi_prior=*\': \'$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_EMPTY_UMI_PRIOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --model) + [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --model. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --model=*) + [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model=*\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --epochs) + [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --epochs=*) + [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs=*\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --low_count_threshold) + [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOW_COUNT_THRESHOLD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --low_count_threshold. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --low_count_threshold=*) + [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold=*\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOW_COUNT_THRESHOLD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --z_dim) + [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_Z_DIM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_dim. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --z_dim=*) + [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim=*\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_Z_DIM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --z_layers) + if [ -z "$VIASH_PAR_Z_LAYERS" ]; then + VIASH_PAR_Z_LAYERS="$2" + else + VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_layers. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --z_layers=*) + if [ -z "$VIASH_PAR_Z_LAYERS" ]; then + VIASH_PAR_Z_LAYERS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --training_fraction) + [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRAINING_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --training_fraction. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --training_fraction=*) + [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction=*\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRAINING_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --empty_drop_training_fraction) + [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --empty_drop_training_fraction. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --empty_drop_training_fraction=*) + [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction=*\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --ignore_features) + if [ -z "$VIASH_PAR_IGNORE_FEATURES" ]; then + VIASH_PAR_IGNORE_FEATURES="$2" + else + VIASH_PAR_IGNORE_FEATURES="$VIASH_PAR_IGNORE_FEATURES:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --ignore_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --ignore_features=*) + if [ -z "$VIASH_PAR_IGNORE_FEATURES" ]; then + VIASH_PAR_IGNORE_FEATURES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_IGNORE_FEATURES="$VIASH_PAR_IGNORE_FEATURES:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --fpr) + if [ -z "$VIASH_PAR_FPR" ]; then + VIASH_PAR_FPR="$2" + else + VIASH_PAR_FPR="$VIASH_PAR_FPR:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fpr. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fpr=*) + if [ -z "$VIASH_PAR_FPR" ]; then + VIASH_PAR_FPR=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FPR="$VIASH_PAR_FPR:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --exclude_feature_types) + if [ -z "$VIASH_PAR_EXCLUDE_FEATURE_TYPES" ]; then + VIASH_PAR_EXCLUDE_FEATURE_TYPES="$2" + else + VIASH_PAR_EXCLUDE_FEATURE_TYPES="$VIASH_PAR_EXCLUDE_FEATURE_TYPES:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude_feature_types. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --exclude_feature_types=*) + if [ -z "$VIASH_PAR_EXCLUDE_FEATURE_TYPES" ]; then + VIASH_PAR_EXCLUDE_FEATURE_TYPES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_EXCLUDE_FEATURE_TYPES="$VIASH_PAR_EXCLUDE_FEATURE_TYPES:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --projected_ambient_count_threshold) + [ -n "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--projected_ambient_count_threshold\': \'$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --projected_ambient_count_threshold. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --projected_ambient_count_threshold=*) + [ -n "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--projected_ambient_count_threshold=*\': \'$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --learning_rate) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --learning_rate=*) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --final_elbo_fail_fraction) + [ -n "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--final_elbo_fail_fraction\': \'$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FINAL_ELBO_FAIL_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --final_elbo_fail_fraction. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --final_elbo_fail_fraction=*) + [ -n "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--final_elbo_fail_fraction=*\': \'$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FINAL_ELBO_FAIL_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --epoch_elbo_fail_fraction) + [ -n "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--epoch_elbo_fail_fraction\': \'$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --epoch_elbo_fail_fraction. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --epoch_elbo_fail_fraction=*) + [ -n "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" ] && ViashError Bad arguments for option \'--epoch_elbo_fail_fraction=*\': \'$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --num_training_tries) + [ -n "$VIASH_PAR_NUM_TRAINING_TRIES" ] && ViashError Bad arguments for option \'--num_training_tries\': \'$VIASH_PAR_NUM_TRAINING_TRIES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_NUM_TRAINING_TRIES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_training_tries. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --num_training_tries=*) + [ -n "$VIASH_PAR_NUM_TRAINING_TRIES" ] && ViashError Bad arguments for option \'--num_training_tries=*\': \'$VIASH_PAR_NUM_TRAINING_TRIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_TRAINING_TRIES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --learning_rate_retry_mult) + [ -n "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" ] && ViashError Bad arguments for option \'--learning_rate_retry_mult\': \'$VIASH_PAR_LEARNING_RATE_RETRY_MULT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE_RETRY_MULT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate_retry_mult. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --learning_rate_retry_mult=*) + [ -n "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" ] && ViashError Bad arguments for option \'--learning_rate_retry_mult=*\': \'$VIASH_PAR_LEARNING_RATE_RETRY_MULT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE_RETRY_MULT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --posterior_batch_size) + [ -n "$VIASH_PAR_POSTERIOR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--posterior_batch_size\': \'$VIASH_PAR_POSTERIOR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_POSTERIOR_BATCH_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --posterior_batch_size. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --posterior_batch_size=*) + [ -n "$VIASH_PAR_POSTERIOR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--posterior_batch_size=*\': \'$VIASH_PAR_POSTERIOR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_POSTERIOR_BATCH_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --posterior_regulation) + [ -n "$VIASH_PAR_POSTERIOR_REGULATION" ] && ViashError Bad arguments for option \'--posterior_regulation\': \'$VIASH_PAR_POSTERIOR_REGULATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_POSTERIOR_REGULATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --posterior_regulation. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --posterior_regulation=*) + [ -n "$VIASH_PAR_POSTERIOR_REGULATION" ] && ViashError Bad arguments for option \'--posterior_regulation=*\': \'$VIASH_PAR_POSTERIOR_REGULATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_POSTERIOR_REGULATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alpha) + [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALPHA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alpha=*) + [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha=*\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALPHA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --q) + [ -n "$VIASH_PAR_Q" ] && ViashError Bad arguments for option \'--q\': \'$VIASH_PAR_Q\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_Q="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --q. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --q=*) + [ -n "$VIASH_PAR_Q" ] && ViashError Bad arguments for option \'--q=*\': \'$VIASH_PAR_Q\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_Q=$(ViashRemoveFlags "$1") + shift 1 + ;; + --estimator) + [ -n "$VIASH_PAR_ESTIMATOR" ] && ViashError Bad arguments for option \'--estimator\': \'$VIASH_PAR_ESTIMATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ESTIMATOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --estimator. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --estimator=*) + [ -n "$VIASH_PAR_ESTIMATOR" ] && ViashError Bad arguments for option \'--estimator=*\': \'$VIASH_PAR_ESTIMATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ESTIMATOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --estimator_multiple_cpu) + [ -n "$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU" ] && ViashError Bad arguments for option \'--estimator_multiple_cpu\': \'$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ESTIMATOR_MULTIPLE_CPU=true + shift 1 + ;; + --constant_learning_rate) + [ -n "$VIASH_PAR_CONSTANT_LEARNING_RATE" ] && ViashError Bad arguments for option \'--constant_learning_rate\': \'$VIASH_PAR_CONSTANT_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CONSTANT_LEARNING_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --constant_learning_rate. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --constant_learning_rate=*) + [ -n "$VIASH_PAR_CONSTANT_LEARNING_RATE" ] && ViashError Bad arguments for option \'--constant_learning_rate=*\': \'$VIASH_PAR_CONSTANT_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CONSTANT_LEARNING_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --debug) + [ -n "$VIASH_PAR_DEBUG" ] && ViashError Bad arguments for option \'--debug\': \'$VIASH_PAR_DEBUG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DEBUG=true + shift 1 + ;; + --cuda) + [ -n "$VIASH_PAR_CUDA" ] && ViashError Bad arguments for option \'--cuda\': \'$VIASH_PAR_CUDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CUDA=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then + VIASH_PAR_LAYER_OUTPUT="cellbender_corrected" +fi +if [ -z ${VIASH_PAR_OBS_BACKGROUND_FRACTION+x} ]; then + VIASH_PAR_OBS_BACKGROUND_FRACTION="cellbender_background_fraction" +fi +if [ -z ${VIASH_PAR_OBS_CELL_PROBABILITY+x} ]; then + VIASH_PAR_OBS_CELL_PROBABILITY="cellbender_cell_probability" +fi +if [ -z ${VIASH_PAR_OBS_CELL_SIZE+x} ]; then + VIASH_PAR_OBS_CELL_SIZE="cellbender_cell_size" +fi +if [ -z ${VIASH_PAR_OBS_DROPLET_EFFICIENCY+x} ]; then + VIASH_PAR_OBS_DROPLET_EFFICIENCY="cellbender_droplet_efficiency" +fi +if [ -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then + VIASH_PAR_OBS_LATENT_SCALE="cellbender_latent_scale" +fi +if [ -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then + VIASH_PAR_VAR_AMBIENT_EXPRESSION="cellbender_ambient_expression" +fi +if [ -z ${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING+x} ]; then + VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING="cellbender_gene_expression_encoding" +fi +if [ -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then + VIASH_PAR_EXPECTED_CELLS_FROM_QC="false" +fi +if [ -z ${VIASH_PAR_MODEL+x} ]; then + VIASH_PAR_MODEL="full" +fi +if [ -z ${VIASH_PAR_EPOCHS+x} ]; then + VIASH_PAR_EPOCHS="150" +fi +if [ -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then + VIASH_PAR_LOW_COUNT_THRESHOLD="5" +fi +if [ -z ${VIASH_PAR_Z_DIM+x} ]; then + VIASH_PAR_Z_DIM="64" +fi +if [ -z ${VIASH_PAR_Z_LAYERS+x} ]; then + VIASH_PAR_Z_LAYERS="512" +fi +if [ -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then + VIASH_PAR_TRAINING_FRACTION="0.9" +fi +if [ -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then + VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="0.2" +fi +if [ -z ${VIASH_PAR_FPR+x} ]; then + VIASH_PAR_FPR="0.01" +fi +if [ -z ${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD+x} ]; then + VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD="0.1" +fi +if [ -z ${VIASH_PAR_LEARNING_RATE+x} ]; then + VIASH_PAR_LEARNING_RATE="1.0E-4" +fi 
+if [ -z ${VIASH_PAR_NUM_TRAINING_TRIES+x} ]; then + VIASH_PAR_NUM_TRAINING_TRIES="1" +fi +if [ -z ${VIASH_PAR_LEARNING_RATE_RETRY_MULT+x} ]; then + VIASH_PAR_LEARNING_RATE_RETRY_MULT="0.2" +fi +if [ -z ${VIASH_PAR_POSTERIOR_BATCH_SIZE+x} ]; then + VIASH_PAR_POSTERIOR_BATCH_SIZE="128" +fi +if [ -z ${VIASH_PAR_ESTIMATOR+x} ]; then + VIASH_PAR_ESTIMATOR="mckp" +fi +if [ -z ${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU+x} ]; then + VIASH_PAR_ESTIMATOR_MULTIPLE_CPU="false" +fi +if [ -z ${VIASH_PAR_DEBUG+x} ]; then + VIASH_PAR_DEBUG="false" +fi +if [ -z ${VIASH_PAR_CUDA+x} ]; then + VIASH_PAR_CUDA="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ]]; then + if ! [[ "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--expected_cells_from_qc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EXPECTED_CELLS" ]]; then + if ! [[ "$VIASH_PAR_EXPECTED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--expected_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ]]; then + if ! [[ "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--total_droplets_included' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" ]]; then + if ! [[ "$VIASH_PAR_FORCE_CELL_UMI_PRIOR" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--force_cell_umi_prior' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" ]]; then + if ! 
[[ "$VIASH_PAR_FORCE_EMPTY_UMI_PRIOR" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--force_empty_umi_prior' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ]]; then + if ! [[ "$VIASH_PAR_LOW_COUNT_THRESHOLD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--low_count_threshold' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_Z_DIM" ]]; then + if ! [[ "$VIASH_PAR_Z_DIM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--z_dim' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_Z_LAYERS" ]; then + IFS=':' + set -f + for val in $VIASH_PAR_Z_LAYERS; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--z_layers' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_TRAINING_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--training_fraction' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--empty_drop_training_fraction' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_IGNORE_FEATURES" ]; then + IFS=':' + set -f + for val in $VIASH_PAR_IGNORE_FEATURES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--ignore_features' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_FPR" ]; then + IFS=':' + set -f + for val in $VIASH_PAR_FPR; do + if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--fpr' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" ]]; then + if ! [[ "$VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--projected_ambient_count_threshold' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then + if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_FINAL_ELBO_FAIL_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--final_elbo_fail_fraction' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--epoch_elbo_fail_fraction' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NUM_TRAINING_TRIES" ]]; then + if ! [[ "$VIASH_PAR_NUM_TRAINING_TRIES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--num_training_tries' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" ]]; then + if ! 
[[ "$VIASH_PAR_LEARNING_RATE_RETRY_MULT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--learning_rate_retry_mult' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_POSTERIOR_BATCH_SIZE" ]]; then + if ! [[ "$VIASH_PAR_POSTERIOR_BATCH_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--posterior_batch_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALPHA" ]]; then + if ! [[ "$VIASH_PAR_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--alpha' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_Q" ]]; then + if ! [[ "$VIASH_PAR_Q" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--q' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU" ]]; then + if ! [[ "$VIASH_PAR_ESTIMATOR_MULTIPLE_CPU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--estimator_multiple_cpu' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CONSTANT_LEARNING_RATE" ]]; then + if ! [[ "$VIASH_PAR_CONSTANT_LEARNING_RATE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--constant_learning_rate' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DEBUG" ]]; then + if ! [[ "$VIASH_PAR_DEBUG" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--debug' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CUDA" ]]; then + if ! 
[[ "$VIASH_PAR_CUDA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--cuda' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_MODEL" ]; then + VIASH_PAR_MODEL_CHOICES=("naive:simple:ambient:swapping:full") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_MODEL_CHOICES[*]}:" =~ ":$VIASH_PAR_MODEL:" ]]; then + ViashError '--model' specified value of \'$VIASH_PAR_MODEL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_POSTERIOR_REGULATION" ]; then + VIASH_PAR_POSTERIOR_REGULATION_CHOICES=("PRq:PRmu:PRmu_gene") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_POSTERIOR_REGULATION_CHOICES[*]}:" =~ ":$VIASH_PAR_POSTERIOR_REGULATION:" ]]; then + ViashError '--posterior_regulation' specified value of \'$VIASH_PAR_POSTERIOR_REGULATION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_ESTIMATOR" ]; then + VIASH_PAR_ESTIMATOR_CHOICES=("map:mean:cdf:sample:mckp") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_ESTIMATOR_CHOICES[*]}:" =~ ":$VIASH_PAR_ESTIMATOR:" ]]; then + ViashError '--estimator' specified value of \'$VIASH_PAR_ESTIMATOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellbender_remove_background-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +import tempfile +import subprocess +import os +import sys +import numpy as np +from scipy.sparse import csr_matrix +from cellbender.remove_background.downstream import anndata_from_h5 +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_background_fraction': $( if [ ! -z ${VIASH_PAR_OBS_BACKGROUND_FRACTION+x} ]; then echo "r'${VIASH_PAR_OBS_BACKGROUND_FRACTION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_cell_probability': $( if [ ! 
-z ${VIASH_PAR_OBS_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_PROBABILITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_cell_size': $( if [ ! -z ${VIASH_PAR_OBS_CELL_SIZE+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_SIZE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_droplet_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_DROPLET_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_DROPLET_EFFICIENCY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_gene_expression_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'expected_cells_from_qc': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'force_cell_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_CELL_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_CELL_UMI_PRIOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'force_empty_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'epochs': $( if [ ! 
-z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), + 'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'ignore_features': $( if [ ! -z ${VIASH_PAR_IGNORE_FEATURES+x} ]; then echo "list(map(int, r'${VIASH_PAR_IGNORE_FEATURES//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), + 'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), + 'exclude_feature_types': $( if [ ! -z ${VIASH_PAR_EXCLUDE_FEATURE_TYPES+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_FEATURE_TYPES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'projected_ambient_count_threshold': $( if [ ! -z ${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD+x} ]; then echo "float(r'${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'final_elbo_fail_fraction': $( if [ ! 
-z ${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'epoch_elbo_fail_fraction': $( if [ ! -z ${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'num_training_tries': $( if [ ! -z ${VIASH_PAR_NUM_TRAINING_TRIES+x} ]; then echo "int(r'${VIASH_PAR_NUM_TRAINING_TRIES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'learning_rate_retry_mult': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE_RETRY_MULT+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE_RETRY_MULT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'posterior_batch_size': $( if [ ! -z ${VIASH_PAR_POSTERIOR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_POSTERIOR_BATCH_SIZE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'posterior_regulation': $( if [ ! -z ${VIASH_PAR_POSTERIOR_REGULATION+x} ]; then echo "r'${VIASH_PAR_POSTERIOR_REGULATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'q': $( if [ ! -z ${VIASH_PAR_Q+x} ]; then echo "float(r'${VIASH_PAR_Q//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'estimator': $( if [ ! -z ${VIASH_PAR_ESTIMATOR+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'estimator_multiple_cpu': $( if [ ! -z ${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'constant_learning_rate': $( if [ ! -z ${VIASH_PAR_CONSTANT_LEARNING_RATE+x} ]; then echo "r'${VIASH_PAR_CONSTANT_LEARNING_RATE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'debug': $( if [ ! -z ${VIASH_PAR_DEBUG+x} ]; then echo "r'${VIASH_PAR_DEBUG//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'cuda': $( if [ ! 
-z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Performing log transformation on modality %s", mod) +data = mdata.mod[mod] + +# import pathlib +# with pathlib.Path(os.path.dirname(par["output"])) / "cellbender" as temp_dir: +# os.mkdir(temp_dir) +with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir: + # construct paths within tempdir + input_file = os.path.join(temp_dir, "input.h5ad") + output_file = os.path.join(temp_dir, "output.h5") + + logger.info("Creating AnnData input file for CellBender: '%s'", input_file) + data.write_h5ad(input_file) + + logger.info("Constructing CellBender command") + cmd_pars = [ + "cellbender", "remove-background", + "--input", input_file, + "--output", output_file, + # don't create checkpoints because they're not used / returned anyways + "--checkpoint-mins", "99999999" + ] + + if meta.get("cpus") is not None: + cmd_pars += ["--cpu-threads", str(meta["cpus"])] + + extra_args = [ + ("--expected-cells", "expected_cells", True), + ("--total-droplets-included", "total_droplets_included", True), + ("--force-cell-umi-prior", "force_cell_umi_prior", True), + ("--force-empty-umi-prior", 
"force_empty_umi_prior", True), + ("--model", "model", True), + ("--epochs", "epochs", True), + ("--low-count-threshold", "low_count_threshold", True), + ("--z-dim", "z_dim", True), + ("--z-layers", "z_layers", True), + ("--training-fraction", "training_fraction", True), + ("--empty-drop-training-fraction", "empty_drop_training_fraction", True), + ("--ignore-features", "ignore_features", True), + ("--fpr", "fpr", True), + ("--exclude-feature-types", "exclude_feature_types", True), + ("--projected-ambient-count-threshold", "projected_ambient_count_threshold", True), + ("--learning-rate", "learning_rate", True), + ("--final-elbo-fail-fraction", "final_elbo_fail_fraction", True), + ("--epoch-elbo-fail-fraction", "epoch_elbo_fail_fraction", True), + ("--num-training-tries", "num_training_tries", True), + ("--learning-rate-retry-mult", "learning_rate_retry_mult", True), + ("--posterior-batch-size", "posterior_batch_size", True), + ("--posterior-regulation", "posterior_regulation", True), + ("--alpha", "alpha", True), + ("--q", "q", True), + ("--estimator", "estimator", True), + ("--estimator-multiple-cpu", "estimator_multiple_cpu", False), + ("--constant-learning-rate", "constant_learning_rate", False), + ("--debug", "debug", False), + ("--cuda", "cuda", False), + ] + for (flag, name, is_kwarg) in extra_args: + if par[name]: + values = par[name] if isinstance(par[name], list) else [par[name]] + cmd_pars += [flag] + [str(val) for val in values] if is_kwarg else [flag] + + if par["expected_cells_from_qc"] and "metrics_cellranger" in data.uns: + assert par["expected_cells"] is None, "If min_counts is defined, expected_cells should be undefined" + assert par["total_droplets_included"] is None, "If min_counts is defined, expected_cells should be undefined" + met = data.uns["metrics_cellranger"] + col_name = "Estimated Number of Cells" + assert col_name in met.columns, "%s should be a column in .obs[metrics_cellranger]" + est_cells = met[col_name].values[0] + 
logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5) + cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5*est_cells)] + + logger.info("Running CellBender: '%s'", ' '.join(cmd_pars)) + out = subprocess.check_output(cmd_pars).decode("utf-8") + + logger.info("Reading CellBender 10xh5 output file: '%s'", output_file) + adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False) + + logger.info("CellBender output format:", adata_out) + + # AnnData object with n_obs x n_vars = 6794880 x 33538 + # obs: 'cellbender_analyzed' + # var: 'ambient_expression', 'feature_type', 'genome', 'gene_id', 'cellbender_analyzed' + # uns: 'background_fraction', 'barcode_indices_for_latents', 'cell_probability', 'cell_size', 'droplet_efficiency', 'gene_expression_encoding', + # 'cell_size_lognormal_std', 'empty_droplet_size_lognormal_loc', 'empty_droplet_size_lognormal_scale', 'swapping_fraction_dist_params', + # 'barcodes_analyzed', 'barcodes_analyzed_inds', 'estimator', 'features_analyzed_inds', 'fraction_data_used_for_testing', 'learning_curve_learning_rate_epoch', + # 'learning_curve_learning_rate_value', 'learning_curve_test_elbo', 'learning_curve_test_epoch', 'learning_curve_train_elbo', 'learning_curve_train_epoch', + # 'target_false_positive_rate' + + logger.info("Copying X output to MuData") + data.layers[par["layer_output"]] = adata_out.X + + logger.info("Copying .obs output to MuData") + obs_store = { + "obs_background_fraction": "background_fraction", + "obs_cell_probability": "cell_probability", + "obs_cell_size": "cell_size", + "obs_droplet_efficiency": "droplet_efficiency", + "obs_latent_scale": "latent_scale" + } + for to_name, from_name in obs_store.items(): + if par[to_name]: + if from_name in adata_out.obs: + data.obs[par[to_name]] = adata_out.obs[from_name] + # when using unfiltered data, the values will be in uns instead of obs + elif from_name in adata_out.uns and 
"barcode_indices_for_latents" in adata_out.uns: + vec = np.zeros(data.n_obs) + vec[adata_out.uns["barcode_indices_for_latents"]] = adata_out.uns[from_name] + data.obs[par[to_name]] = vec + + logger.info("Copying .var output to MuData") + var_store = { "var_ambient_expression": "ambient_expression" } + for to_name, from_name in var_store.items(): + if par[to_name]: + data.var[par[to_name]] = adata_out.var[from_name] + + logger.info("Copying obsm_gene_expression_encoding output to MuData") + obsm_store = { "obsm_gene_expression_encoding": "gene_expression_encoding" } + for to_name, from_name in obsm_store.items(): + if par[to_name]: + if from_name in adata_out.obsm: + data.obsm[par[to_name]] = adata_out.obsm[from_name] + elif from_name in adata_out.uns and "barcode_indices_for_latents" in adata_out.uns: + matrix_to_store = adata_out.uns[from_name] + number_of_obs = data.X.shape[0] + latent_space_sparse = csr_matrix((number_of_obs, par["z_dim"]), + dtype=adata_out.uns[from_name].dtype) + obs_rows_in_space_representation = adata_out.uns["barcode_indices_for_latents"] + latent_space_sparse[obs_rows_in_space_representation] = adata_out.uns[from_name] + data.obsm[par[to_name]] = latent_space_sparse + else: + raise RuntimeError("Requested to save latent gene encoding, but the data is either missing " + "from cellbender output or in an incorrect format.") + + +logger.info("Writing to file %s", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/correction/cellbender_remove_background/setup_logger.py b/target/docker/correction/cellbender_remove_background/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/correction/cellbender_remove_background/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml b/target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml new file mode 100644 index 00000000000..6184a725817 --- /dev/null +++ b/target/docker/correction/cellbender_remove_background_v0_2/.config.vsh.yaml @@ -0,0 +1,406 @@ +functionality: + name: "cellbender_remove_background_v0_2" + namespace: "correction" + version: "0.12.4" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file." 
+ info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "List of modalities to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Full count matrix as an h5mu file, with background RNA removed.\ + \ This file contains all the original droplet barcodes." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_output" + description: "Output layer" + info: null + default: + - "corrected" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_latent_rt_efficiency" + info: null + default: + - "latent_rt_efficiency" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_latent_cell_probability" + info: null + default: + - "latent_cell_probability" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_latent_scale" + info: null + default: + - "latent_scale" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_ambient_expression" + info: null + default: + - "ambient_expression" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: 
"string" + name: "--obsm_latent_gene_encoding" + info: null + default: + - "cellbender_latent_gene_encoding" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--expected_cells" + description: "Number of cells expected in the dataset (a rough estimate within\ + \ a factor of 2 is sufficient)." + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--total_droplets_included" + description: "The number of droplets from the rank-ordered UMI plot\nthat will\ + \ be analyzed. The largest 'total_droplets'\ndroplets will have their cell\ + \ probabilities inferred\nas an output.\n" + info: null + example: + - 25000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--expected_cells_from_qc" + description: "Will use the Cell Ranger QC to determine the estimated number\ + \ of cells" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--model" + description: "Which model is being used for count data. 'simple'\ndoes not model\ + \ either ambient RNA or random barcode\nswapping (for debugging purposes --\ + \ not recommended).\n'ambient' assumes background RNA is incorporated into\n\ + droplets. 'swapping' assumes background RNA comes from\nrandom barcode swapping.\ + \ 'full' uses a combined\nambient and swapping model.\n" + info: null + default: + - "full" + required: false + choices: + - "simple" + - "ambient" + - "swapping" + - "full" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--epochs" + description: "Number of epochs to train." 
+ info: null + default: + - 150 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--low_count_threshold" + description: "Droplets with UMI counts below this number are completely \nexcluded\ + \ from the analysis. This can help identify the correct \nprior for empty\ + \ droplet counts in the rare case where empty \ncounts are extremely high\ + \ (over 200).\n" + info: null + default: + - 15 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_dim" + description: "Dimension of latent variable z.\n" + info: null + default: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_layers" + description: "Dimension of hidden layers in the encoder for z.\n" + info: null + default: + - 500 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--training_fraction" + description: "Training detail: the fraction of the data used for training.\n\ + The rest is never seen by the inference algorithm. Speeds up learning.\n" + info: null + default: + - 0.9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--empty_drop_training_fraction" + description: "Training detail: the fraction of the training data each epoch\ + \ that \nis drawn (randomly sampled) from surely empty droplets.\n" + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--fpr" + description: "Target false positive rate in (0, 1). A false positive\nis a true\ + \ signal count that is erroneously removed.\nMore background removal is accompanied\ + \ by more signal\nremoval at high values of FPR. 
You can specify\nmultiple\ + \ values, which will create multiple output\nfiles.\n" + info: null + default: + - 0.01 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--exclude_antibody_capture" + description: "Including the flag --exclude-antibody-capture will\ncause remove-background\ + \ to operate on gene counts\nonly, ignoring other features.\n" + info: null + direction: "input" + dest: "par" + - type: "double" + name: "--learning_rate" + description: "Training detail: lower learning rate for inference. A\nOneCycle\ + \ learning rate schedule is used, where the\nupper learning rate is ten times\ + \ this value. (For this\nvalue, probably do not exceed 1e-3).\n" + info: null + example: + - 1.0E-4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--cuda" + description: "Including the flag --cuda will run the inference on a\nGPU.\n" + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "helper.py" + - type: "file" + path: "src/utils/setup_logger.py" + description: "Eliminating technical artifacts from high-throughput single-cell RNA\ + \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ + \ and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \n\ + At the moment, only the count matrices produced by the CellRanger count pipeline\ + \ is supported. Support for additional tools and protocols \nwill be added in\ + \ the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022,\ + \ bioRxiv.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/pytorch:22.12-py3" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "muon==0.1.5" + - "tables==3.8.0" + - "cellbender==0.2.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "muon~=0.1.4" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + 
config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background_v0_2" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 b/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 new file mode 100755 index 00000000000..be9ee819e84 --- /dev/null +++ b/target/docker/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2 @@ -0,0 +1,1629 @@ +#!/usr/bin/env bash + +# cellbender_remove_background_v0_2 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+
+set -e
+
+# Pick the temporary directory: keep VIASH_TEMP when it is already set, otherwise
+# take the first non-empty candidate below, ending at /tmp.
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1     : unquoted string
+# return : possibly quoted string
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory of a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory that contains the bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  # resolve the symlink chain; a relative link target is interpreted
+  # relative to the directory of the link itself
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+# default verbosity: messages up to and including 'notice' are shown
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] ', printed only when
+#         VIASH_VERBOSITY is at least the required level.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  # Hand the message to ViashLog together with the INFO log code and the 'info' label.
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stdout: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so that a script path containing whitespace is passed as one argument)
VIASH_META_RESOURCES_DIR=$(ViashSourceDir "${BASH_SOURCE[0]}")

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="cellbender_remove_background_v0_2"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
# stdout: the component name and version, its description, and one entry per
#         argument (type, default/example/choices and description).
# The text is emitted from a quoted heredoc, so nothing in it is expanded.
function ViashHelp {
  cat << 'VIASHHELP'
cellbender_remove_background_v0_2 0.12.4

Eliminating technical artifacts from high-throughput single-cell RNA sequencing
data.

This module removes counts due to ambient RNA molecules and random barcode
swapping from (raw) UMI-based scRNA-seq count matrices.
At the moment, only the count matrices produced by the CellRanger count pipeline
is supported. Support for additional tools and protocols
will be added in the future. A quick start tutorial can be found here.

Fleming et al. 2022, bioRxiv.

Inputs:
 -i, --input
 type: file, required parameter, file must exist
 example: input.h5mu
 Input h5mu file.

 --modality
 type: string
 default: rna
 List of modalities to process.

Outputs:
 -o, --output
 type: file, required parameter, output, file must exist
 example: output.h5mu
 Full count matrix as an h5mu file, with background RNA removed. This
 file contains all the original droplet barcodes.

 --output_compression
 type: string
 example: gzip
 choices: [ gzip, lzf ]

 --layer_output
 type: string
 default: corrected
 Output layer

 --obs_latent_rt_efficiency
 type: string
 default: latent_rt_efficiency

 --obs_latent_cell_probability
 type: string
 default: latent_cell_probability

 --obs_latent_scale
 type: string
 default: latent_scale

 --var_ambient_expression
 type: string
 default: ambient_expression

 --obsm_latent_gene_encoding
 type: string
 default: cellbender_latent_gene_encoding

Arguments:
 --expected_cells
 type: integer
 example: 1000
 Number of cells expected in the dataset (a rough estimate within a
 factor of 2 is sufficient).

 --total_droplets_included
 type: integer
 example: 25000
 The number of droplets from the rank-ordered UMI plot
 that will be analyzed. The largest 'total_droplets'
 droplets will have their cell probabilities inferred
 as an output.

 --expected_cells_from_qc
 type: boolean
 default: true
 Will use the Cell Ranger QC to determine the estimated number of cells

 --model
 type: string
 default: full
 choices: [ simple, ambient, swapping, full ]
 Which model is being used for count data. 'simple'
 does not model either ambient RNA or random barcode
 swapping (for debugging purposes -- not recommended).
 'ambient' assumes background RNA is incorporated into
 droplets. 'swapping' assumes background RNA comes from
 random barcode swapping. 'full' uses a combined
 ambient and swapping model.

 --epochs
 type: integer
 default: 150
 Number of epochs to train.

 --low_count_threshold
 type: integer
 default: 15
 Droplets with UMI counts below this number are completely
 excluded from the analysis. This can help identify the correct
 prior for empty droplet counts in the rare case where empty
 counts are extremely high (over 200).

 --z_dim
 type: integer
 default: 100
 Dimension of latent variable z.

 --z_layers
 type: integer, multiple values allowed
 default: 500
 Dimension of hidden layers in the encoder for z.

 --training_fraction
 type: double
 default: 0.9
 Training detail: the fraction of the data used for training.
 The rest is never seen by the inference algorithm. Speeds up learning.

 --empty_drop_training_fraction
 type: double
 default: 0.5
 Training detail: the fraction of the training data each epoch that
 is drawn (randomly sampled) from surely empty droplets.

 --fpr
 type: double, multiple values allowed
 default: 0.01
 Target false positive rate in (0, 1). A false positive
 is a true signal count that is erroneously removed.
 More background removal is accompanied by more signal
 removal at high values of FPR. You can specify
 multiple values, which will create multiple output
 files.

 --exclude_antibody_capture
 type: boolean_true
 Including the flag --exclude-antibody-capture will
 cause remove-background to operate on gene counts
 only, ignoring other features.

 --learning_rate
 type: double
 example: 1.0E-4
 Training detail: lower learning rate for inference. A
 OneCycle learning rate schedule is used, where the
 upper learning rate is ten times this value. (For this
 value, probably do not exceed 1e-3).

 --cuda
 type: boolean_true
 Including the flag --cuda will run the inference on a
 GPU.
VIASHHELP
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the `docker` executable cannot be found
# or when `docker version` fails; otherwise returns normally.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # Run `command -v` directly as the condition. Wrapping it in `[ ... ]` would
  # hand the words to `test`, which rejects them (its error being silenced by
  # the redirect), so the "not installed" branch could never be taken.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # Temporarily disable errexit so the exit status can be inspected, then
  # restore it if it was active before.
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?
# returns '1'
function ViashDockerRemoteTagCheck {
  # Only the exit status matters; all output of the inspection is discarded.
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  # `docker images -q` prints one id per matching image; empty output means no match.
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # Verbose enough: let docker print its own progress.
    docker pull "$1" && return 0 || return 1
  else
    # Quiet mode: hide docker's output and only warn on failure.
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : exit status of `docker push` (0 when the push succeeded)
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # Report on the image that was actually passed in, rather than on the global
  # VSHD_ID, which is only set when the call comes through ViashDockerSetup.
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2...               : extra arguments forwarded to ViashDockerBuild (e.g. --no-cache)
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  save=$-; set +e
  ViashDockerPull "$1"
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# Sets the globals VSHD_ID and VSHD_STRAT. Exits with status 1 on an
# unrecognised strategy.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  VSHD_ID="$1"
  VSHD_STRAT="$2"
  # `case` patterns instead of `[ a -o b ]` chains: same strategies, same
  # order of precedence, but no ambiguity in how `test` parses its operands.
  case "$VSHD_STRAT" in
    alwaysbuild|build|b)
      ViashDockerBuild "$VSHD_ID" --no-cache
      ;;
    alwayspull|pull|p)
      ViashDockerPull "$VSHD_ID"
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild "$VSHD_ID" --no-cache
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild "$VSHD_ID"
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild "$VSHD_ID"
      ;;
    ifneedbe*)
      # The "ifneedbe" family only acts when the image is not available locally.
      save=$-; set +e
      ViashDockerLocalTagCheck "$VSHD_ID"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $VSHD_ID already exists"
      else
        case "$VSHD_STRAT" in
          ifneedbebuild)
            ViashDockerBuild "$VSHD_ID" --no-cache
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild "$VSHD_ID"
            ;;
          ifneedbepull)
            ViashDockerPull "$VSHD_ID"
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild "$VSHD_ID" --no-cache
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild "$VSHD_ID"
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$VSHD_ID"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      # Push only when the remote does not already have this image.
      save=$-; set +e
      ViashDockerRemoteTagCheck "$VSHD_ID"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$VSHD_ID' exists, doing nothing."
      else
        ViashNotice "Container '$VSHD_ID' does not yet exist."
        ViashDockerPush "$VSHD_ID"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
      ;;
  esac
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# Exits with status 1 after logging the first command that is missing.
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  tag=$1
  shift 1
  commands="$*"
  save=$-; set +e
  # The loop runs inside the container with `sh`; it prints the first command
  # that `command -v` cannot resolve and stops with a non-zero status.
  missing=$(docker run --rm --entrypoint=sh "$tag" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
  outCheck=$?
  [[ $save =~ e ]] && set -e
  if [ $outCheck -ne 0 ]; then
    ViashError "Docker container '$tag' does not contain command '$missing'."
    exit 1
  fi
}


######## End of helper functions for setting up Docker images for viash ########

# ViashDockerfile: print the dockerfile to stdout
# return : dockerfile required to run this component
# examples:
#   ViashDockerfile
function ViashDockerfile {
  cat << 'VIASHDOCKER'
FROM nvcr.io/nvidia/pytorch:22.12-py3

ENTRYPOINT []


RUN pip install --upgrade pip && \
  pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "muon==0.1.5" "tables==3.8.0" "cellbender==0.2.1"

LABEL org.opencontainers.image.description="Companion container for running component correction cellbender_remove_background_v0_2"
LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
LABEL org.opencontainers.image.version="0.12.4"

VIASHDOCKER
}

# ViashDockerBuild: build a docker container
# $1              : image identifier with format `[registry/]image[:tag]`
# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellbender_remove_background_v0_2-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "cellbender_remove_background_v0_2 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer_output) + [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer_output. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --layer_output=*) + [ -n "$VIASH_PAR_LAYER_OUTPUT" ] && ViashError Bad arguments for option \'--layer_output=*\': \'$VIASH_PAR_LAYER_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_latent_rt_efficiency) + [ -n "$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_latent_rt_efficiency\': \'$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LATENT_RT_EFFICIENCY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_rt_efficiency. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_latent_rt_efficiency=*) + [ -n "$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY" ] && ViashError Bad arguments for option \'--obs_latent_rt_efficiency=*\': \'$VIASH_PAR_OBS_LATENT_RT_EFFICIENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LATENT_RT_EFFICIENCY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_latent_cell_probability) + [ -n "$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_latent_cell_probability\': \'$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LATENT_CELL_PROBABILITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_cell_probability. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_latent_cell_probability=*) + [ -n "$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY" ] && ViashError Bad arguments for option \'--obs_latent_cell_probability=*\': \'$VIASH_PAR_OBS_LATENT_CELL_PROBABILITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBS_LATENT_CELL_PROBABILITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_latent_scale) + [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LATENT_SCALE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_latent_scale. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_latent_scale=*) + [ -n "$VIASH_PAR_OBS_LATENT_SCALE" ] && ViashError Bad arguments for option \'--obs_latent_scale=*\': \'$VIASH_PAR_OBS_LATENT_SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LATENT_SCALE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_ambient_expression) + [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_AMBIENT_EXPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_ambient_expression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_ambient_expression=*) + [ -n "$VIASH_PAR_VAR_AMBIENT_EXPRESSION" ] && ViashError Bad arguments for option \'--var_ambient_expression=*\': \'$VIASH_PAR_VAR_AMBIENT_EXPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_AMBIENT_EXPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_latent_gene_encoding) + [ -n "$VIASH_PAR_OBSM_LATENT_GENE_ENCODING" ] && ViashError Bad arguments for option \'--obsm_latent_gene_encoding\': \'$VIASH_PAR_OBSM_LATENT_GENE_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSM_LATENT_GENE_ENCODING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_latent_gene_encoding. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_latent_gene_encoding=*) + [ -n "$VIASH_PAR_OBSM_LATENT_GENE_ENCODING" ] && ViashError Bad arguments for option \'--obsm_latent_gene_encoding=*\': \'$VIASH_PAR_OBSM_LATENT_GENE_ENCODING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_LATENT_GENE_ENCODING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --expected_cells) + [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPECTED_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --expected_cells=*) + [ -n "$VIASH_PAR_EXPECTED_CELLS" ] && ViashError Bad arguments for option \'--expected_cells=*\': \'$VIASH_PAR_EXPECTED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPECTED_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --total_droplets_included) + [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TOTAL_DROPLETS_INCLUDED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --total_droplets_included. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --total_droplets_included=*) + [ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ] && ViashError Bad arguments for option \'--total_droplets_included=*\': \'$VIASH_PAR_TOTAL_DROPLETS_INCLUDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TOTAL_DROPLETS_INCLUDED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --expected_cells_from_qc) + [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPECTED_CELLS_FROM_QC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --expected_cells_from_qc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --expected_cells_from_qc=*) + [ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ] && ViashError Bad arguments for option \'--expected_cells_from_qc=*\': \'$VIASH_PAR_EXPECTED_CELLS_FROM_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPECTED_CELLS_FROM_QC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --model) + [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --model. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --model=*) + [ -n "$VIASH_PAR_MODEL" ] && ViashError Bad arguments for option \'--model=*\': \'$VIASH_PAR_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --epochs) + [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --epochs=*) + [ -n "$VIASH_PAR_EPOCHS" ] && ViashError Bad arguments for option \'--epochs=*\': \'$VIASH_PAR_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --low_count_threshold) + [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOW_COUNT_THRESHOLD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --low_count_threshold. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --low_count_threshold=*) + [ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ] && ViashError Bad arguments for option \'--low_count_threshold=*\': \'$VIASH_PAR_LOW_COUNT_THRESHOLD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOW_COUNT_THRESHOLD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --z_dim) + [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_Z_DIM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_dim. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --z_dim=*) + [ -n "$VIASH_PAR_Z_DIM" ] && ViashError Bad arguments for option \'--z_dim=*\': \'$VIASH_PAR_Z_DIM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_Z_DIM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --z_layers) + if [ -z "$VIASH_PAR_Z_LAYERS" ]; then + VIASH_PAR_Z_LAYERS="$2" + else + VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --z_layers. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --z_layers=*) + if [ -z "$VIASH_PAR_Z_LAYERS" ]; then + VIASH_PAR_Z_LAYERS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_Z_LAYERS="$VIASH_PAR_Z_LAYERS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --training_fraction) + [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRAINING_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --training_fraction. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --training_fraction=*) + [ -n "$VIASH_PAR_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--training_fraction=*\': \'$VIASH_PAR_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRAINING_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --empty_drop_training_fraction) + [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --empty_drop_training_fraction. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --empty_drop_training_fraction=*) + [ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ] && ViashError Bad arguments for option \'--empty_drop_training_fraction=*\': \'$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --fpr) + if [ -z "$VIASH_PAR_FPR" ]; then + VIASH_PAR_FPR="$2" + else + VIASH_PAR_FPR="$VIASH_PAR_FPR:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --fpr. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --fpr=*) + if [ -z "$VIASH_PAR_FPR" ]; then + VIASH_PAR_FPR=$(ViashRemoveFlags "$1") + else + VIASH_PAR_FPR="$VIASH_PAR_FPR:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --exclude_antibody_capture) + [ -n "$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE" ] && ViashError Bad arguments for option \'--exclude_antibody_capture\': \'$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE=true + shift 1 + ;; + --learning_rate) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --learning_rate=*) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --cuda) + [ -n "$VIASH_PAR_CUDA" ] && ViashError Bad arguments for option \'--cuda\': \'$VIASH_PAR_CUDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CUDA=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" 
+  fi
+}
+# compute memory in different units (every conversion rounds up to the next whole unit)
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # memory string could not be parsed: unset by NAME so later ${VIASH_META_MEMORY_B+x} checks see it as undefined ('unset $VAR' expands to a bare, no-op 'unset')
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty (by NAME, so a set-but-empty value does not reach the ${VIASH_META_CPUS+x} checks)
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then + VIASH_PAR_LAYER_OUTPUT="corrected" +fi +if [ -z ${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY+x} ]; then + VIASH_PAR_OBS_LATENT_RT_EFFICIENCY="latent_rt_efficiency" +fi +if [ -z ${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY+x} ]; then + VIASH_PAR_OBS_LATENT_CELL_PROBABILITY="latent_cell_probability" +fi +if [ -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then + VIASH_PAR_OBS_LATENT_SCALE="latent_scale" +fi +if [ -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then + VIASH_PAR_VAR_AMBIENT_EXPRESSION="ambient_expression" +fi +if [ -z ${VIASH_PAR_OBSM_LATENT_GENE_ENCODING+x} ]; then + VIASH_PAR_OBSM_LATENT_GENE_ENCODING="cellbender_latent_gene_encoding" +fi +if [ -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then + VIASH_PAR_EXPECTED_CELLS_FROM_QC="true" +fi +if [ -z ${VIASH_PAR_MODEL+x} ]; then + VIASH_PAR_MODEL="full" +fi +if [ -z ${VIASH_PAR_EPOCHS+x} ]; then + VIASH_PAR_EPOCHS="150" +fi +if [ -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then + VIASH_PAR_LOW_COUNT_THRESHOLD="15" +fi +if [ -z ${VIASH_PAR_Z_DIM+x} ]; then + VIASH_PAR_Z_DIM="100" +fi +if [ -z ${VIASH_PAR_Z_LAYERS+x} ]; then + VIASH_PAR_Z_LAYERS="500" +fi +if [ -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then + VIASH_PAR_TRAINING_FRACTION="0.9" +fi +if [ -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then + VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION="0.5" +fi +if [ -z ${VIASH_PAR_FPR+x} ]; then + VIASH_PAR_FPR="0.01" +fi +if [ -z ${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE+x} ]; then + VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE="false" +fi +if [ -z ${VIASH_PAR_CUDA+x} ]; then + VIASH_PAR_CUDA="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_EXPECTED_CELLS" ]]; then + if ! 
[[ "$VIASH_PAR_EXPECTED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--expected_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" ]]; then + if ! [[ "$VIASH_PAR_TOTAL_DROPLETS_INCLUDED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--total_droplets_included' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" ]]; then + if ! [[ "$VIASH_PAR_EXPECTED_CELLS_FROM_QC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--expected_cells_from_qc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LOW_COUNT_THRESHOLD" ]]; then + if ! [[ "$VIASH_PAR_LOW_COUNT_THRESHOLD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--low_count_threshold' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_Z_DIM" ]]; then + if ! [[ "$VIASH_PAR_Z_DIM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--z_dim' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_Z_LAYERS" ]; then + IFS=':' + set -f + for val in $VIASH_PAR_Z_LAYERS; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--z_layers' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_TRAINING_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--training_fraction' has to be a double. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--empty_drop_training_fraction' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_FPR" ]; then + IFS=':' + set -f + for val in $VIASH_PAR_FPR; do + if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--fpr' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE" ]]; then + if ! [[ "$VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--exclude_antibody_capture' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then + if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CUDA" ]]; then + if ! [[ "$VIASH_PAR_CUDA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--cuda' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_MODEL" ]; then + VIASH_PAR_MODEL_CHOICES=("simple:ambient:swapping:full") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_MODEL_CHOICES[*]}:" =~ ":$VIASH_PAR_MODEL:" ]]; then + ViashError '--model' specified value of \'$VIASH_PAR_MODEL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. 
+ exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/correction_cellbender_remove_background_v0_2:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellbender_remove_background_v0_2-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +import tempfile +import subprocess +import os +import sys +import numpy as np +from scipy.sparse import csr_matrix + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_latent_rt_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_latent_cell_probability': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_latent_gene_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_LATENT_GENE_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_LATENT_GENE_ENCODING//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'expected_cells_from_qc': $( if [ ! 
-z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'epochs': $( if [ ! -z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), + 'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), + 'exclude_antibody_capture': $( if [ ! -z ${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'cuda': $( if [ ! -z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +from helper import anndata_from_h5 + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Performing log transformation on modality %s", mod) +data = mdata.mod[mod] + +# with pathlib.Path(meta["temp_dir"]) / "cellbender" as temp_dir: +# os.mkdir(temp_dir) +with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir: + # construct paths within tempdir + input_file = os.path.join(temp_dir, "input.h5ad") + output_file = os.path.join(temp_dir, "output.h5") + + logger.info("Creating AnnData input file for CellBender: '%s'", input_file) + data.write_h5ad(input_file) + + logger.info("Constructing CellBender command") + cmd_pars = [ + "cellbender", "remove-background", + "--input", input_file, + "--output", output_file + ] + + extra_args = [ + ("--expected-cells", "expected_cells", True), + ("--total-droplets-included", "total_droplets_included", True), + ("--model", "model", True), + ("--epochs", "epochs", True), + ("--cuda", "cuda", False), + ("--low-count-threshold", "low_count_threshold", True), + ("--z-dim", "z_dim", True), + ("--z-layers", "z_layers", True), + ("--training-fraction", "training_fraction", True), + ("--exclude-antibody-capture", 
"exclude_antibody_capture", False), + ("--learning-rate", "learning_rate", True), + ("--empty-drop-training-fraction", "empty_drop_training_fraction", True), + ] + for (flag, name, is_kwarg) in extra_args: + if par[name]: + values = par[name] if isinstance(par[name], list) else [par[name]] + cmd_pars += [flag] + [str(val) for val in values] if is_kwarg else [flag] + + if par["expected_cells_from_qc"] and "metrics_cellranger" in data.uns: + assert par["expected_cells"] is None, "If min_counts is defined, expected_cells should be undefined" + assert par["total_droplets_included"] is None, "If min_counts is defined, expected_cells should be undefined" + met = data.uns["metrics_cellranger"] + col_name = "Estimated Number of Cells" + assert col_name in met.columns, "%s should be a column in .obs[metrics_cellranger]" + est_cells = met[col_name].values[0] + logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5) + cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5*est_cells)] + + logger.info("Running CellBender: '%s'", ' '.join(cmd_pars)) + out = subprocess.check_output(cmd_pars).decode("utf-8") + + logger.info("Reading CellBender 10xh5 output file: '%s'", output_file) + # have to use custom read_10x_h5 function for now + # will be fixed when https://github.com/scverse/scanpy/pull/2344 is merged + # adata_out = sc.read_10x_h5(output_file, gex_only=False) + adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False) + + logger.info("Copying X output to MuData") + data.layers[par["layer_output"]] = adata_out.X + + logger.info("Copying .obs output to MuData") + obs_store = { + "obs_latent_rt_efficiency": "latent_RT_efficiency", + "obs_latent_cell_probability": "latent_cell_probability", + "obs_latent_scale": "latent_scale" + } + for to_name, from_name in obs_store.items(): + if par[to_name]: + if from_name in adata_out.obs: + data.obs[par[to_name]] = adata_out.obs[from_name] + # when 
using unfiltered data, the values will be in uns instead of obs + elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: + vec = np.zeros(data.n_obs) + vec[adata_out.uns['barcode_indices_for_latents']] = adata_out.uns[from_name] + data.obs[par[to_name]] = vec + + logger.info("Copying .var output to MuData") + var_store = { "var_ambient_expression": "ambient_expression" } + for to_name, from_name in var_store.items(): + if par[to_name]: + data.var[par[to_name]] = adata_out.var[from_name] + + logger.info("Copying obsm_latent_gene_encoding output to MuData") + obsm_store = { "obsm_latent_gene_encoding": "latent_gene_encoding" } + for to_name, from_name in obsm_store.items(): + if par[to_name]: + if from_name in adata_out.obsm: + data.obsm[par[to_name]] = adata_out.obsm[from_name] + elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: + matrix_to_store = adata_out.uns[from_name] + number_of_obs = data.X.shape[0] + latent_space_sparse = csr_matrix((number_of_obs, par['z_dim']), + dtype=adata_out.uns[from_name].dtype) + obs_rows_in_space_representation = adata_out.uns['barcode_indices_for_latents'] + latent_space_sparse[obs_rows_in_space_representation] = adata_out.uns[from_name] + data.obsm[par[to_name]] = latent_space_sparse + else: + raise RuntimeError("Requested to save latent gene encoding, but the data is either missing " + "from cellbender output or in an incorrect format.") + + +logger.info("Writing to file %s", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG")
+fi
+if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR")
+fi
+
+
+# check whether required files exist
+if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then
+  ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+  exit 1
+fi
+
+
+exit 0
diff --git a/target/docker/correction/cellbender_remove_background_v0_2/helper.py b/target/docker/correction/cellbender_remove_background_v0_2/helper.py
new file mode 100644
index 00000000000..479dd56f596
--- /dev/null
+++ b/target/docker/correction/cellbender_remove_background_v0_2/helper.py
@@ -0,0 +1,143 @@
+# This file is copied from https://github.com/broadinstitute/CellBender/issues/128#issuecomment-1175336065
+# to solve an issue with scanpy not being able to read in the 10x h5 files produced by cellbender.
+#
+# Note: If something doesn't work in this helper function, it may be interesting to
+# take a look at the comments by Dries: https://github.com/openpipelines-bio/openpipeline/pull/115
+# I'm not going to apply them for now -- if it ain't broke, don't fix it.
+import tables
+import numpy as np
+import scipy.sparse as sp
+import anndata
+from typing import Dict
+
+
+def anndata_from_h5(file: str,
+                    analyzed_barcodes_only: bool = True) -> 'anndata.AnnData':
+    """Load an output h5 file into an AnnData object for downstream work.
+
+    Args:
+        file: The h5 file
+        analyzed_barcodes_only: False to load all barcodes, so that the size of
+            the AnnData object will match the size of the input raw count matrix.
+            True to load a limited set of barcodes: only those analyzed by the
+            algorithm. This allows relevant latent variables to be loaded
+            properly into adata.obs and adata.obsm, rather than adata.uns.
+
+    Returns:
+        adata: The anndata object, populated with inferred latent variables
+            and metadata.
+
+    """
+
+    d = dict_from_h5(file)
+    X = sp.csc_matrix((d.pop('data'), d.pop('indices'), d.pop('indptr')),
+                      shape=d.pop('shape')).transpose().tocsr()
+
+    # check and see if we have barcode index annotations, and if the file is filtered
+    barcode_key = [k for k in d.keys() if (('barcode' in k) and ('ind' in k))]
+    if len(barcode_key) > 0:
+        max_barcode_ind = d[barcode_key[0]].max()
+        filtered_file = (max_barcode_ind >= X.shape[0])
+    else:
+        filtered_file = True
+
+    if analyzed_barcodes_only:
+        if filtered_file:
+            # filtered file being read, so we don't need to subset
+            print('Assuming we are loading a "filtered" file that contains only cells.')
+            pass
+        elif 'barcode_indices_for_latents' in d.keys():
+            X = X[d['barcode_indices_for_latents'], :]
+            d['barcodes'] = d['barcodes'][d['barcode_indices_for_latents']]
+        elif 'barcodes_analyzed_inds' in d.keys():
+            X = X[d['barcodes_analyzed_inds'], :]
+            d['barcodes'] = d['barcodes'][d['barcodes_analyzed_inds']]
+        else:
+            print('Warning: analyzed_barcodes_only=True, but the key '
+                  '"barcodes_analyzed_inds" or "barcode_indices_for_latents" '
+                  'is missing from the h5 file. '
+                  'Will output all barcodes, and proceed as if '
+                  'analyzed_barcodes_only=False')
+
+    # Construct the anndata object.
+    adata = anndata.AnnData(X=X,
+                            obs={'barcode': d.pop('barcodes').astype(str)},
+                            var={'gene_name': (d.pop('gene_names') if 'gene_names' in d.keys()
+                                               else d.pop('name')).astype(str)},
+                            dtype=X.dtype)
+    adata.obs.set_index('barcode', inplace=True)
+    adata.var.set_index('gene_name', inplace=True)
+
+    # For CellRanger v2 legacy format, "gene_ids" was called "genes"... rename this
+    if 'genes' in d.keys():
+        d['id'] = d.pop('genes')
+
+    # For purely aesthetic purposes, rename "id" to "gene_id"
+    if 'id' in d.keys():
+        d['gene_id'] = d.pop('id')
+
+    # If genomes are empty, try to guess them based on gene_id
+    if 'genome' in d.keys():
+        if np.array([s.decode() == '' for s in d['genome']]).all():
+            if '_' in d['gene_id'][0].decode():
+                print('Genome field blank, so attempting to guess genomes based on gene_id prefixes')
+                d['genome'] = np.array([s.decode().split('_')[0] for s in d['gene_id']], dtype=str)
+
+    # Add other information to the anndata object in the appropriate slot.
+    _fill_adata_slots_automatically(adata, d)
+
+    # Flag analyzed features in .var; one vectorised np.isin replaces a per-column `i in ndarray` scan.
+    if 'features_analyzed_inds' in adata.uns.keys():
+        adata.var['cellbender_analyzed'] = np.isin(np.arange(adata.shape[1]),
+                                                   adata.uns['features_analyzed_inds'])
+
+    if analyzed_barcodes_only:
+        for col in adata.obs.columns[adata.obs.columns.str.startswith('barcodes_analyzed')
+                                     | adata.obs.columns.str.startswith('barcode_indices')]:
+            try:
+                del adata.obs[col]
+            except Exception:
+                pass
+    else:
+        # All barcodes are kept: flag the analyzed ones in .obs (vectorised; this axis can hold every raw droplet).
+        if 'barcodes_analyzed_inds' in adata.uns.keys():
+            adata.obs['cellbender_analyzed'] = np.isin(np.arange(adata.shape[0]),
+                                                       adata.uns['barcodes_analyzed_inds'])
+
+    return adata
+
+
+def dict_from_h5(file: str) -> Dict[str, np.ndarray]:
+    """Read in everything from an h5 file and put into a dictionary."""
+    d = {}
+    with tables.open_file(file) as f:
+        # read in everything
+        for array in f.walk_nodes("/", "Array"):
+            d[array.name] = array.read()
+    return d
+
+
+def _fill_adata_slots_automatically(adata, d):
+    """Add other information to the adata object in the appropriate slot."""
+
+    for key, value in d.items():
+        try:
+            if value is None:
+                continue
+            value = np.asarray(value)
+            if len(value.shape) == 0:
+                adata.uns[key] = value
+            elif value.shape[0] == adata.shape[0]:
+                if (len(value.shape) < 2) or (value.shape[1] < 2):
+                    adata.obs[key] = value
+                else:
+                    adata.obsm[key] = value
+            elif value.shape[0] == adata.shape[1]:
+                if value.dtype.name.startswith('bytes'):
+                    adata.var[key] = value.astype(str)
+                else:
+                    adata.var[key] = value
+            else:
+                adata.uns[key] = value
+        except Exception:
+            print('Unable to load data into AnnData: ', key, value, type(value))
\ No newline at end of file
diff --git a/target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py b/target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py
new file mode 100644
index 00000000000..ae71eb96115
--- /dev/null
+++ b/target/docker/correction/cellbender_remove_background_v0_2/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
\ No newline at end of file
diff --git a/target/docker/dataflow/concat/.config.vsh.yaml
b/target/docker/dataflow/concat/.config.vsh.yaml new file mode 100644 index 00000000000..a8813a29f15 --- /dev/null +++ b/target/docker/dataflow/concat/.config.vsh.yaml @@ -0,0 +1,222 @@ +functionality: + name: "concat" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Paths to the different samples to be concatenated." + info: null + example: + - "sample_paths" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "string" + name: "--input_id" + description: "Names of the different samples that have to be concatenated. Must\ + \ be specified when using '--mode move'.\nIn this case, the ids will be used\ + \ for the columns names of the dataframes registring the conflicts.\nIf specified,\ + \ must be of same length as `--input`.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_sample_name" + description: "Name of the .obs key under which to add the sample names." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--other_axis_mode" + description: "How to handle the merging of other axis (var, obs, ...).\n\n - None:\ + \ keep no data\n - same: only keep elements of the matrices which are the same\ + \ in each of the samples\n - unique: only keep elements for which there is only\ + \ 1 possible value (1 value that can occur in multiple samples)\n - first: keep\ + \ the annotation from the first sample\n - only: keep elements that show up\ + \ in only one of the objects (1 unique element in only 1 sample)\n - move: identical\ + \ to 'same', but moving the conflicting values to .varm or .obsm\n" + info: null + default: + - "move" + required: false + choices: + - "same" + - "unique" + - "first" + - "only" + - "concat" + - "move" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Concatenates several uni-modal samples in .h5mu files into a single\ + \ file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + - type: "file" + path: "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + 
setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "pandas~=2.1.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + - "muon" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/concat" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/concat/concat" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/dataflow/concat/concat b/target/docker/dataflow/concat/concat new file mode 100755 index 
00000000000..bfa064a4f9b --- /dev/null +++ b/target/docker/dataflow/concat/concat @@ -0,0 +1,1386 @@ +#!/usr/bin/env bash + +# concat 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P 
"$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. 
+# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="concat" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "concat 0.12.4" + echo "" + echo "Concatenates several uni-modal samples in .h5mu files into a single file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: sample_paths" + echo " Paths to the different samples to be concatenated." + echo "" + echo " --input_id" + echo " type: string, multiple values allowed" + echo " Names of the different samples that have to be concatenated. Must be" + echo " specified when using '--mode move'." + echo " In this case, the ids will be used for the columns names of the" + echo " dataframes registring the conflicts." + echo " If specified, must be of same length as \`--input\`." 
+ echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --obs_sample_name" + echo " type: string" + echo " default: sample_id" + echo " Name of the .obs key under which to add the sample names." + echo "" + echo " --other_axis_mode" + echo " type: string" + echo " default: move" + echo " choices: [ same, unique, first, only, concat, move ]" + echo " How to handle the merging of other axis (var, obs, ...)." + echo " - None: keep no data" + echo " - same: only keep elements of the matrices which are the same in each" + echo " of the samples" + echo " - unique: only keep elements for which there is only 1 possible value" + echo " (1 value that can occur in multiple samples)" + echo " - first: keep the annotation from the first sample" + echo " - only: keep elements that show up in only one of the objects (1 unique" + echo " element in only 1 sample)" + echo " - move: identical to 'same', but moving the conflicting values to .varm" + echo " or .obsm" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "pandas~=2.1.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component dataflow concat" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + 
+VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-concat-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "concat 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID="$2" + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id=*) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_sample_name) + [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_SAMPLE_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_sample_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_sample_name=*) + [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name=*\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBS_SAMPLE_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --other_axis_mode) + [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OTHER_AXIS_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --other_axis_mode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --other_axis_mode=*) + [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode=*\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OTHER_AXIS_MODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_concat:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then + VIASH_PAR_OBS_SAMPLE_NAME="sample_id" +fi +if [ -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then + VIASH_PAR_OTHER_AXIS_MODE="move" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." 
+ exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_OTHER_AXIS_MODE" ]; then + VIASH_PAR_OTHER_AXIS_MODE_CHOICES=("same:unique:first:only:concat:move") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OTHER_AXIS_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_OTHER_AXIS_MODE:" ]]; then + ViashError '--other_axis_mode' specified value of \'$VIASH_PAR_OTHER_AXIS_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=',' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dataflow_concat:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_concat:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_concat:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-concat-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +import anndata +import mudata as mu +import pandas as pd +import numpy as np +from collections.abc import Iterable +from multiprocessing import Pool +from pathlib import Path +from h5py import File as H5File +from typing import Literal +import shutil + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_sample_name': $( if [ ! -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then echo "r'${VIASH_PAR_OBS_SAMPLE_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'other_axis_mode': $( if [ ! -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then echo "r'${VIASH_PAR_OTHER_AXIS_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) + +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion + +# from compress_h5mu import compress_h5mu +from h5py import Group, Dataset +from typing import Union +from functools import partial + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # 
Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = input_bytes.read(100) + # The metadata is padded with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\\0" * (512 - nbytes)) +# END TEMPORARY WORKAROUND compress_h5mu + +# START TEMPORARY WORKAROUND setup_logger +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def indexes_unique(indices: Iterable[pd.Index]) -> bool: + combined_indices = indices[0].append(indices[1:]) + return combined_indices.is_unique + +def check_observations_unique(samples: Iterable[anndata.AnnData]) -> 
None: + observation_ids = [sample.obs.index for sample in samples] + if not indexes_unique(observation_ids): + raise ValueError("Observations are not unique across samples.") + + +def nunique(row): + unique = pd.unique(row) + unique_without_na = pd.core.dtypes.missing.remove_na_arraylike(unique) + return len(unique_without_na) > 1 + +def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) -> bool: + """ + Check if any row contains duplicate values, that are not NA. + """ + numpy_array = frame.to_numpy() + with Pool(n_processes) as pool: + is_duplicated = pool.map(nunique, iter(numpy_array)) + return any(is_duplicated) + +def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \\ + -> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]: + """ + Merge matrices by combining columns that have the same name. + Columns that contain conflicting values (e.i. the columns have different values), + are not merged, but instead moved to a new dataframe. 
+ """ + column_names = set(column_name for var in matrices.values() for column_name in var) + logger.debug('Trying to concatenate columns: %s.', ",".join(column_names)) + if not column_names: + return {}, pd.DataFrame(index=align_to) + conflicts, concatenated_matrix = \\ + split_conflicts_and_concatenated_columns(n_processes, + matrices, + column_names, + align_to) + concatenated_matrix = cast_to_writeable_dtype(concatenated_matrix) + conflicts = {conflict_name: cast_to_writeable_dtype(conflict_df) + for conflict_name, conflict_df in conflicts.items()} + return conflicts, concatenated_matrix + +def get_first_non_na_value_vector(df): + numpy_arr = df.to_numpy() + n_rows, n_cols = numpy_arr.shape + col_index = pd.isna(numpy_arr).argmin(axis=1) + flat_index = n_cols * np.arange(n_rows) + col_index + return pd.Series(numpy_arr.ravel()[flat_index], index=df.index, name=df.columns[0]) + +def split_conflicts_and_concatenated_columns(n_processes: int, + matrices: dict[str, pd.DataFrame], + column_names: Iterable[str], + align_to: pd.Index | None = None) -> \\ + tuple[dict[str, pd.DataFrame], pd.DataFrame]: + """ + Retrieve columns with the same name from a list of dataframes which are + identical across all the frames (ignoring NA values). + Columns which are not the same are regarded as 'conflicts', + which are stored in seperate dataframes, one per columns + with the same name that store conflicting values. + """ + conflicts = {} + concatenated_matrix = [] + for column_name in column_names: + columns = {input_id: var[column_name] + for input_id, var in matrices.items() + if column_name in var} + assert columns, "Some columns should have been found." 
+ concatenated_columns = pd.concat(columns.values(), axis=1, + join="outer", sort=False) + if any_row_contains_duplicate_values(n_processes, concatenated_columns): + concatenated_columns.columns = columns.keys() # Use the sample id as column name + if align_to is not None: + concatenated_columns = concatenated_columns.reindex(align_to, copy=False) + conflicts[f'conflict_{column_name}'] = concatenated_columns + else: + unique_values = get_first_non_na_value_vector(concatenated_columns) + concatenated_matrix.append(unique_values) + if not concatenated_matrix: + return conflicts, pd.DataFrame(index=align_to) + concatenated_matrix = pd.concat(concatenated_matrix, join="outer", + axis=1, sort=False) + if align_to is not None: + concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False) + return conflicts, concatenated_matrix + +def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame: + """ + Cast the dataframe to dtypes that can be written by mudata. + """ + # dtype inferral workfs better with np.nan + result = result.replace({pd.NA: np.nan}) + + # MuData supports nullable booleans and ints + # ie. \`IntegerArray\` and \`BooleanArray\` + result = result.convert_dtypes(infer_objects=True, + convert_integer=True, + convert_string=False, + convert_boolean=True, + convert_floating=False) + + # Convert leftover 'object' columns to string + # However, na values are supported, so convert all values except NA's to string + object_cols = result.select_dtypes(include='object').columns.values + for obj_col in object_cols: + result[obj_col] = result[obj_col].where(result[obj_col].isna(), result[obj_col].astype(str)).astype('category') + return result + +def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnData], output: anndata.AnnData) \\ + -> anndata.AnnData: + """ + Merge .var and .obs matrices of the anndata objects. Columns are merged + when the values (excl NA) are the same in each of the matrices. 
+ Conflicting columns are moved to a separate dataframe (one dataframe for each column, + containing all the corresponding column from each sample). + """ + matrices_to_parse = ("var", "obs") + for matrix_name in matrices_to_parse: + matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()} + output_index = getattr(output, matrix_name).index + align_to = output_index if matrix_name == "var" else None + conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to) + if concatenated_matrix.empty: + concatenated_matrix.index = output_index + # Write the conflicts to the output + for conflict_name, conflict_data in conflicts.items(): + getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data + + # Set other annotation matrices in the output + setattr(output, matrix_name, concatenated_matrix) + + return output + + +def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str | Path], + other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData: + + concat_modes = { + "move": None, + } + other_axis_mode_to_apply = concat_modes.get(other_axis_mode, other_axis_mode) + + mod_data = {} + for input_id, input_file in zip(input_ids, input_files): + try: + mod_data[input_id] = mu.read_h5ad(input_file, mod=mod) + except KeyError as e: # Modality does not exist for this sample, skip it + if f"Unable to open object '{mod}' doesn't exist" not in str(e): + raise e + pass + check_observations_unique(mod_data.values()) + + concatenated_data = anndata.concat(mod_data.values(), join='outer', merge=other_axis_mode_to_apply) + + if other_axis_mode == "move": + concatenated_data = split_conflicts_modalities(n_processes, mod_data, concatenated_data) + + return concatenated_data + +def concatenate_modalities(n_processes: int, modalities: list[str], input_files: Path | str, + other_axis_mode: str, output_file: Path | str, + compression: Literal['gzip'] | Literal['lzf'], + input_ids: tuple[str] | None = 
None) -> None: + """ + Join the modalities together into a single multimodal sample. + """ + logger.info('Concatenating samples.') + output_file, input_files = Path(output_file), [Path(input_file) for input_file in input_files] + output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") + output_file_uncompressed.touch() + # Create empty mudata file + mdata = mu.MuData({modality: anndata.AnnData() for modality in modalities}) + mdata.write(output_file_uncompressed, compression=compression) + + for mod_name in modalities: + new_mod = concatenate_modality(n_processes, mod_name, + input_files, other_axis_mode, + input_ids) + logger.info("Writing out modality '%s' to '%s' with compression '%s'.", + mod_name, output_file_uncompressed, compression) + mu.write_h5ad(output_file_uncompressed, data=new_mod, mod=mod_name) + + if compression: + compress_h5mu(output_file_uncompressed, output_file, compression=compression) + output_file_uncompressed.unlink() + else: + shutil.move(output_file_uncompressed, output_file) + + logger.info("Concatenation successful.") + +def main() -> None: + # Get a list of all possible modalities + mods = set() + for path in par["input"]: + try: + with H5File(path, 'r') as f_root: + mods = mods | set(f_root["mod"].keys()) + except OSError: + raise OSError(f"Failed to load {path}. 
Is it a valid h5 file?") + + input_ids = None + if par["input_id"]: + input_ids: tuple[str] = tuple(i.strip() for i in par["input_id"]) + if len(input_ids) != len(par["input"]): + raise ValueError("The number of sample names must match the number of sample files.") + + if len(set(input_ids)) != len(input_ids): + raise ValueError("The sample names should be unique.") + + logger.info("\\nConcatenating data from paths:\\n\\t%s", + "\\n\\t".join(par["input"])) + + if par["other_axis_mode"] == "move" and not input_ids: + raise ValueError("--mode 'move' requires --input_ids.") + + n_processes = meta["cpus"] if meta["cpus"] else 1 + concatenate_modalities(n_processes, + list(mods), + par["input"], + par["other_axis_mode"], + par["output"], + par["output_compression"], + input_ids=input_ids) + + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=',' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/dataflow/concat/setup_logger.py b/target/docker/dataflow/concat/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/dataflow/concat/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/dataflow/merge/.config.vsh.yaml b/target/docker/dataflow/merge/.config.vsh.yaml new file mode 100644 index 00000000000..7b965881161 --- /dev/null +++ b/target/docker/dataflow/merge/.config.vsh.yaml @@ -0,0 +1,175 @@ +functionality: + name: "merge" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Paths to the single-modality .h5mu files that need to be combined" + info: null + default: + - "sample_paths" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Path to the output file." 
+ info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Combine one or more single-modality .h5mu files together into one\ + \ .h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu" + - type: "file" + path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "pandas~=2.0.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false 
+ transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/merge" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/merge/merge" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/dataflow/merge/merge b/target/docker/dataflow/merge/merge new file mode 100755 index 00000000000..19c4b37af72 --- /dev/null +++ b/target/docker/dataflow/merge/merge @@ -0,0 +1,1051 @@ +#!/usr/bin/env bash + +# merge 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. 
The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 
+VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. 
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] ' (ViashLog echoes to file descriptor 2).
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] ' (ViashLog echoes to file descriptor 2).
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="merge"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
# usage: ViashHelp
# stdout: the component name/version, its description and one entry per argument.
function ViashHelp {
  echo "merge 0.12.4"
  echo ""
  echo "Combine one or more single-modality .h5mu files together into one .h5mu file."
  echo ""
  echo "Arguments:"
  echo " -i, --input"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " default: sample_paths"
  echo " Paths to the single-modality .h5mu files that need to be combined"
  echo ""
  echo " -o, --output"
  echo " type: file, output, file must exist"
  echo " default: output.h5mu"
  echo " Path to the output file."
  echo ""
  echo " --output_compression"
  echo " type: string"
  echo " example: gzip"
  echo " choices: [ gzip, lzf ]"
  echo " The compression format to be used on the output h5mu object."
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the `docker` executable cannot be found
# or when `docker version` fails (i.e. the daemon cannot be reached).
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # `command -v` must be run as a command and negated with `!`. Wrapping it in
  # `[ ... ]` turns it into an invalid test expression that is never true, so
  # the missing-executable case was never reported.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so a failing `docker version` can be inspected
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1 : image identifier with format `[registry/]image[:tag]`
# exit code $? : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $? # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $? # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1 : image identifier with format `[registry/]image[:tag]`
# exit code $? : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $? # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $? # returns '1'
function ViashDockerLocalTagCheck {
  # `docker images -q` prints image ids; empty output means no local match
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1 : image identifier with format `[registry/]image[:tag]`
# exit code $? : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $? # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?
# returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "pandas~=2.0.0" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component dataflow merge" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + 
+VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-merge-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
# (continues the `---verbosity=*)` arm of the argument-parsing `case "$1"`)
      VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
      shift 1
      ;;
    --version)
      echo "merge 0.12.4"
      exit
      ;;
    # --input / -i may be repeated; values are accumulated comma-separated
    --input)
      if [ -z "$VIASH_PAR_INPUT" ]; then
        VIASH_PAR_INPUT="$2"
      else
        VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2"
      fi
      [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
      shift 2
      ;;
    --input=*)
      if [ -z "$VIASH_PAR_INPUT" ]; then
        VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
      else
        VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1")
      fi
      shift 1
      ;;
    -i)
      if [ -z "$VIASH_PAR_INPUT" ]; then
        VIASH_PAR_INPUT="$2"
      else
        VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2"
      fi
      [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1
      shift 2
      ;;
    # single-valued options: a second occurrence is rejected
    --output)
      [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_PAR_OUTPUT="$2"
      [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1
      shift 2
      ;;
    --output=*)
      [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
      shift 1
      ;;
    -o)
      [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_PAR_OUTPUT="$2"
      [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1
      shift 2
      ;;
    --output_compression)
      [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_PAR_OUTPUT_COMPRESSION="$2"
      [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1
      shift 2
      ;;
    --output_compression=*)
      [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
      shift 1
      ;;
    ---setup)
      VIASH_MODE='docker_setup'
      VIASH_DOCKER_SETUP_STRATEGY="$2"
      [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
      # the strategy is a separate word, so both the flag and its value are consumed
      shift 2
      ;;
    ---setup=*)
      VIASH_MODE='docker_setup'
      VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
      # flag and value share one word; shifting 2 here fails under `set -e`
      # when this is the last argument and otherwise swallows the next one
      shift 1
      ;;
    ---dockerfile)
      ViashDockerfile
      exit 0
      ;;
    # NOTE: `---v` is also listed by the earlier `---v|---verbose)` arm of this
    # case, which matches first; only `---volume` actually reaches this arm.
    ---v|---volume)
      VIASH_EXTRA_MOUNTS+=("--volume='$2'")
      shift 2
      ;;
    ---volume=*)
      # the value is embedded in "$1" (---volume=VALUE), not in the next word
      VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
      shift 1
      ;;
    ---debug)
      VIASH_MODE='docker_debug'
      shift 1
      ;;
    ---cpus)
      [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_META_CPUS="$2"
      [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
      shift 2
      ;;
    ---cpus=*)
      [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_META_CPUS=$(ViashRemoveFlags "$1")
      shift 1
      ;;
    ---memory)
      [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_META_MEMORY="$2"
      [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
      shift 2
      ;;
    ---memory=*)
      [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
      VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
      shift 1
      ;;
    *) # positional arg or unknown option
      # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
      VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
      [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_merge:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="output.h5mu" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." 
+ exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=',' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dataflow_merge:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_merge:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_merge:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-merge-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +import mudata as md +import pandas as pd +import numpy as np + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi )
}

### VIASH END

sys.path.append(meta["resources_dir"])
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    """Configure the root logger to log at INFO level to stdout and return it."""
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler(stdout)
    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    console_handler.setFormatter(logFormatter)
    logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()

def main():
    """Merge the modalities of all input .h5mu files into one MuData file.

    Reads every path in par["input"], collects their modalities (a modality
    name may occur in one input only), normalises the dtypes of the merged
    var and obs tables and writes the result to par["output"].

    Raises:
        ValueError: when the same modality name is found in more than one input.
    """
    logger.info('Reading input files %s', ",".join(par["input"]))
    input_samples = [md.read_h5mu(path) for path in par["input"]]

    logger.info('Merging into single object.')
    sample_modalities = {}
    for input_sample in input_samples:
        for mod_name, mod_data in input_sample.mod.items():
            if mod_name in sample_modalities:
                raise ValueError(f"Modality '{mod_name}' was found in more than 1 sample.")
            sample_modalities[mod_name] = mod_data

    merged = md.MuData(sample_modalities)
    merged.update()
    for df_attr in ("var", "obs"):
        df = getattr(merged, df_attr)
        df = df.replace({pd.NA: np.nan}, inplace=False)

        # MuData supports nullable booleans and ints
        # ie. \`IntegerArray\` and \`BooleanArray\`
        df = df.convert_dtypes(infer_objects=True,
                               convert_integer=True,
                               convert_string=False,
                               convert_boolean=True,
                               convert_floating=False)

        # Convert leftover 'object' columns to string
        object_cols = df.select_dtypes(include='object').columns.values
        for obj_col in object_cols:
            # astype returns a new Series, so the result has to be assigned
            # back; without the assignment the column kept its object dtype.
            # NOTE(review): astype(str) also stringifies missing values, so
            # they end up as a regular category - confirm this is acceptable.
            df[obj_col] = df[obj_col].astype(str).astype('category')
        setattr(merged, df_attr, df)

    merged.write_h5mu(par["output"], compression=par["output_compression"])
    logger.info('Finished')


if __name__ == '__main__':
    main()
VIASHMAIN
python -B "\$tempscript" &
wait "\$!"

VIASHEOF


# strip viash automount from file paths
# (restores the host-side paths after the container run)
if [ ! -z "$VIASH_PAR_INPUT" ]; then
  unset VIASH_TEST_INPUT
  IFS=','
  for var in $VIASH_PAR_INPUT; do
    unset IFS
    if [ -z "$VIASH_TEST_INPUT" ]; then
      VIASH_TEST_INPUT="$(ViashStripAutomount "$var")"
    else
      VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashStripAutomount "$var")"
    fi
  done
  VIASH_PAR_INPUT="$VIASH_TEST_INPUT"
fi
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
  VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT")
fi
if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
  VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR")
fi
if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
  VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE")
fi
if [ ! -z "$VIASH_META_CONFIG" ]; then
  VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG")
fi
if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
  VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR")
fi


# check whether required files exist
if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then
  ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/dataflow/merge/setup_logger.py b/target/docker/dataflow/merge/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/dataflow/merge/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/dataflow/split_modalities/.config.vsh.yaml b/target/docker/dataflow/split_modalities/.config.vsh.yaml new file mode 100644 index 00000000000..0a83aef3c14 --- /dev/null +++ b/target/docker/dataflow/split_modalities/.config.vsh.yaml @@ -0,0 +1,214 @@ +functionality: + name: "split_modalities" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to a single .h5mu file." 
+ info: null + default: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory containing multiple h5mu files." + info: null + example: + - "/path/to/output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_types" + description: "A csv containing the base filename and modality type per output\ + \ file." + info: null + example: + - "types.csv" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--compression" + description: "The compression format to be used on the final h5mu object." + info: null + default: + - "gzip" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Split the modalities from a single .h5mu multimodal sample into seperate\ + \ .h5mu files. 
\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + 
container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/split_modalities" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dataflow/split_modalities/split_modalities" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/dataflow/split_modalities/setup_logger.py b/target/docker/dataflow/split_modalities/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/dataflow/split_modalities/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/dataflow/split_modalities/split_modalities b/target/docker/dataflow/split_modalities/split_modalities new file mode 100755 index 00000000000..9d998fc08d3 --- /dev/null +++ b/target/docker/dataflow/split_modalities/split_modalities @@ -0,0 +1,1065 @@ +#!/usr/bin/env bash + +# split_modalities 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) +# * Robrecht Cannoodt (contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the 
verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+# The message is only printed when VIASH_VERBOSITY >= $1.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    # log lines go to stderr so they never mix with the component's stdout
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="split_modalities" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "split_modalities 0.12.4" + echo "" + echo "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu" + echo "files." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " default: sample_path" + echo " Path to a single .h5mu file." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: /path/to/output" + echo " Output directory containing multiple h5mu files." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --output_types" + echo " type: file, required parameter, output, file must exist" + echo " example: types.csv" + echo " A csv containing the base filename and modality type per output file." + echo "" + echo " --compression" + echo " type: string" + echo " default: gzip" + echo " The compression format to be used on the final h5mu object." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component dataflow split_modalities" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-split_modalities-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "split_modalities 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_types) + [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TYPES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_types. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_types=*) + [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types=*\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TYPES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --compression) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        --compression=*)
+            [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        # ---setup <strategy>: the strategy is a separate word, so both the
+        # flag and its value have to be consumed (shift 2). Shifting only one
+        # left the strategy behind as a stray positional argument.
+        ---setup)
+            VIASH_MODE='docker_setup'
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2
+            ;;
+        # ---setup=<strategy>: flag and value are a single word (shift 1).
+        # 'shift 2' fails when this is the last argument, which aborts the
+        # script under 'set -e', and otherwise swallows the next argument.
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        # NOTE: '---v' is already matched by the earlier '---v|---verbose' arm
+        # of this case statement, so only '---volume' can reach this arm.
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        # ---volume=<spec>: the value is part of the current word, so it must
+        # be extracted from $1 (not from the following argument $2).
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory_b if the memory string could not be converted (unset takes the variable NAME, not its value) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty, so that later ${VIASH_META_CPUS+x} checks see it as missing +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then + ViashError '--output_types' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then + VIASH_PAR_COMPRESSION="gzip" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_TYPES")" ) + VIASH_PAR_OUTPUT_TYPES=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_TYPES") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_TYPES" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dataflow_split_modalities:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-split_modalities-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +import mudata as md +from sys import stdout +from pathlib import Path +import pandas as pd + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_types': $( if [ ! -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_TYPES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main() -> None: + output_dir = Path(par["output"]) + if not output_dir.is_dir(): + output_dir.mkdir(parents=True) + + logger.info('Reading input file %s', par['input']) + sample = md.read_h5mu(par["input"].strip()) + input_file = Path(par["input"]) + + logger.info('Creating output types csv') + + names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" + for mod_name in sample.mod.keys() } + df = pd.DataFrame({"name": list(names.keys()), "filename": list(names.values())}) + df.to_csv(par["output_types"], index=False) + + 
logger.info('Splitting up modalities %s', ", ".join(sample.mod.keys())) + for mod_name, mod in sample.mod.items(): + new_sample = md.MuData({mod_name: mod}) + logger.info('Writing to %s', names[mod_name]) + new_sample.write_h5mu(output_dir / names[mod_name], compression=par["output_compression"]) + + logger.info("Finished") + + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ]; then + VIASH_PAR_OUTPUT_TYPES=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_TYPES") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ ! -e "$VIASH_PAR_OUTPUT_TYPES" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_TYPES' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/demux/bcl2fastq/.config.vsh.yaml b/target/docker/demux/bcl2fastq/.config.vsh.yaml new file mode 100644 index 00000000000..1ac741f2011 --- /dev/null +++ b/target/docker/demux/bcl2fastq/.config.vsh.yaml @@ -0,0 +1,169 @@ +functionality: + name: "bcl2fastq" + namespace: "demux" + version: "0.12.4" + authors: + - name: "Toni Verbeiren" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + - "--runfolder_dir" + description: "Input run directory" + info: null + example: + - "bcl_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--sample_sheet" + alternatives: + - "-s" + description: "Pointer to the sample sheet" + info: null + example: + - "SampleSheet.csv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory containig fastq files" + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reports" + description: "Reports directory" + info: null + example: + - "reports_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--ignore_missing" + info: null + direction: "input" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Convert bcl files to fastq files using 
bcl2fastq.\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_bcl/bcl" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/bcl2fastq:2.20" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl2fastq" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl2fastq/bcl2fastq" 
+ viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/demux/bcl2fastq/bcl2fastq b/target/docker/demux/bcl2fastq/bcl2fastq new file mode 100755 index 00000000000..9adacf6733f --- /dev/null +++ b/target/docker/demux/bcl2fastq/bcl2fastq @@ -0,0 +1,1028 @@ +#!/usr/bin/env bash + +# bcl2fastq 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Toni Verbeiren (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 
's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory containing a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory that holds the (symlink-resolved) bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level; the message is printed only when VIASH_VERBOSITY >= $1 +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '.
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="bcl2fastq" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcl2fastq 0.12.4" + echo "" + echo "Convert bcl files to fastq files using bcl2fastq."
+ echo "" + echo "Arguments:" + echo " -i, --runfolder_dir, --input" + echo " type: file, required parameter, file must exist" + echo " example: bcl_dir" + echo " Input run directory" + echo "" + echo " -s, --sample_sheet" + echo " type: file, required parameter, file must exist" + echo " example: SampleSheet.csv" + echo " Pointer to the sample sheet" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: fastq_dir" + echo " Output directory containig fastq files" + echo "" + echo " --reports" + echo " type: file, output, file must exist" + echo " example: reports_dir" + echo " Reports directory" + echo "" + echo " --ignore_missing" + echo " type: boolean_true" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# Logs a critical message and exits with status 1 when the docker executable +# cannot be found or when 'docker version' fails. +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # run the lookup itself; it exits non-zero when docker is not found + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $?
: whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? 
+ fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ghcr.io/data-intuitive/bcl2fastq:2.20 + +ENTRYPOINT [] + + +RUN : +LABEL org.opencontainers.image.authors="Toni Verbeiren" +LABEL org.opencontainers.image.description="Companion container for running component demux bcl2fastq" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bcl2fastq-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
# ViashAbsolutePath: generate absolute path from relative path
# borrowed from https://stackoverflow.com/a/21951256
# $1     : relative filename
# return : absolute path, with '.', '..' and empty components resolved textually
#          (symlinks are not followed and the path does not have to exist)
# examples:
#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
#   ViashAbsolutePath /foo/bar/..     # returns /foo
function ViashAbsolutePath {
  local thePath
  if [[ ! "$1" =~ ^/ ]]; then
    thePath="$PWD/$1"
  else
    thePath="$1"
  fi
  # the normalisation runs in a subshell, so the IFS change and the scratch
  # variables below never reach the caller
  printf '%s\n' "$thePath" | (
    IFS=/
    # -r: keep backslashes that are part of a path component
    read -r -a parr
    declare -a outp
    for i in "${parr[@]}"; do
      case "$i" in
      ''|.) continue ;;
      ..)
        len=${#outp[@]}
        if ((len==0)); then
          # already at the root, nothing to pop
          continue
        else
          # quoted so the subscript can never be glob-expanded
          unset "outp[$((len-1))]"
        fi
        ;;
      *)
        len=${#outp[@]}
        outp[$len]="$i"
        ;;
      esac
    done
    # IFS is '/', so "${outp[*]}" joins the components with slashes
    printf '/%s\n' "${outp[*]}"
  )
}
# initialise variables
VIASH_EXTRA_MOUNTS=()

# initialise array
VIASH_POSITIONAL_ARGS=''
VIASH_MODE='run'

# Parse the command line.
#  - component parameters use one or two dashes (--input, -i, ...)
#  - wrapper-level options use three dashes (---setup, ---volume, ---cpus, ...)
#  - anything else is collected as a positional argument
# Every arm consumes exactly the words it read: 'shift 2' for "--flag value",
# 'shift 1' for "--flag=value" and for flags without a value.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            ViashHelp
            exit
            ;;
        ---v|---verbose)
            let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
            shift 1
            ;;
        ---verbosity)
            VIASH_VERBOSITY="$2"
            shift 2
            ;;
        ---verbosity=*)
            VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
            shift 1
            ;;
        --version)
            echo "bcl2fastq 0.12.4"
            exit
            ;;
        --input)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --input=*)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        -i)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --runfolder_dir)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--runfolder_dir\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --runfolder_dir. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --sample_sheet)
            [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_SAMPLE_SHEET="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_sheet. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --sample_sheet=*)
            [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet=*\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_SAMPLE_SHEET=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        -s)
            [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_SAMPLE_SHEET="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output=*)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        -o)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --reports)
            [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_REPORTS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --reports. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --reports=*)
            [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports=*\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_REPORTS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        --ignore_missing)
            [ -n "$VIASH_PAR_IGNORE_MISSING" ] && ViashError Bad arguments for option \'--ignore_missing\': \'$VIASH_PAR_IGNORE_MISSING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_IGNORE_MISSING=true
            shift 1
            ;;
        ---setup)
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$2"
            # the strategy was read from $2, so both words are consumed
            shift 2
            ;;
        ---setup=*)
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
            # flag and value are a single word; shifting 2 would swallow the next
            # argument, or fail under 'set -e' when this is the last one
            shift 1
            ;;
        ---dockerfile)
            ViashDockerfile
            exit 0
            ;;
        ---volume)
            # note: the short form '---v' is already taken by '---verbose' above
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---volume. Use "--help" to get more information on the parameters. && exit 1
            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
            shift 2
            ;;
        ---volume=*)
            # the value is embedded in $1, not in the following word
            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
            shift 1
            ;;
        ---debug)
            VIASH_MODE='docker_debug'
            shift 1
            ;;
        ---cpus)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---cpus=*)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---memory)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---memory=*)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        *)  # positional arg or unknown option
            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
            shift # past argument
            ;;
    esac
done

# parse positional parameters
eval set -- $VIASH_POSITIONAL_ARGS
# compute memory in different units
# VIASH_META_MEMORY is converted to bytes by ViashMemoryAsBytes, which prints
# nothing when the string is not a recognised memory specification. The larger
# units are derived from the byte count, each rounded up to the next whole unit.
if [ ! -z "${VIASH_META_MEMORY+x}" ]; then
  # quoted so that a value such as "2 GB" reaches the parser as one argument
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # 'unset' takes the variable NAME; with a leading '$' the empty value is
    # expanded and nothing is unset
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_IGNORE_MISSING+x} ]; then + VIASH_PAR_IGNORE_MISSING="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ] && [ ! -e "$VIASH_PAR_SAMPLE_SHEET" ]; then + ViashError "Input file '$VIASH_PAR_SAMPLE_SHEET' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_IGNORE_MISSING" ]]; then + if ! [[ "$VIASH_PAR_IGNORE_MISSING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--ignore_missing' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -d "$(dirname "$VIASH_PAR_REPORTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_REPORTS")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SAMPLE_SHEET")" ) + VIASH_PAR_SAMPLE_SHEET=$(ViashAutodetectMount "$VIASH_PAR_SAMPLE_SHEET") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_REPORTS" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REPORTS")" ) + VIASH_PAR_REPORTS=$(ViashAutodetectMount "$VIASH_PAR_REPORTS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_REPORTS" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
# change file ownership
# ViashPerformChown: hand the output paths back to the invoking user
# Runs 'chown --recursive' inside the component image, with the detected mounts
# attached, on every path collected in VIASH_CHOWN_VARS. Does nothing when that
# array is empty. The chown is best effort: errexit is switched off around it.
function ViashPerformChown {
  if (( ${#VIASH_CHOWN_VARS[@]} )); then
    # The command line goes through 'eval' because the mount arguments carry
    # embedded quotes. Shell-escape every target so that 'eval' reproduces it
    # verbatim instead of re-splitting or re-interpreting the path.
    local chown_targets
    chown_targets=$(printf '%q ' "${VIASH_CHOWN_VARS[@]}")
    set +e
    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/demux_bcl2fastq:0.12.0 "$(id -u):$(id -g)" --silent --recursive "$chown_targets"
    set -e
  fi
}
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\"'\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_MISSING+x} ]; then echo "${VIASH_PAR_IGNORE_MISSING}" | sed "s#'#'\"'\"'#g;s#.*#par_ignore_missing='&'#" ; else echo "# par_ignore_missing="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -exo pipefail + +extra_params=() + +# Handle reports stored separate +if [ ! -z "\$par_reports" ]; then + extra_params+=("--reports-dir" "\$par_reports") +fi + +# Handle the boolean flag +if [ "\$par_ignore_missing" == "true" ]; then + extra_params+=("--ignore-missing-control" "--ignore-missing-bcl" "--ignore-missing-filter") +fi + +# Run the actual command +bcl2fastq \\ + --runfolder-dir "\$par_input" \\ + --sample-sheet "\$par_sample_sheet" \\ + --output-dir "\$par_output" \\ + "\${extra_params[@]}" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then + VIASH_PAR_SAMPLE_SHEET=$(ViashStripAutomount "$VIASH_PAR_SAMPLE_SHEET") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_PAR_REPORTS" ]; then + VIASH_PAR_REPORTS=$(ViashStripAutomount "$VIASH_PAR_REPORTS") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -e "$VIASH_PAR_REPORTS" ]; then + ViashError "Output file '$VIASH_PAR_REPORTS' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/demux/bcl_convert/.config.vsh.yaml b/target/docker/demux/bcl_convert/.config.vsh.yaml new file mode 100644 index 00000000000..334e9c2b201 --- /dev/null +++ b/target/docker/demux/bcl_convert/.config.vsh.yaml @@ -0,0 +1,189 @@ +functionality: + name: "bcl_convert" + namespace: "demux" + version: "0.12.4" + authors: + - name: "Toni Verbeiren" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" + - name: "Marijke Van Moerbeke" + roles: + - "author" + info: + role: "Contributor" + links: + github: "mvanmoerbeke" + orcid: "0000-0002-3097-5621" + linkedin: "marijke-van-moerbeke-84303a34" + organizations: + - name: "OpenAnalytics" + href: "https://www.openanalytics.eu" + role: "Statistical Consultant" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input run directory" + info: null + example: + - "bcl_dir" + 
must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--sample_sheet" + alternatives: + - "-s" + description: "Pointer to the sample sheet" + info: null + example: + - "bcl_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory containig fastq files" + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reports" + description: "Reports directory" + info: null + example: + - "reports_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--test_mode" + description: "Should bcl-convert be run in test mode (using --first-tile-only)?" 
+ info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Convert bcl files to fastq files using bcl-convert.\nInformation about\ + \ upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\n\ + and https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_bcl/bcl2" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/bclconvert:3.10" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + 
cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl_convert" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/bcl_convert/bcl_convert" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/demux/bcl_convert/bcl_convert b/target/docker/demux/bcl_convert/bcl_convert new file mode 100755 index 00000000000..4f669bf93fa --- /dev/null +++ b/target/docker/demux/bcl_convert/bcl_convert @@ -0,0 +1,1033 @@ +#!/usr/bin/env bash + +# bcl_convert 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   Viashquote --foo=bar  # returns --foo='bar'
function ViashQuote {
  # printf is used instead of echo: echo would interpret a flag such as
  # '-n' or '-e' as one of its own options and print nothing
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # flag with a value: quote everything after the first '='
    printf '%s\n' "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: returned untouched
    printf '%s\n' "$1"
  else
    # plain value: wrapped in single quotes
    printf "'%s'\n" "$1"
  fi
}
# ViashLog: emit a log line when the current verbosity allows it
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: verbosity level required for the message to be shown
# $2: tag printed in square brackets in front of the message
# $3+: the message itself
# stderr: the message, prepended by '[$2] ', if $VIASH_VERBOSITY >= $1
function ViashLog {
  local threshold="$1" label="$2"
  shift 2
  # stay silent when the message is more detailed than the requested verbosity
  [ $VIASH_VERBOSITY -ge $threshold ] || return 0
  echo "[$label]" "$@" >&2
}
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="bcl_convert" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bcl_convert 0.12.4" + echo "" + echo "Convert bcl files to fastq files using bcl-convert." + echo "Information about upgrading from bcl2fastq via" + echo "https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html" + echo "and" + echo "https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: bcl_dir" + echo " Input run directory" + echo "" + echo " -s, --sample_sheet" + echo " type: file, required parameter, file must exist" + echo " example: bcl_dir" + echo " Pointer to the sample sheet" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: fastq_dir" + echo " Output directory containig fastq files" + echo "" + echo " --reports" + echo " type: file, output, file must exist" + echo " example: reports_dir" + echo " Reports directory" + echo "" + echo " --test_mode" + echo " type: boolean" + echo " default: false" + echo " Should bcl-convert be run in test mode (using --first-tile-only)?" 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM ghcr.io/data-intuitive/bclconvert:3.10
+
+ENTRYPOINT []
+
+
+RUN :
+LABEL org.opencontainers.image.authors="Toni Verbeiren, Marijke Van Moerbeke"
+LABEL org.opencontainers.image.description="Companion container for running component demux bcl_convert"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was built
+function ViashDockerBuild {
+  # create temporary directory to store dockerfile & optional resources in
+  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bcl_convert-XXXXXX")
+  dockerfile="$tmpdir/Dockerfile"
+  function clean_up {
+    rm -rf "$tmpdir"
+  }
+  trap clean_up EXIT  # NOTE(review): the later 'trap ViashPerformChown EXIT' would replace this handler; confirm tmpdir is still removed on a normal run
+
+  # store dockerfile and resources
+  ViashDockerfile > $dockerfile
+
+  # Build the container
+  ViashNotice "Building container '$1' with Dockerfile"
+  ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile
+  else
+    docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log
+  fi
+  out=$?
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "bcl_convert 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --sample_sheet) + [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SHEET="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_sheet. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_sheet=*) + [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet=*\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SHEET=$(ViashRemoveFlags "$1") + shift 1 + ;; + -s) + [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SHEET="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reports) + [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REPORTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reports. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reports=*) + [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports=*\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REPORTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --test_mode) + [ -n "$VIASH_PAR_TEST_MODE" ] && ViashError Bad arguments for option \'--test_mode\': \'$VIASH_PAR_TEST_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TEST_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --test_mode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --test_mode=*) + [ -n "$VIASH_PAR_TEST_MODE" ] && ViashError Bad arguments for option \'--test_mode=*\': \'$VIASH_PAR_TEST_MODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_TEST_MODE=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            if [ $# -ge 2 ]; then shift 2; else shift 1; fi  # consume flag and value; tolerate a missing value so the strategy check reports it
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1  # flag and value are a single word
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)  # NOTE: '---v' is matched earlier by the verbosity case, so only '---volume' reaches this branch
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")  # the value is embedded in $1, not in the next argument
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=$(ViashMemoryAsBytes "$VIASH_META_MEMORY")
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty ('unset' takes the variable NAME; '$VAR' expanded to nothing and unset nothing)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty (pass the name, not the expanded value)
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then
+  ViashError '--sample_sheet' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_TEST_MODE+x} ]; then + VIASH_PAR_TEST_MODE="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ] && [ ! -e "$VIASH_PAR_SAMPLE_SHEET" ]; then + ViashError "Input file '$VIASH_PAR_SAMPLE_SHEET' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_TEST_MODE" ]]; then + if ! [[ "$VIASH_PAR_TEST_MODE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--test_mode' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -d "$(dirname "$VIASH_PAR_REPORTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_REPORTS")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SAMPLE_SHEET")" ) + VIASH_PAR_SAMPLE_SHEET=$(ViashAutodetectMount "$VIASH_PAR_SAMPLE_SHEET") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_REPORTS" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REPORTS")" ) + VIASH_PAR_REPORTS=$(ViashAutodetectMount "$VIASH_PAR_REPORTS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_REPORTS" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_bcl_convert:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bcl_convert-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\"'\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) +$( if [ ! -z ${VIASH_PAR_TEST_MODE+x} ]; then echo "${VIASH_PAR_TEST_MODE}" | sed "s#'#'\"'\"'#g;s#.*#par_test_mode='&'#" ; else echo "# par_test_mode="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +[ -d "\$par_output" ] || mkdir -p "\$par_output" + +bcl-convert \\ + --force \\ + --bcl-input-directory "\$par_input" \\ + --output-directory "\$par_output" \\ + --sample-sheet "\$par_sample_sheet" \\ + --first-tile-only \$par_test_mode + +if [ ! -z "\$par_reports" ]; then + echo "Moving reports to its own location" + mv "\$par_output"/Reports "\$par_reports" +else + echo "Leaving reports alone" +fi +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then + VIASH_PAR_SAMPLE_SHEET=$(ViashStripAutomount "$VIASH_PAR_SAMPLE_SHEET") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_REPORTS" ]; then + VIASH_PAR_REPORTS=$(ViashStripAutomount "$VIASH_PAR_REPORTS") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -e "$VIASH_PAR_REPORTS" ]; then + ViashError "Output file '$VIASH_PAR_REPORTS' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/demux/cellranger_mkfastq/.config.vsh.yaml b/target/docker/demux/cellranger_mkfastq/.config.vsh.yaml new file mode 100644 index 00000000000..5da62b8341f --- /dev/null +++ b/target/docker/demux/cellranger_mkfastq/.config.vsh.yaml @@ -0,0 +1,207 @@ +functionality: + name: "cellranger_mkfastq" + namespace: "demux" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Samuel D'Souza" + roles: + - "author" + info: + role: "Contributor" + links: + github: "srdsam" + linkedin: "samuel-d-souza-887023150/" + organizations: + - name: "Chan Zuckerberg Biohub" + href: "https://www.czbiohub.org" + role: "Data Engineer" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: 
"0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Path to the (untarred) BCL files. Expects 'RunParameters.xml' at\ + \ './'." + info: null + example: + - "/path/to/bcl" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--sample_sheet" + description: "The path to the sample sheet." + info: null + example: + - "SampleSheet.csv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "The folder to store the demux results" + info: null + example: + - "/path/to/output" + default: + - "fastqs" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reports" + description: "Reports directory" + info: null + example: + - "reports_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Demultiplex raw sequencing data" + usage: "cellranger_mkfastq \\\n --input /path/to/bcl \\\n --sample_sheet SampleSheet.csv\ + \ \\\n --output /path/to/output\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_bcl" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: 
"ghcr.io/data-intuitive/cellranger:6.1" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt-get update && apt-get upgrade -y" + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/cellranger_mkfastq" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: 
"https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq b/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq new file mode 100755 index 00000000000..f94c854979b --- /dev/null +++ b/target/docker/demux/cellranger_mkfastq/cellranger_mkfastq @@ -0,0 +1,1026 @@ +#!/usr/bin/env bash + +# cellranger_mkfastq 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Samuel D'Souza (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function 
ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. 
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. 
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="cellranger_mkfastq"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "cellranger_mkfastq 0.12.4"
+  echo ""
+  echo "Demultiplex raw sequencing data"
+  echo ""
+  echo "Usage:"
+  echo "cellranger_mkfastq \\"
+  echo " --input /path/to/bcl \\"
+  echo " --sample_sheet SampleSheet.csv \\"
+  echo " --output /path/to/output"
+  echo ""
+  echo "Arguments:"
+  echo " --input"
+  echo " type: file, required parameter, file must exist"
+  echo " example: /path/to/bcl"
+  echo " Path to the (untarred) BCL files. Expects 'RunParameters.xml' at './'."
+  echo ""
+  echo " --sample_sheet"
+  echo " type: file, required parameter, file must exist"
+  echo " example: SampleSheet.csv"
+  echo " The path to the sample sheet."
+  echo ""
+  echo " --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " default: fastqs"
+  echo " example: /path/to/output"
+  echo " The folder to store the demux results"
+  echo ""
+  echo " --reports"
+  echo " type: file, output, file must exist"
+  echo " example: reports_dir"
+  echo " Reports directory"
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# Exits with status 1 when the docker CLI is not on PATH or when 'docker version' fails.
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then # run the lookup itself; inside '[ ... ]' the words were only parsed as a test expression
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e # remember whether errexit was on, then disable it so the exit code can be inspected
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : exit status of 'docker manifest inspect' (0 when the image was found)
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $? # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : 0 when 'docker images -q' prints an id for the image, non-zero otherwise
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $? # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q $1)" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : 0 when the pull succeeded, non-zero otherwise
+# examples:
+#   ViashDockerPull python:latest
+#   echo $? # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull $1 && return 0 || return 1
+  else
+    save=$-; set +e
+    docker pull $1 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : exit status of 'docker push' (0 when the push succeeded)
+# examples:
+#   ViashDockerPush python:latest
+#   echo $? # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push $1
+    out=$?
+  else
+    docker push $1 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded." # report the image that was pushed, not the global set by ViashDockerSetup
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull $1
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild $@
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild $VSHD_ID
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then # 'ifneedbe*' strategies act only when the image is not available locally
+    save=$-; set +e
+    ViashDockerLocalTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then # push only when the remote check does not find the image
+    save=$-; set +e
+    ViashDockerRemoteTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+# Logs the first missing command via ViashError and exits with status 1 when the check fails.
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $@ : commands to verify being present
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM ghcr.io/data-intuitive/cellranger:6.1
+
+ENTRYPOINT []
+
+
+RUN apt-get update && apt-get upgrade -y
+LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Samuel D'Souza, Robrecht Cannoodt"
+LABEL org.opencontainers.image.description="Companion container for running component demux cellranger_mkfastq"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code
$? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_mkfastq-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "cellranger_mkfastq 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sample_sheet) + [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SHEET="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_sheet. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_sheet=*) + [ -n "$VIASH_PAR_SAMPLE_SHEET" ] && ViashError Bad arguments for option \'--sample_sheet=*\': \'$VIASH_PAR_SAMPLE_SHEET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_SHEET=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reports) + [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REPORTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reports. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reports=*) + [ -n "$VIASH_PAR_REPORTS" ] && ViashError Bad arguments for option \'--reports=*\': \'$VIASH_PAR_REPORTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REPORTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + 
kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then + ViashError '--sample_sheet' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ] && [ ! -e "$VIASH_PAR_SAMPLE_SHEET" ]; then + ViashError "Input file '$VIASH_PAR_SAMPLE_SHEET' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -d "$(dirname "$VIASH_PAR_REPORTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_REPORTS")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SAMPLE_SHEET")" ) + VIASH_PAR_SAMPLE_SHEET=$(ViashAutodetectMount "$VIASH_PAR_SAMPLE_SHEET") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_REPORTS" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REPORTS")" ) + VIASH_PAR_REPORTS=$(ViashAutodetectMount "$VIASH_PAR_REPORTS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_REPORTS" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/demux_cellranger_mkfastq:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellranger_mkfastq-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\"'\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\$tmpdir" +} +trap clean_up EXIT + +# if par_input not is a folder, untar first +if [ ! -d "\$par_input" ]; then + echo "Assuming input is a tar.gz, untarring" + input_dir="\$tmpdir/bcl" + mkdir -p "\$input_dir" + tar -xzf "\$par_input" -C "\$input_dir" --strip-components=1 +else + input_dir="\$par_input" +fi + + +# add additional params +extra_params=( ) + +if [ ! -z "\$meta_cpus" ]; then + extra_params+=( "--localcores=\$meta_cpus" ) +fi +if [ ! -z "\$meta_memory_gb" ]; then + # always keep 2gb for the OS itself + memory_gb=\`python -c "print(int('\$meta_memory_gb') - 2)"\` + extra_params+=( "--localmem=\$memory_gb" ) +fi + + +echo "Running cellranger demux" + +id=myoutput + +cellranger mkfastq \\ + --id "\$id" \\ + --csv "\$par_sample_sheet" \\ + --run "\$par_input" \\ + "\${extra_params[@]}" \\ + --disable-ui \\ + --output-dir "\$par_output" + +# Move reports to their own output location +if [ ! 
-z "\$par_reports" ]; then + echo "Moving reports its own location" + mv "\$par_output"/Reports "\$par_reports" +else + echo "Leaving reports alone" +fi +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_SAMPLE_SHEET" ]; then + VIASH_PAR_SAMPLE_SHEET=$(ViashStripAutomount "$VIASH_PAR_SAMPLE_SHEET") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_REPORTS" ]; then + VIASH_PAR_REPORTS=$(ViashStripAutomount "$VIASH_PAR_REPORTS") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REPORTS" ] && [ ! -e "$VIASH_PAR_REPORTS" ]; then + ViashError "Output file '$VIASH_PAR_REPORTS' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/demux/cellranger_mkfastq/setup_logger.py b/target/docker/demux/cellranger_mkfastq/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/demux/cellranger_mkfastq/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/dimred/pca/.config.vsh.yaml b/target/docker/dimred/pca/.config.vsh.yaml new file mode 100644 index 00000000000..41baca73075 --- /dev/null +++ b/target/docker/dimred/pca/.config.vsh.yaml @@ -0,0 +1,253 @@ +functionality: + name: "pca" + namespace: "dimred" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + description: "Use specified layer for expression values instead of the .X object\ + \ from the modality." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_input" + description: "Column name in .var matrix that will be used to select which genes\ + \ to run the PCA on." + info: null + example: + - "filter_with_hvg" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting embedding." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--varm_output" + description: "In which .varm slot to store the resulting loadings matrix." + info: null + default: + - "pca_loadings" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_output" + description: "In which .uns slot to store the resulting variance objects." + info: null + default: + - "pca_variance" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_components" + description: "Number of principal components to compute. Defaults to 50, or 1\ + \ - minimum dimension size of selected representation." 
+ info: null + example: + - 25 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--overwrite" + description: "Allow overwriting .obsm, .varm and .uns slots." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Computes PCA coordinates, loadings and variance decomposition. Uses\ + \ the implementation of scikit-learn [Pedregosa11].\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" 
+ mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dimred/pca/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/pca" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/pca/pca" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/dimred/pca/pca b/target/docker/dimred/pca/pca new file mode 100755 index 00000000000..f5ed0c878f0 --- /dev/null +++ b/target/docker/dimred/pca/pca @@ -0,0 +1,1188 @@ +#!/usr/bin/env bash + +# pca 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="pca" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "pca 0.12.4" + echo "" + echo "Computes PCA coordinates, loadings and variance decomposition. Uses the" + echo "implementation of scikit-learn [Pedregosa11]." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --layer" + echo " type: string" + echo " Use specified layer for expression values instead of the .X object from" + echo " the modality." + echo "" + echo " --var_input" + echo " type: string" + echo " example: filter_with_hvg" + echo " Column name in .var matrix that will be used to select which genes to" + echo " run the PCA on." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --obsm_output" + echo " type: string" + echo " default: X_pca" + echo " In which .obsm slot to store the resulting embedding." 
+ echo "" + echo " --varm_output" + echo " type: string" + echo " default: pca_loadings" + echo " In which .varm slot to store the resulting loadings matrix." + echo "" + echo " --uns_output" + echo " type: string" + echo " default: pca_variance" + echo " In which .uns slot to store the resulting variance objects." + echo "" + echo " --num_components" + echo " type: integer" + echo " example: 25" + echo " Number of principal components to compute. Defaults to 50, or 1 -" + echo " minimum dimension size of selected representation." + echo "" + echo " --overwrite" + echo " type: boolean_true" + echo " Allow overwriting .obsm, .varm and .uns slots." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? 
# returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." 
+ else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Dries De Maeyer" +LABEL org.opencontainers.image.description="Companion container for running component dimred pca" +LABEL org.opencontainers.image.created="2024-01-31T09:08:31Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-pca-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "pca 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer=*) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_input) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_input=*) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --varm_output) + [ -n "$VIASH_PAR_VARM_OUTPUT" ] && ViashError Bad arguments for option \'--varm_output\': \'$VIASH_PAR_VARM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --varm_output. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --varm_output=*) + [ -n "$VIASH_PAR_VARM_OUTPUT" ] && ViashError Bad arguments for option \'--varm_output=*\': \'$VIASH_PAR_VARM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --uns_output) + [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --uns_output=*) + [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output=*\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --num_components) + [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_components. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --num_components=*) + [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components=*\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_COMPONENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --overwrite) + [ -n "$VIASH_PAR_OVERWRITE" ] && ViashError Bad arguments for option \'--overwrite\': \'$VIASH_PAR_OVERWRITE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OVERWRITE=true + shift 1 + ;; + ---setup) + # the strategy is passed as a separate word: require it and consume both words + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + # flag and strategy share a single word, so only that word is consumed + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + # NOTE: '---v' is matched first by the ---v|---verbose arm earlier in this case statement, so in practice only '---volume' reaches this arm + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the mount specification is embedded in this word ($1), not in the following argument + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_pca:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + # each unit is derived from the previous one, rounding up + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # unset takes the variable NAME; the generated script later tests whether the variable is set, so a set-but-empty value would be emitted as int(r'') + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # pass the variable NAME so that a set-but-empty value is really removed + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_pca" +fi +if [ -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then + VIASH_PAR_VARM_OUTPUT="pca_loadings" +fi +if [ -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then + VIASH_PAR_UNS_OUTPUT="pca_variance" +fi +if [ -z ${VIASH_PAR_OVERWRITE+x} ]; then + VIASH_PAR_OVERWRITE="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_NUM_COMPONENTS" ]]; then + if ! [[ "$VIASH_PAR_NUM_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--num_components' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OVERWRITE" ]]; then + if ! [[ "$VIASH_PAR_OVERWRITE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--overwrite' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dimred_pca:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_pca:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_pca:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-pca-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import scanpy as sc +import mudata as mu +import sys +from anndata import AnnData + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'uns_output': $( if [ ! 
-z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +mdata = mu.read_h5mu(par["input"]) + +logger.info("Computing PCA components for modality '%s'", par['modality']) +data = mdata.mod[par['modality']] +if par['layer'] and par['layer'] not in data.layers: + raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.") +layer = data.X if not par['layer'] else data.layers[par['layer']] +adata_input_layer = AnnData(layer) +adata_input_layer.var.index = data.var.index + +use_highly_variable = False +if par["var_input"]: + if not par["var_input"] in data.var.columns: + raise ValueError(f"Requested to use .var column {par['var_input']} " + "as a selection of genes to run the PCA on, " + f"but the column is not available for modality {par['modality']}") + use_highly_variable = True + adata_input_layer.var['highly_variable'] = data.var[par["var_input"]] + +# run pca +output_adata = sc.tl.pca( + adata_input_layer, + n_comps=par["num_components"], + copy=True, + use_highly_variable=use_highly_variable +) + +# store output in specific objects +# check_exist_dict maps every output parameter to the attribute of the modality (.obsm, .varm, .uns) +# that receives the result, so the overwrite check inspects the slot that is actually written. + +check_exist_dict = { + "obsm_output": ("obsm"), + "varm_output": ("varm"), + "uns_output": ("uns") +} +for parameter_name, field in check_exist_dict.items(): + if par[parameter_name] in
getattr(data, field): + if not par["overwrite"]: + raise ValueError(f"Requested to create field {par[parameter_name]} in .{field} " + f"for modality {par['modality']}, but field already exists.") + del getattr(data, field)[par[parameter_name]] + +data.obsm[par["obsm_output"]] = output_adata.obsm['X_pca'] +data.varm[par["varm_output"]] = output_adata.varm['PCs'] +data.uns[par["uns_output"]] = { "variance": output_adata.uns['pca']['variance'], + "variance_ratio": output_adata.uns['pca']['variance_ratio'] } + + +logger.info("Writing to %s.", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/dimred/pca/setup_logger.py b/target/docker/dimred/pca/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/dimred/pca/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/dimred/umap/.config.vsh.yaml b/target/docker/dimred/umap/.config.vsh.yaml new file mode 100644 index 00000000000..beeddcaa40b --- /dev/null +++ b/target/docker/dimred/umap/.config.vsh.yaml @@ -0,0 +1,312 @@ +functionality: + name: "umap" + namespace: "dimred" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_neighbors" + description: "The `.uns` neighbors slot as output by the `find_neighbors` component." 
+ info: null + default: + - "neighbors" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "The pre/postfix under which to store the UMAP results." + info: null + default: + - "umap" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "double" + name: "--min_dist" + description: "The effective minimum distance between embedded points. Smaller\ + \ values will result in a more clustered/clumped embedding where nearby points\ + \ on the manifold are drawn closer together, while larger values will result\ + \ on a more even dispersal of points. The value should be set relative to\ + \ the spread value, which determines the scale at which embedded points will\ + \ be spread out." + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--spread" + description: "The effective scale of embedded points. In combination with `min_dist`\ + \ this determines how clustered/clumped the embedded points are." 
+ info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_components" + description: "The number of dimensions of the embedding." + info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_iter" + description: "The number of iterations (epochs) of the optimization. Called\ + \ `n_epochs` in the original UMAP. Default is set to 500 if neighbors['connectivities'].shape[0]\ + \ <= 10000, else 200." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alpha" + description: "The initial learning rate for the embedding optimization." + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--gamma" + description: "Weighting applied to negative samples in low dimensional embedding\ + \ optimization. Values higher than one will result in greater weight being\ + \ given to negative samples." + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--negative_sample_rate" + description: "The number of negative edge/1-simplex samples to use per positive\ + \ edge/1-simplex sample in optimizing the low dimensional embedding." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--init_pos" + description: "How to initialize the low dimensional embedding. Called `init`\ + \ in the original UMAP. 
Options are:\n\n* Any key from `.obsm`\n* `'paga'`:\ + \ positions from `paga()`\n* `'spectral'`: use a spectral embedding of the\ + \ graph\n* `'random'`: assign initial embedding positions at random.\n" + info: null + default: + - "spectral" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "UMAP (Uniform Manifold Approximation and Projection) is a manifold\ + \ learning technique suitable for visualizing high-dimensional data. Besides tending\ + \ to be faster than tSNE, it optimizes the embedding such that it best reflects\ + \ the topology of the data, which we represent throughout Scanpy using a neighborhood\ + \ graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances\ + \ in the embedding such that these best match the distribution of distances in\ + \ the high-dimensional space. 
We use the implementation of umap-learn [McInnes18].\ + \ For a few comparisons of UMAP with tSNE, see this preprint.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 
500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dimred/umap/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/umap" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/dimred/umap/umap" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/dimred/umap/setup_logger.py b/target/docker/dimred/umap/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/dimred/umap/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/dimred/umap/umap b/target/docker/dimred/umap/umap new file mode 100755 index 00000000000..629f8ddf870 --- /dev/null +++ b/target/docker/dimred/umap/umap @@ -0,0 +1,1306 @@ +#!/usr/bin/env bash + +# umap 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="umap" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "umap 0.12.4" + echo "" + echo "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning" + echo "technique suitable for visualizing high-dimensional data. Besides tending to be" + echo "faster than tSNE, it optimizes the embedding such that it best reflects the" + echo "topology of the data, which we represent throughout Scanpy using a neighborhood" + echo "graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor" + echo "distances in the embedding such that these best match the distribution of" + echo "distances in the high-dimensional space. We use the implementation of umap-learn" + echo "[McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --uns_neighbors" + echo " type: string" + echo " default: neighbors" + echo " The \`.uns\` neighbors slot as output by the \`find_neighbors\` component." 
+ echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --obsm_output" + echo " type: string" + echo " default: umap" + echo " The pre/postfix under which to store the UMAP results." + echo "" + echo "Arguments:" + echo " --min_dist" + echo " type: double" + echo " default: 0.5" + echo " The effective minimum distance between embedded points. Smaller values" + echo " will result in a more clustered/clumped embedding where nearby points on" + echo " the manifold are drawn closer together, while larger values will result" + echo " on a more even dispersal of points. The value should be set relative to" + echo " the spread value, which determines the scale at which embedded points" + echo " will be spread out." + echo "" + echo " --spread" + echo " type: double" + echo " default: 1.0" + echo " The effective scale of embedded points. In combination with \`min_dist\`" + echo " this determines how clustered/clumped the embedded points are." + echo "" + echo " --num_components" + echo " type: integer" + echo " default: 2" + echo " The number of dimensions of the embedding." + echo "" + echo " --max_iter" + echo " type: integer" + echo " The number of iterations (epochs) of the optimization. Called \`n_epochs\`" + echo " in the original UMAP. Default is set to 500 if" + echo " neighbors['connectivities'].shape[0] <= 10000, else 200." + echo "" + echo " --alpha" + echo " type: double" + echo " default: 1.0" + echo " The initial learning rate for the embedding optimization." + echo "" + echo " --gamma" + echo " type: double" + echo " default: 1.0" + echo " Weighting applied to negative samples in low dimensional embedding" + echo " optimization. 
Values higher than one will result in greater weight being" + echo " given to negative samples." + echo "" + echo " --negative_sample_rate" + echo " type: integer" + echo " default: 5" + echo " The number of negative edge/1-simplex samples to use per positive" + echo " edge/1-simplex sample in optimizing the low dimensional embedding." + echo "" + echo " --init_pos" + echo " type: string" + echo " default: spectral" + echo " How to initialize the low dimensional embedding. Called \`init\` in the" + echo " original UMAP. Options are:" + echo " * Any key from \`.obsm\`" + echo " * \`'paga'\`: positions from \`paga()\`" + echo " * \`'spectral'\`: use a spectral embedding of the graph" + echo " * \`'random'\`: assign initial embedding positions at random." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? 
+ fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Dries De Maeyer" +LABEL org.opencontainers.image.description="Companion container for running component dimred umap" +LABEL org.opencontainers.image.created="2024-01-31T09:08:31Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-umap-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "umap 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --uns_neighbors) + [ -n "$VIASH_PAR_UNS_NEIGHBORS" ] && ViashError Bad arguments for option \'--uns_neighbors\': \'$VIASH_PAR_UNS_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_neighbors. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --uns_neighbors=*) + [ -n "$VIASH_PAR_UNS_NEIGHBORS" ] && ViashError Bad arguments for option \'--uns_neighbors=*\': \'$VIASH_PAR_UNS_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_UNS_NEIGHBORS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_dist) + [ -n "$VIASH_PAR_MIN_DIST" ] && ViashError Bad arguments for option \'--min_dist\': \'$VIASH_PAR_MIN_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_DIST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_dist. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_dist=*) + [ -n "$VIASH_PAR_MIN_DIST" ] && ViashError Bad arguments for option \'--min_dist=*\': \'$VIASH_PAR_MIN_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_DIST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --spread) + [ -n "$VIASH_PAR_SPREAD" ] && ViashError Bad arguments for option \'--spread\': \'$VIASH_PAR_SPREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --spread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --spread=*) + [ -n "$VIASH_PAR_SPREAD" ] && ViashError Bad arguments for option \'--spread=*\': \'$VIASH_PAR_SPREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SPREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --num_components) + [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_components. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --num_components=*) + [ -n "$VIASH_PAR_NUM_COMPONENTS" ] && ViashError Bad arguments for option \'--num_components=*\': \'$VIASH_PAR_NUM_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_COMPONENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_iter) + [ -n "$VIASH_PAR_MAX_ITER" ] && ViashError Bad arguments for option \'--max_iter\': \'$VIASH_PAR_MAX_ITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_ITER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_iter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_iter=*) + [ -n "$VIASH_PAR_MAX_ITER" ] && ViashError Bad arguments for option \'--max_iter=*\': \'$VIASH_PAR_MAX_ITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_ITER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alpha) + [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALPHA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alpha=*) + [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha=*\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALPHA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gamma) + [ -n "$VIASH_PAR_GAMMA" ] && ViashError Bad arguments for option \'--gamma\': \'$VIASH_PAR_GAMMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GAMMA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gamma. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gamma=*) + [ -n "$VIASH_PAR_GAMMA" ] && ViashError Bad arguments for option \'--gamma=*\': \'$VIASH_PAR_GAMMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GAMMA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --negative_sample_rate) + [ -n "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" ] && ViashError Bad arguments for option \'--negative_sample_rate\': \'$VIASH_PAR_NEGATIVE_SAMPLE_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NEGATIVE_SAMPLE_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --negative_sample_rate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --negative_sample_rate=*) + [ -n "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" ] && ViashError Bad arguments for option \'--negative_sample_rate=*\': \'$VIASH_PAR_NEGATIVE_SAMPLE_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NEGATIVE_SAMPLE_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --init_pos) + [ -n "$VIASH_PAR_INIT_POS" ] && ViashError Bad arguments for option \'--init_pos\': \'$VIASH_PAR_INIT_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INIT_POS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --init_pos. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --init_pos=*) + [ -n "$VIASH_PAR_INIT_POS" ] && ViashError Bad arguments for option \'--init_pos=*\': \'$VIASH_PAR_INIT_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INIT_POS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + # the strategy is a separate argument: consume the flag AND its value, + # but keep accepting a bare '---setup' (empty strategy) without failing the shift + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + if [ $# -ge 2 ]; then + shift 2 + else + shift 1 + fi + ;; + ---setup=*) + # flag and value share one argument, so only that single argument is consumed + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the mount specification is embedded in this argument ($1), not in the next one + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/dimred_umap:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + # each unit is derived from the previous one, rounding up to the next whole unit + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # (unset takes the variable NAME; expanding the empty value would unset nothing + # and leave the variable set-but-empty for the later \${VAR+x} checks) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # pass the variable name, not its (empty) expansion + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_UNS_NEIGHBORS+x} ]; then + VIASH_PAR_UNS_NEIGHBORS="neighbors" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="umap" +fi +if [ -z ${VIASH_PAR_MIN_DIST+x} ]; then + VIASH_PAR_MIN_DIST="0.5" +fi +if [ -z ${VIASH_PAR_SPREAD+x} ]; then + VIASH_PAR_SPREAD="1.0" +fi +if [ -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then + VIASH_PAR_NUM_COMPONENTS="2" +fi +if [ -z ${VIASH_PAR_ALPHA+x} ]; then + VIASH_PAR_ALPHA="1.0" +fi +if [ -z ${VIASH_PAR_GAMMA+x} ]; then + VIASH_PAR_GAMMA="1.0" +fi +if [ -z ${VIASH_PAR_NEGATIVE_SAMPLE_RATE+x} ]; then + VIASH_PAR_NEGATIVE_SAMPLE_RATE="5" +fi +if [ -z ${VIASH_PAR_INIT_POS+x} ]; then + VIASH_PAR_INIT_POS="spectral" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MIN_DIST" ]]; then + if ! [[ "$VIASH_PAR_MIN_DIST" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_dist' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SPREAD" ]]; then + if ! [[ "$VIASH_PAR_SPREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--spread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NUM_COMPONENTS" ]]; then + if ! [[ "$VIASH_PAR_NUM_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--num_components' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_ITER" ]]; then + if ! [[ "$VIASH_PAR_MAX_ITER" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_iter' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALPHA" ]]; then + if ! [[ "$VIASH_PAR_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--alpha' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_GAMMA" ]]; then + if ! [[ "$VIASH_PAR_GAMMA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--gamma' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" ]]; then + if ! [[ "$VIASH_PAR_NEGATIVE_SAMPLE_RATE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--negative_sample_rate' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/dimred_umap:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_umap:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/dimred_umap:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-umap-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import scanpy as sc +import mudata as mu +import sys +import anndata as ad + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'uns_neighbors': $( if [ ! -z ${VIASH_PAR_UNS_NEIGHBORS+x} ]; then echo "r'${VIASH_PAR_UNS_NEIGHBORS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'min_dist': $( if [ ! -z ${VIASH_PAR_MIN_DIST+x} ]; then echo "float(r'${VIASH_PAR_MIN_DIST//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'spread': $( if [ ! -z ${VIASH_PAR_SPREAD+x} ]; then echo "float(r'${VIASH_PAR_SPREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_iter': $( if [ ! -z ${VIASH_PAR_MAX_ITER+x} ]; then echo "int(r'${VIASH_PAR_MAX_ITER//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'gamma': $( if [ ! -z ${VIASH_PAR_GAMMA+x} ]; then echo "float(r'${VIASH_PAR_GAMMA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'negative_sample_rate': $( if [ ! -z ${VIASH_PAR_NEGATIVE_SAMPLE_RATE+x} ]; then echo "int(r'${VIASH_PAR_NEGATIVE_SAMPLE_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'init_pos': $( if [ ! -z ${VIASH_PAR_INIT_POS+x} ]; then echo "r'${VIASH_PAR_INIT_POS//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s", par["input"]) +mdata = mu.read_h5mu(par["input"]) + +logger.info("Computing UMAP for modality '%s'", par['modality']) +data = mdata.mod[par['modality']] + +if par['uns_neighbors'] not in data.uns: + raise ValueError(f"'{par['uns_neighbors']}' was not found in .mod['{par['modality']}'].uns.") + +# create temporary AnnData +# ... because sc.tl.umap doesn't allow to choose +# the obsm output slot +# ... 
also we can see scanpy is a data format dependency hell +neigh_key = par["uns_neighbors"] +temp_uns = { neigh_key: data.uns[neigh_key] } +conn_key = temp_uns[neigh_key]['connectivities_key'] +dist_key = temp_uns[neigh_key]['distances_key'] +temp_obsp = { + conn_key: data.obsp[conn_key], + dist_key: data.obsp[dist_key], +} +pca_key = temp_uns[neigh_key]['params']['use_rep'] +temp_obsm = { + pca_key: data.obsm[pca_key] +} + +temp_adata = ad.AnnData( + obsm=temp_obsm, + obsp=temp_obsp, + uns=temp_uns, + shape=data.shape +) + +sc.tl.umap( + temp_adata, + min_dist=par["min_dist"], + spread=par["spread"], + n_components=par["num_components"], + maxiter=par["max_iter"], + alpha=par["alpha"], + gamma=par["gamma"], + negative_sample_rate=par["negative_sample_rate"], + init_pos=par["init_pos"], + neighbors_key=neigh_key +) + +data.obsm[par['obsm_output']] = temp_adata.obsm['X_umap'] + +logger.info("Writing to %s.", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/download/download_file/.config.vsh.yaml b/target/docker/download/download_file/.config.vsh.yaml new file mode 100644 index 00000000000..65dd0f3deb9 --- /dev/null +++ b/target/docker/download/download_file/.config.vsh.yaml @@ -0,0 +1,138 @@ +functionality: + name: "download_file" + namespace: "download" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "string" + name: "--input" + description: "URL to a file to download." + info: null + example: + - "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Path where to store output." 
+ info: null + example: + - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--verbose" + alternatives: + - "-v" + description: "Increase verbosity" + info: null + direction: "input" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Download a file.\n" + usage: "download_file \\\n --input https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5\ + \ \\\n --output output_rna.h5\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "bash:5.1.16" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus 
= 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/download/download_file/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/download/download_file" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/download/download_file/download_file" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/download/download_file/download_file b/target/docker/download/download_file/download_file new file mode 100755 index 00000000000..3c5c12f2954 --- /dev/null +++ b/target/docker/download/download_file/download_file @@ -0,0 +1,931 @@ +#!/usr/bin/env bash + +# download_file 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 
1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="download_file" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "download_file 0.12.4" + echo "" + echo "Download a file." + echo "" + echo "Usage:" + echo "download_file \\" + echo " --input" + echo "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + echo "\\" + echo " --output output_rna.h5" + echo "" + echo "Arguments:" + echo " --input" + echo " type: string, required parameter" + echo " example:" + echo "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + echo " URL to a file to download." + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + echo " Path where to store output." + echo "" + echo " -v, --verbose" + echo " type: boolean_true" + echo " Increase verbosity" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# Exits with status 1 (after logging a critical message) when the docker +# executable is not on the PATH or when 'docker version' fails. +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + # 'command -v' has to be executed as a command; wrapped in '[ ]' it is parsed as a (malformed) test expression and the branch never triggers + if !
command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM bash:5.1.16 + +ENTRYPOINT [] + + +RUN : +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component download download_file" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-download_file-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "download_file 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --verbose) + [ -n "$VIASH_PAR_VERBOSE" ] && ViashError Bad arguments for option \'--verbose\': \'$VIASH_PAR_VERBOSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERBOSE=true + shift 1 + ;; + -v) + [ -n "$VIASH_PAR_VERBOSE" ] && ViashError Bad arguments for option \'-v\': \'$VIASH_PAR_VERBOSE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_VERBOSE=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + # the strategy is a separate token: consume both the flag and its value + shift 2 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + # flag and value share one token: consume exactly one + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the value is embedded in the current token ($1), not in the next one + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/download_download_file:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/download_download_file:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_download_file:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_download_file:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # (unset takes the variable NAME; expanding the empty variable would pass it nothing) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # pass the name, not the (empty) value, so the variable is really removed + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_VERBOSE+x} ]; then + VIASH_PAR_VERBOSE="false" +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_VERBOSE" ]]; then + if ! [[ "$VIASH_PAR_VERBOSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--verbose' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/download_download_file:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_download_file:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_download_file:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-download_file-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\"'\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=() + +if [ "\$par_verbose" != "true" ]; then + extra_params+=("--quiet") +fi + +wget "\$par_input" -O "\$par_output" "\${extra_params[@]}" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/download/sync_test_resources/.config.vsh.yaml b/target/docker/download/sync_test_resources/.config.vsh.yaml new file mode 100644 index 00000000000..97a117275d2 --- /dev/null +++ b/target/docker/download/sync_test_resources/.config.vsh.yaml @@ -0,0 +1,170 @@ +functionality: + name: "sync_test_resources" + namespace: "download" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "string" + name: "--input" + alternatives: + - "-i" + description: "Path to the S3 bucket to sync from." + info: null + default: + - "s3://openpipelines-data" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Path to the test resource directory." 
+ info: null + default: + - "resources_test" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--quiet" + description: "Does not display the operations performed from the specified command." + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--dryrun" + description: "Displays the operations that would be performed using the specified\ + \ command without actually running them." + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--delete" + description: "Files that exist in the destination but not in the source are deleted\ + \ during sync." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--exclude" + description: "Exclude all files or objects from the command that matches the specified\ + \ pattern." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Synchronise the test resources from s3://openpipelines-data to resources_test" + usage: "sync_test_resources\nsync_test_resources --input s3://openpipelines-data\ + \ --output resources_test\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "amazon/aws-cli:2.11.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "yum" + packages: + - "procps" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + 
id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/download/sync_test_resources" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/download/sync_test_resources/sync_test_resources" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/download/sync_test_resources/sync_test_resources b/target/docker/download/sync_test_resources/sync_test_resources new file mode 100755 index 00000000000..8ebba8fac99 --- /dev/null +++ b/target/docker/download/sync_test_resources/sync_test_resources @@ -0,0 +1,1018 @@ +#!/usr/bin/env bash + +# sync_test_resources 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see 
https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="sync_test_resources" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "sync_test_resources 0.12.4" + echo "" + echo "Synchronise the test resources from s3://openpipelines-data to resources_test" + echo "" + echo "Usage:" + echo "sync_test_resources" + echo "sync_test_resources --input s3://openpipelines-data --output resources_test" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: string" + echo " default: s3://openpipelines-data" + echo " Path to the S3 bucket to sync from." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " default: resources_test" + echo " Path to the test resource directory." + echo "" + echo " --quiet" + echo " type: boolean_true" + echo " Does not display the operations performed from the specified command." 
+ echo "" + echo " --dryrun" + echo " type: boolean_true" + echo " Displays the operations that would be performed using the specified" + echo " command without actually running them." + echo "" + echo " --delete" + echo " type: boolean_true" + echo " Files that exist in the destination but not in the source are deleted" + echo " during sync." + echo "" + echo " --exclude" + echo " type: string, multiple values allowed" + echo " Exclude all files or objects from the command that matches the specified" + echo " pattern." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM amazon/aws-cli:2.11.0 + +ENTRYPOINT [] + + +RUN yum install -y procps && \ + yum clean all && \ + rm -rf /var/cache/yum + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component download sync_test_resources" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-sync_test_resources-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "sync_test_resources 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quiet) + [ -n "$VIASH_PAR_QUIET" ] && ViashError Bad arguments for option \'--quiet\': \'$VIASH_PAR_QUIET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUIET=true + shift 1 + ;; + --dryrun) + [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DRYRUN=true + shift 1 + ;; + --delete) + [ -n "$VIASH_PAR_DELETE" ] && ViashError Bad arguments for option \'--delete\': \'$VIASH_PAR_DELETE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DELETE=true + shift 1 + ;; + --exclude) + if [ -z "$VIASH_PAR_EXCLUDE" ]; then + VIASH_PAR_EXCLUDE="$2" + else + VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --exclude=*) + if [ -z "$VIASH_PAR_EXCLUDE" ]; then + VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_INPUT+x} ]; then + VIASH_PAR_INPUT="s3://openpipelines-data" +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="resources_test" +fi +if [ -z ${VIASH_PAR_QUIET+x} ]; then + VIASH_PAR_QUIET="false" +fi +if [ -z ${VIASH_PAR_DRYRUN+x} ]; then + VIASH_PAR_DRYRUN="false" +fi +if [ -z ${VIASH_PAR_DELETE+x} ]; then + VIASH_PAR_DELETE="false" +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_QUIET" ]]; then + if ! [[ "$VIASH_PAR_QUIET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--quiet' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DRYRUN" ]]; then + if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DELETE" ]]; then + if ! [[ "$VIASH_PAR_DELETE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--delete' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/download_sync_test_resources:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-sync_test_resources-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\"'\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "${VIASH_PAR_DRYRUN}" | sed "s#'#'\"'\"'#g;s#.*#par_dryrun='&'#" ; else echo "# par_dryrun="; fi ) +$( if [ ! -z ${VIASH_PAR_DELETE+x} ]; then echo "${VIASH_PAR_DELETE}" | sed "s#'#'\"'\"'#g;s#.*#par_delete='&'#" ; else echo "# par_delete="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=( ) + +if [ "\$par_quiet" == "true" ]; then + extra_params+=( "--quiet" ) +fi +if [ "\$par_dryrun" == "true" ]; then + extra_params+=( "--dryrun" ) +fi +if [ "\$par_delete" == "true" ]; then + extra_params+=( "--delete" ) +fi + +if [ ! -z \${par_exclude+x} ]; then + IFS=":" + for var in \$par_exclude; do + unset IFS + extra_params+=( "--exclude" "\$var" ) + done +fi + + +# Disable the use of the Amazon EC2 instance metadata service (IMDS). +# see https://florian.ec/blog/github-actions-awscli-errors/ +# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 +export AWS_EC2_METADATA_DISABLED=true + +aws s3 sync "\$par_input" "\$par_output" --no-sign-request "\${extra_params[@]}" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/files/make_params/.config.vsh.yaml b/target/docker/files/make_params/.config.vsh.yaml new file mode 100644 index 00000000000..809cf66177e --- /dev/null +++ b/target/docker/files/make_params/.config.vsh.yaml @@ -0,0 +1,220 @@ +functionality: + name: "make_params" + namespace: "files" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--base_dir" + description: "Base directory to search recursively" + info: null + example: + - "/path/to/dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--pattern" + 
description: "An optional regular expression. Only file names which match the\ + \ regular expression will be matched." + info: null + example: + - "*.fastq.gz" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_dirname_drop" + description: "For every matched file, the parent directory will be traversed N\ + \ times." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_basename_id" + description: "The unique identifiers will consist of at least N dirnames." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--id_name" + description: "The name for storing the identifier field in the yaml." + info: null + default: + - "id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--path_name" + description: "The name for storing the path field in the yaml." + info: null + default: + - "path" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--group_name" + description: "Top level name for the group of entries." + info: null + example: + - "param_list" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output YAML file." + info: null + example: + - "params.yaml" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "r_script" + path: "script.R" + is_executable: true + description: "Looks for files in a directory and turn it in a params file." 
+ test_resources: + - type: "bash_script" + path: "test_make_params.sh" + is_executable: true + - type: "file" + path: "../../../src" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/randpy:r4.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/files/make_params/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/files/make_params" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/files/make_params/make_params" + viash_version: 
"0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/files/make_params/make_params b/target/docker/files/make_params/make_params new file mode 100755 index 00000000000..60503d61e0b --- /dev/null +++ b/target/docker/files/make_params/make_params @@ -0,0 +1,1100 @@ +#!/usr/bin/env bash + +# make_params 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Robrecht Cannoodt (maintainer, author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function 
ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. 
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="make_params" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "make_params 0.12.4" + echo "" + echo "Looks for files in a directory and turn it in a params file." 
+ echo "" + echo "Arguments:" + echo " --base_dir" + echo " type: file, required parameter, file must exist" + echo " example: /path/to/dir" + echo " Base directory to search recursively" + echo "" + echo " --pattern" + echo " type: string, required parameter" + echo " example: *.fastq.gz" + echo " An optional regular expression. Only file names which match the regular" + echo " expression will be matched." + echo "" + echo " --n_dirname_drop" + echo " type: integer" + echo " default: 0" + echo " For every matched file, the parent directory will be traversed N times." + echo "" + echo " --n_basename_id" + echo " type: integer" + echo " default: 0" + echo " The unique identifiers will consist of at least N dirnames." + echo "" + echo " --id_name" + echo " type: string" + echo " default: id" + echo " The name for storing the identifier field in the yaml." + echo "" + echo " --path_name" + echo " type: string" + echo " default: path" + echo " The name for storing the path field in the yaml." + echo "" + echo " --group_name" + echo " type: string" + echo " example: param_list" + echo " Top level name for the group of entries." + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: params.yaml" + echo " Output YAML file." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ghcr.io/data-intuitive/randpy:r4.0 + +ENTRYPOINT [] + + +RUN : +LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component files make_params" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-make_params-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "make_params 0.12.4" + exit + ;; + --base_dir) + [ -n "$VIASH_PAR_BASE_DIR" ] && ViashError Bad arguments for option \'--base_dir\': \'$VIASH_PAR_BASE_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BASE_DIR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --base_dir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --base_dir=*) + [ -n "$VIASH_PAR_BASE_DIR" ] && ViashError Bad arguments for option \'--base_dir=*\': \'$VIASH_PAR_BASE_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BASE_DIR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --pattern) + [ -n "$VIASH_PAR_PATTERN" ] && ViashError Bad arguments for option \'--pattern\': \'$VIASH_PAR_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PATTERN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --pattern. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --pattern=*) + [ -n "$VIASH_PAR_PATTERN" ] && ViashError Bad arguments for option \'--pattern=*\': \'$VIASH_PAR_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PATTERN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_dirname_drop) + [ -n "$VIASH_PAR_N_DIRNAME_DROP" ] && ViashError Bad arguments for option \'--n_dirname_drop\': \'$VIASH_PAR_N_DIRNAME_DROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_DIRNAME_DROP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_dirname_drop. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --n_dirname_drop=*) + [ -n "$VIASH_PAR_N_DIRNAME_DROP" ] && ViashError Bad arguments for option \'--n_dirname_drop=*\': \'$VIASH_PAR_N_DIRNAME_DROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_DIRNAME_DROP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_basename_id) + [ -n "$VIASH_PAR_N_BASENAME_ID" ] && ViashError Bad arguments for option \'--n_basename_id\': \'$VIASH_PAR_N_BASENAME_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_BASENAME_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_basename_id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_basename_id=*) + [ -n "$VIASH_PAR_N_BASENAME_ID" ] && ViashError Bad arguments for option \'--n_basename_id=*\': \'$VIASH_PAR_N_BASENAME_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_BASENAME_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --id_name) + [ -n "$VIASH_PAR_ID_NAME" ] && ViashError Bad arguments for option \'--id_name\': \'$VIASH_PAR_ID_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id_name=*) + [ -n "$VIASH_PAR_ID_NAME" ] && ViashError Bad arguments for option \'--id_name=*\': \'$VIASH_PAR_ID_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ID_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --path_name) + [ -n "$VIASH_PAR_PATH_NAME" ] && ViashError Bad arguments for option \'--path_name\': \'$VIASH_PAR_PATH_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PATH_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --path_name. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --path_name=*) + [ -n "$VIASH_PAR_PATH_NAME" ] && ViashError Bad arguments for option \'--path_name=*\': \'$VIASH_PAR_PATH_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PATH_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --group_name) + [ -n "$VIASH_PAR_GROUP_NAME" ] && ViashError Bad arguments for option \'--group_name\': \'$VIASH_PAR_GROUP_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GROUP_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --group_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --group_name=*) + [ -n "$VIASH_PAR_GROUP_NAME" ] && ViashError Bad arguments for option \'--group_name=*\': \'$VIASH_PAR_GROUP_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GROUP_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/files_make_params:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/files_make_params:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/files_make_params:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/files_make_params:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_BASE_DIR+x} ]; then + ViashError '--base_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_PATTERN+x} ]; then + ViashError '--pattern' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_N_DIRNAME_DROP+x} ]; then + VIASH_PAR_N_DIRNAME_DROP="0" +fi +if [ -z ${VIASH_PAR_N_BASENAME_ID+x} ]; then + VIASH_PAR_N_BASENAME_ID="0" +fi +if [ -z ${VIASH_PAR_ID_NAME+x} ]; then + VIASH_PAR_ID_NAME="id" +fi +if [ -z ${VIASH_PAR_PATH_NAME+x} ]; then + VIASH_PAR_PATH_NAME="path" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_BASE_DIR" ] && [ ! -e "$VIASH_PAR_BASE_DIR" ]; then + ViashError "Input file '$VIASH_PAR_BASE_DIR' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_N_DIRNAME_DROP" ]]; then + if ! [[ "$VIASH_PAR_N_DIRNAME_DROP" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_dirname_drop' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_BASENAME_ID" ]]; then + if ! [[ "$VIASH_PAR_N_BASENAME_ID" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_basename_id' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_BASE_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BASE_DIR")" ) + VIASH_PAR_BASE_DIR=$(ViashAutodetectMount "$VIASH_PAR_BASE_DIR") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/files_make_params:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/files_make_params:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/files_make_params:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-make_params-XXXXXX").R +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +library(dplyr) +library(purrr) + +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "base_dir" = $( if [ ! -z ${VIASH_PAR_BASE_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_BASE_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "pattern" = $( if [ ! 
-z ${VIASH_PAR_PATTERN+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATTERN" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "n_dirname_drop" = $( if [ ! -z ${VIASH_PAR_N_DIRNAME_DROP+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_DIRNAME_DROP" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "n_basename_id" = $( if [ ! -z ${VIASH_PAR_N_BASENAME_ID+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_BASENAME_ID" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "id_name" = $( if [ ! -z ${VIASH_PAR_ID_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "path_name" = $( if [ ! -z ${VIASH_PAR_PATH_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATH_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "group_name" = $( if [ ! -z ${VIASH_PAR_GROUP_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_GROUP_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +cat("> Listing files of base dir ", par\$base_dir, "\\n", sep = "") +paths <- list.files( + normalizePath(par\$base_dir), + pattern = par\$pattern, + recursive = TRUE, + full.names = TRUE +) + +cat("> Traversing up ", par\$n_dirname_drop, " times\\n", sep = "") +for (i in seq_len(par\$n_dirname_drop)) { + paths <- dirname(paths) %>% unique() +} + +# removing /viash_automount in case we're inside a docker container +paths <- gsub("^/viash_automount", "", paths) + +cat("> Checking whether basenames are unique\\n") +i <- par\$n_basename_id +maxi <- strsplit(paths, "/") %>% map_int(length) %>% max + +regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\$") +ids <- gsub("/", "_", gsub(regex, "\\\\1", paths)) + +cat("> Printing first five rows\\n") +print(tibble(id = ids, path = paths) %>% head(5)) +cat("\\n") + +while (i < maxi && any(duplicated(ids))) { + i <- i + 1 + cat("Duplicated ids detected, combining with ", i, " dirnames in an attempt to get unique ids.\\n") + regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\$") + ids <- gsub("/", "_", gsub(regex, "\\\\1", paths)) + + cat("> Printing first five rows\\n") + print(tibble(id = ids, path = paths) %>% head(5)) + cat("\\n") +} + +cat("> Transforming into list of items\\n") +par_list <- map2( + ids, paths, + function(id, input) { + setNames(list(id, input), c(par\$id_name, par\$path_name)) + } +) + +if (!is.null(par\$group_name)) { + par_list <- setNames(list(par_list), par\$group_name) +} + +cat("> Writing as YAML\\n") +yaml::write_yaml(par_list, par\$output) +VIASHMAIN +Rscript "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ !
-z "$VIASH_PAR_BASE_DIR" ]; then + VIASH_PAR_BASE_DIR=$(ViashStripAutomount "$VIASH_PAR_BASE_DIR") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/filter/delimit_fraction/.config.vsh.yaml b/target/docker/filter/delimit_fraction/.config.vsh.yaml new file mode 100644 index 00000000000..3c64b182903 --- /dev/null +++ b/target/docker/filter/delimit_fraction/.config.vsh.yaml @@ -0,0 +1,241 @@ +functionality: + name: "delimit_fraction" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + 
multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + info: null + example: + - "raw_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_fraction_column" + description: "Name of column from .var dataframe selecting\na column that contains\ + \ floating point values between 0 and 1.\n" + info: null + example: + - "fraction_mitochondrial" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_name_filter" + description: "In which .obs slot to store a boolean array corresponding to which\ + \ observations should be removed." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "double" + name: "--min_fraction" + description: "Min fraction for an observation to be retained (True in output)." + info: null + default: + - 0.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_fraction" + description: "Max fraction for an observation to be retained (True in output)." 
+ info: null + default: + - 1.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Turns a column containing values between 0 and 1 into a boolean column\ + \ based on thresholds.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: 
"memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/delimit_fraction" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/delimit_fraction/delimit_fraction" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/filter/delimit_fraction/delimit_fraction b/target/docker/filter/delimit_fraction/delimit_fraction new file mode 100755 index 00000000000..6c1fde1cd2e --- /dev/null +++ b/target/docker/filter/delimit_fraction/delimit_fraction @@ -0,0 +1,1207 @@ +#!/usr/bin/env bash + +# delimit_fraction 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: 
ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="delimit_fraction" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "delimit_fraction 0.12.4" + echo "" + echo "Turns a column containing values between 0 and 1 into a boolean column based on" + echo "thresholds." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --layer" + echo " type: string" + echo " example: raw_counts" + echo "" + echo " --obs_fraction_column" + echo " type: string, required parameter" + echo " example: fraction_mitochondrial" + echo " Name of column from .var dataframe selecting" + echo " a column that contains floating point values between 0 and 1." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --obs_name_filter" + echo " type: string, required parameter" + echo " In which .obs slot to store a boolean array corresponding to which" + echo " observations should be removed." 
+ echo "" + echo "Arguments:" + echo " --min_fraction" + echo " type: double" + echo " default: 0.0" + echo " min: 0.0" + echo " max: 1.0" + echo " Min fraction for an observation to be retained (True in output)." + echo "" + echo " --max_fraction" + echo " type: double" + echo " default: 1.0" + echo " min: 0.0" + echo " max: 1.0" + echo " Max fraction for an observation to be retained (True in output)." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2+ : optional extra arguments, forwarded to ViashDockerBuild (e.g. --no-cache)
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+# ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  # a failed pull must not abort the script; it triggers the build instead
+  save=$-; set +e
+  ViashDockerPull $1
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild $@
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+# ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  # globals: VSHD_ID is also read by ViashDockerPush for its messages
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild $VSHD_ID
+  # the 'ifneedbe*' strategies only act when the image is not available locally
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
+    save=$-; set +e
+    ViashDockerLocalTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      # image found locally: nothing to do
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  # push only when the remote does not already have the tag
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
+    save=$-; set +e
+    ViashDockerRemoteTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $@ : commands to verify being present
+# exits with status 1 when a command is missing from the container
+# examples:
+# ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  # the loop runs inside the container and prints the first missing command
+  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# note : the heredoc delimiter is quoted, so the text is printed verbatim
+#        without shell expansion
+# examples:
+# ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM python:3.9-slim
+
+ENTRYPOINT []
+
+
+RUN apt-get update && \
+  DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \
+  rm -rf /var/lib/apt/lists/*
+
+RUN pip install --upgrade pip && \
+  pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1"
+
+LABEL org.opencontainers.image.authors="Dries Schaumont"
+LABEL org.opencontainers.image.description="Companion container for running component filter delimit_fraction"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-delimit_fraction-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+        # '..' drops the last collected component, if there is one
+        len=${#outp[@]}
+        if ((len==0)); then
+          continue
+        else
+          unset outp[$((len-1))]
+        fi
+        ;;
+      *)
+        len=${#outp[@]}
+        outp[$len]="$i"
+        ;;
+      esac
+    done
+    echo /"${outp[*]}"
+  )
+}
+# ViashAutodetectMount: auto configuring docker mounts from parameters
+# $1 : The parameter value
+# returns : New parameter
+# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker
+#   (the functions below only print; callers append to this array themselves)
+# examples:
+# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar'
+# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"'
+function ViashAutodetectMount {
+  abs_path=$(ViashAbsolutePath "$1")
+  # a directory is mounted as is; for a file its parent directory is mounted
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  echo "$mount_target/$base_name"
+}
+# ViashAutodetectMountArg: print the docker --volume argument for a path
+# $1 : The parameter value
+function ViashAutodetectMountArg {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
+  echo "--volume=\"$mount_source:$mount_target\""
+}
+# ViashStripAutomount: remove a leading /viash_automount prefix from a path
+# $1 : path, made absolute before stripping
+function ViashStripAutomount {
+  abs_path=$(ViashAbsolutePath "$1")
+  echo "${abs_path#/viash_automount}"
+}
+# ViashExtractFlags: Retain leading flag
+# $1 : string with a possible leading flag
+# return : leading flag
+# examples:
+# ViashExtractFlags --foo=bar # returns --foo
+function ViashExtractFlags {
+  echo $1 | sed 's/=.*//'
+}
+# initialise variables
+VIASH_EXTRA_MOUNTS=()
+
+# initialise the positional-argument string (a plain string, eval'd after parsing) and the run mode
+VIASH_POSITIONAL_ARGS=''
+VIASH_MODE='run'
+
+# parse arguments one option at a time; the first matching pattern wins
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help)
+            ViashHelp
+            exit
+            ;;
+        ---v|---verbose)
+            let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
+            shift 1
+            ;;
+        ---verbosity)
+            VIASH_VERBOSITY="$2"
+            shift 2
+            ;;
+        ---verbosity=*)
+
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "delimit_fraction 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer=*) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_fraction_column) + [ -n "$VIASH_PAR_OBS_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--obs_fraction_column\': \'$VIASH_PAR_OBS_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_FRACTION_COLUMN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_fraction_column. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_fraction_column=*) + [ -n "$VIASH_PAR_OBS_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--obs_fraction_column=*\': \'$VIASH_PAR_OBS_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_FRACTION_COLUMN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_name_filter) + [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_NAME_FILTER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_filter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_name_filter=*) + [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter=*\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_NAME_FILTER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_fraction) + [ -n "$VIASH_PAR_MIN_FRACTION" ] && ViashError Bad arguments for option \'--min_fraction\': \'$VIASH_PAR_MIN_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_FRACTION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_fraction. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_fraction=*) + [ -n "$VIASH_PAR_MIN_FRACTION" ] && ViashError Bad arguments for option \'--min_fraction=*\': \'$VIASH_PAR_MIN_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_FRACTION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_fraction) + [ -n "$VIASH_PAR_MAX_FRACTION" ] && ViashError Bad arguments for option \'--max_fraction\': \'$VIASH_PAR_MAX_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_MAX_FRACTION="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_fraction. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --max_fraction=*)
+            [ -n "$VIASH_PAR_MAX_FRACTION" ] && ViashError Bad arguments for option \'--max_fraction=*\': \'$VIASH_PAR_MAX_FRACTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_MAX_FRACTION=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            # flag and value are separate words: consume both, otherwise the
+            # strategy is parsed again as a positional argument
+            shift 2
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            # flag and value are one word: consume only that word. 'shift 2'
+            # swallowed the next argument, or failed (aborting under 'set -e')
+            # when this was the last one
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        # note: '---v' is matched earlier by the '---v|---verbose' arm, so only
+        # '---volume' can reach this arm
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            # the value is part of the current word ($1), not the next one
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters.
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # 'unset' takes the variable NAME; with '$' the empty value was expanded
+    # and nothing was unset, leaving the variable set-but-empty for the
+    # later '${VAR+x}' checks
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # same as above: pass the name, not the (empty) value
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OBS_FRACTION_COLUMN+x} ]; then
+  ViashError '--obs_fraction_column' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then
+  ViashError '--obs_name_filter' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_MIN_FRACTION+x} ]; then + VIASH_PAR_MIN_FRACTION="0.0" +fi +if [ -z ${VIASH_PAR_MAX_FRACTION+x} ]; then + VIASH_PAR_MAX_FRACTION="1.0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MIN_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_MIN_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_fraction' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_MIN_FRACTION '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--min_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_MIN_FRACTION -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--min_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--min_fraction' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_MIN_FRACTION '<=' 1.0 | bc` -eq 1 ]]; then + ViashError '--min_fraction' has to be less than or equal to 1.0. 
Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_MIN_FRACTION -v n2=1.0 'BEGIN { print (n1 <= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--min_fraction' has be less than or equal to 1.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--min_fraction' specifies a maximum value but the value was not verified as neither \'bc\' or \'awk\' are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_MAX_FRACTION" ]]; then + if ! [[ "$VIASH_PAR_MAX_FRACTION" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--max_fraction' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_MAX_FRACTION '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--max_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_MAX_FRACTION -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--max_fraction' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--max_fraction' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_MAX_FRACTION '<=' 1.0 | bc` -eq 1 ]]; then + ViashError '--max_fraction' has to be less than or equal to 1.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_MAX_FRACTION -v n2=1.0 'BEGIN { print (n1 <= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--max_fraction' has be less than or equal to 1.0. Use "--help" to get more information on the parameters. 
+ exit 1 + fi + else + ViashWarning '--max_fraction' specifies a maximum value but the value was not verified as neither \'bc\' or \'awk\' are present on the system. + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_delimit_fraction:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-delimit_fraction-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' + +import mudata as mu +import numpy as np +import sys +from operator import le, ge +from pandas.api.types import is_float_dtype + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_fraction_column': $( if [ ! -z ${VIASH_PAR_OBS_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OBS_FRACTION_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'min_fraction': $( if [ ! -z ${VIASH_PAR_MIN_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MIN_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_fraction': $( if [ ! -z ${VIASH_PAR_MAX_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MAX_FRACTION//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input data") +mdata = mu.read_h5mu(par["input"]) + +mdata.var_names_make_unique() + +mod = par['modality'] +logger.info("Processing modality %s.", mod) +data = mdata.mod[mod] + +logger.info("\\tUnfiltered data: %s", data) + +logger.info("\\tComputing aggregations.") + +def apply_filter_to_mask(mask, base, filter, comparator): + new_filt = np.ravel(comparator(base, filter)) + num_removed = np.sum(np.invert(new_filt) & mask) + mask &= new_filt + return num_removed, mask + +try: + fraction = data.obs[par['obs_fraction_column']] 
+except KeyError: + raise ValueError(f"Could not find column '{par['obs_fraction_column']}'") +if not is_float_dtype(fraction): + raise ValueError(f"Column '{par['obs_fraction_column']}' does not contain float datatype.") +if fraction.max() > 1: + raise ValueError(f"Column '{par['obs_fraction_column']}' contains values > 1.") +if fraction.min() < 0: + raise ValueError(f"Column '{par['obs_fraction_column']}' contains values < 0.") + + +# Filter cells. Each entry holds: the name of the parameter providing the bound, +# the values to test, the comparison a kept cell must satisfy, and the log message. +# The column is a generic fraction in [0, 1] (validated above), so the messages +# do not assume a percentage or any particular kind of reads. +filters = (("min_fraction", fraction, ge, "\\tRemoving %s cells with a fraction <%s."), + ("max_fraction", fraction, le, "\\tRemoving %s cells with a fraction >%s."), + ) + +keep_cells = np.repeat(True, data.n_obs) +for par_name, base, comparator, message in filters: + # A bound that is absent or None is not applied; the parameter name itself + # must never be used as a threshold. + threshold = par.get(par_name) + if threshold is not None: + num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, threshold, comparator) + logger.info(message, num_removed, threshold) + +# Store the boolean keep-mask in .obs; no observations are removed here. +data.obs[par["obs_name_filter"]] = keep_cells + +logger.info("\\tFiltered data: %s", data) +logger.info("Writing output data to %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/filter/delimit_fraction/setup_logger.py b/target/docker/filter/delimit_fraction/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/filter/delimit_fraction/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/filter/do_filter/.config.vsh.yaml b/target/docker/filter/do_filter/.config.vsh.yaml new file mode 100644 index 00000000000..32536f480c6 --- /dev/null +++ b/target/docker/filter/do_filter/.config.vsh.yaml @@ -0,0 +1,202 @@ +functionality: + name: "do_filter" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: 
"string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_filter" + description: "Which .obs columns to use to filter the observations by." + info: null + example: + - "filter_with_x" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_filter" + description: "Which .var columns to use to filter the observations by." + info: null + example: + - "filter_with_x" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Remove observations and variables based on specified .obs and .var\ + \ columns.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 
128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/do_filter" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/do_filter/do_filter" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/filter/do_filter/do_filter b/target/docker/filter/do_filter/do_filter new file mode 100755 index 00000000000..02b8b2f2eb3 --- /dev/null +++ b/target/docker/filter/do_filter/do_filter @@ -0,0 +1,1056 @@ +#!/usr/bin/env bash + +# do_filter 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (maintainer, contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# 
usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + # only emit when the current verbosity is at least the required level + if [ $VIASH_VERBOSITY -ge $required_level ]; then + # the message is written to stderr (>&2), not stdout + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="do_filter" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "do_filter 0.12.4" + echo "" + echo "Remove observations and variables based on specified .obs and .var columns." + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --obs_filter" + echo " type: string, multiple values allowed" + echo " example: filter_with_x" + echo " Which .obs columns to use to filter the observations by." + echo "" + echo " --var_filter" + echo " type: string, multiple values allowed" + echo " example: filter_with_x" + echo " Which .var columns to use to filter the observations by." + echo "" + echo " --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# Exits with status 1 (after logging a critical message) when the docker +# executable cannot be found or when 'docker version' fails. +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + # 'command -v' has to be run as a command: wrapped in '[ ... ]' it is parsed + # as a malformed test expression, so a missing docker was never reported. + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + # temporarily disable errexit so the exit code of 'docker version' can be inspected + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : exit code of 'docker push' (0 when the push succeeded) +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + # temporarily disable errexit so a failed push can be reported instead of aborting + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + # report on the image that was actually pushed ($1) rather than on the + # global VSHD_ID, which is only set by ViashDockerSetup + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component filter do_filter" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-do_filter-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + # (paths are quoted so a temp or resources directory containing spaces works) + ViashDockerfile > "$dockerfile" + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + # temporarily disable errexit so a failed build can be reported with its transcript + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" + else + docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log" + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "do_filter 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_filter) + if [ -z "$VIASH_PAR_OBS_FILTER" ]; then + VIASH_PAR_OBS_FILTER="$2" + else + VIASH_PAR_OBS_FILTER="$VIASH_PAR_OBS_FILTER:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_filter. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --obs_filter=*) + if [ -z "$VIASH_PAR_OBS_FILTER" ]; then + VIASH_PAR_OBS_FILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OBS_FILTER="$VIASH_PAR_OBS_FILTER:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --var_filter) + if [ -z "$VIASH_PAR_VAR_FILTER" ]; then + VIASH_PAR_VAR_FILTER="$2" + else + VIASH_PAR_VAR_FILTER="$VIASH_PAR_VAR_FILTER:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_filter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_filter=*) + if [ -z "$VIASH_PAR_VAR_FILTER" ]; then + VIASH_PAR_VAR_FILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_VAR_FILTER="$VIASH_PAR_VAR_FILTER:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        --output_compression=*)
+            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        # ---setup <strategy>: switch to Docker setup mode with the given strategy.
+        # The flag and its value are two separate words, so both must be consumed;
+        # guard first so that a missing value is reported instead of 'shift 2'
+        # failing silently under 'set -e'.
+        ---setup)
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2
+            ;;
+        # ---setup=<strategy>: flag and value arrive as a single word, so only
+        # one positional parameter is consumed.
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        # ---dockerfile: print the Dockerfile for this component and stop.
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        # ---volume <spec>: pass an extra volume mount through to docker.
+        # ('---v' is not listed here: it is already claimed by the
+        # '---v|---verbose' arm earlier in this case statement.)
+        ---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        # ---volume=<spec>: the value is embedded in "$1", so strip the flag from "$1".
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        # ---debug: open an interactive shell in the container instead of running the component.
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters.
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_do_filter:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 
1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  # quote the argument so a value such as '4 GB' reaches the helper in one
+  # piece; ViashMemoryAsBytes strips the whitespace itself
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # (unset takes the variable NAME; 'unset $VAR' would expand the empty value
+    # and leave the variable defined, so later ${VAR+x} tests would still fire)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+# (${VAR+x} expands to 'x' only when VAR is set, so these tests fail on unset
+# variables but accept variables that are set to an empty string)
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_do_filter:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_do_filter:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_do_filter:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-do_filter-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +import numpy as np +import sys + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_filter': $( if [ ! -z ${VIASH_PAR_OBS_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_FILTER//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'var_filter': $( if [ ! -z ${VIASH_PAR_VAR_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_FILTER//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
+  'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
+  'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi )
+}
+
+### VIASH END
+
+sys.path.append(meta["resources_dir"])
+# START TEMPORARY WORKAROUND setup_logger
+# reason: resources aren't available when using Nextflow fusion
+# from setup_logger import setup_logger
+def setup_logger():
+    """Attach an INFO-level stdout handler to the root logger and return that logger."""
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
+# END TEMPORARY WORKAROUND setup_logger
+logger = setup_logger()
+
+logger.info("Reading %s", par['input'])
+mdata = mu.read_h5mu(par["input"])
+
+mod = par["modality"]
+logger.info("Processing modality '%s'", mod)
+
+# Start from keep-everything masks; every requested filter column is AND-ed in.
+obs_filt = np.repeat(True, mdata.mod[mod].n_obs)
+var_filt = np.repeat(True, mdata.mod[mod].n_vars)
+
+# A filter argument that was not provided arrives as None; treat it as no filters.
+par["obs_filter"] = par["obs_filter"] if par["obs_filter"] else []
+par["var_filter"] = par["var_filter"] if par["var_filter"] else []
+
+for obs_name in par["obs_filter"]:
+    logger.info("Filtering modality '%s' observations by .obs['%s']", mod, obs_name)
+    # Fail early on a missing column; past this guard the column is known to exist.
+    if obs_name not in mdata.mod[mod].obs:
+        raise ValueError(f".mod[{mod}].obs[{obs_name}] does not exist.")
+    obs_filt &= mdata.mod[mod].obs[obs_name]
+
+for var_name in par["var_filter"]:
+    logger.info("Filtering modality '%s' variables by .var['%s']", mod, var_name)
+    if var_name not in mdata.mod[mod].var:
+        raise ValueError(f".mod[{mod}].var[{var_name}] does not exist.")
+    var_filt &= mdata.mod[mod].var[var_name]
+
+# Subset the modality with both masks and store a copy back in the MuData object.
+mdata.mod[mod] = mdata.mod[mod][obs_filt, var_filt].copy()
+
+logger.info("Writing h5mu to file %s.", par["output"])
+mdata.write_h5mu(par["output"], compression=par["output_compression"])
+VIASHMAIN
+python -B "\$tempscript" &
+wait "\$!"
+
+VIASHEOF
+
+
+# strip viash automount from file paths
+if [ ! -z "$VIASH_PAR_INPUT" ]; then
+  VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT")
+fi
+if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
+  VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT")
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG")
+fi
+if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR")
+fi
+
+
+# check whether required files exist
+if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then
+  ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/filter/do_filter/setup_logger.py b/target/docker/filter/do_filter/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/filter/do_filter/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/filter/filter_with_counts/.config.vsh.yaml b/target/docker/filter/filter_with_counts/.config.vsh.yaml new file mode 100644 index 00000000000..0f926af6628 --- /dev/null +++ b/target/docker/filter/filter_with_counts/.config.vsh.yaml @@ -0,0 +1,295 @@ +functionality: + name: "filter_with_counts" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: 
"input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + info: null + example: + - "raw_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--do_subset" + description: "Whether to subset before storing the output." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--obs_name_filter" + description: "In which .obs slot to store a boolean array corresponding to which\ + \ observations should be removed." + info: null + default: + - "filter_with_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_name_filter" + description: "In which .var slot to store a boolean array corresponding to which\ + \ variables should be removed." + info: null + default: + - "filter_with_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--min_counts" + description: "Minimum number of counts captured per cell." 
+ info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_counts" + description: "Maximum number of counts captured per cell." + info: null + example: + - 5000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_genes_per_cell" + description: "Minimum of non-zero values per cell." + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_genes_per_cell" + description: "Maximum of non-zero values per cell." + info: null + example: + - 1500000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells_per_gene" + description: "Minimum of non-zero values per gene." + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Filter scRNA-seq data based on the primary QC metrics. 
\nThis is based\ + \ on both the UMI counts, the gene counts \nand the mitochondrial genes (genes\ + \ starting with mt/MT).\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + 
cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_counts" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_counts/filter_with_counts" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/filter/filter_with_counts/filter_with_counts b/target/docker/filter/filter_with_counts/filter_with_counts new file mode 100755 index 00000000000..b51fa813a52 --- /dev/null +++ b/target/docker/filter/filter_with_counts/filter_with_counts @@ -0,0 +1,1241 @@ +#!/usr/bin/env bash + +# filter_with_counts 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (author) +# * Robrecht Cannoodt (maintainer, author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on 
the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '. Nothing is printed when
+#         VIASH_VERBOSITY is lower than the required level.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    # log lines go to stderr
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  # forwards to ViashLog with the 'info' severity code
  ViashLog "$VIASH_LOGCODE_INFO" info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog "$VIASH_LOGCODE_DEBUG" debug "$@"
}

# find source folder of this component
# (quoted, so a script path containing whitespace is passed as one argument)
VIASH_META_RESOURCES_DIR="$(ViashSourceDir "${BASH_SOURCE[0]}")"

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="filter_with_counts"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
# stdout: the name, version, description and parameter list of the component
function ViashHelp {
  echo "filter_with_counts 0.12.4"
  echo ""
  echo "Filter scRNA-seq data based on the primary QC metrics."
  echo "This is based on both the UMI counts, the gene counts"
  echo "and the mitochondrial genes (genes starting with mt/MT)."
  echo ""
  echo "Inputs:"
  echo " --input"
  echo " type: file, required parameter, file must exist"
  echo " example: input.h5mu"
  echo " Input h5mu file"
  echo ""
  echo " --modality"
  echo " type: string"
  echo " default: rna"
  echo ""
  echo " --layer"
  echo " type: string"
  echo " example: raw_counts"
  echo ""
  echo "Outputs:"
  echo " --output"
  echo " type: file, output, file must exist"
  echo " example: output.h5mu"
  echo " Output h5mu file."
  echo ""
  echo " --output_compression"
  echo " type: string"
  echo " example: gzip"
  echo " choices: [ gzip, lzf ]"
  echo " The compression format to be used on the output h5mu object."
  echo ""
  echo " --do_subset"
  echo " type: boolean_true"
  echo " Whether to subset before storing the output."
  echo ""
  echo " --obs_name_filter"
  echo " type: string"
  echo " default: filter_with_counts"
  echo " In which .obs slot to store a boolean array corresponding to which"
  echo " observations should be removed."
  echo ""
  echo " --var_name_filter"
  echo " type: string"
  echo " default: filter_with_counts"
  echo " In which .var slot to store a boolean array corresponding to which"
  echo " variables should be removed."
  echo ""
  echo "Arguments:"
  echo " --min_counts"
  echo " type: integer"
  echo " example: 200"
  echo " Minimum number of counts captured per cell."
  echo ""
  echo " --max_counts"
  echo " type: integer"
  echo " example: 5000000"
  echo " Maximum number of counts captured per cell."
  echo ""
  echo " --min_genes_per_cell"
  echo " type: integer"
  echo " example: 200"
  echo " Minimum of non-zero values per cell."
  echo ""
  echo " --max_genes_per_cell"
  echo " type: integer"
  echo " example: 1500000"
  echo " Maximum of non-zero values per cell."
  echo ""
  echo " --min_cells_per_gene"
  echo " type: integer"
  echo " example: 3"
  echo " Minimum of non-zero values per gene."
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the docker executable cannot be found
# or when 'docker version' fails.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  local shell_opts daemon_status

  ViashDebug "Checking whether Docker is installed"
  # 'command -v' must be run as a command; wrapped in '[ ... ]' it was parsed
  # as a malformed test expression, so this branch could never be taken
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so a failing 'docker version' can be reported
  shell_opts=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  daemon_status=$?
  [[ $shell_opts =~ e ]] && set -e
  if [ $daemon_status -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote.
# Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  # 'docker images -q' prints nothing when no local image matches
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image could be pulled
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  # Declared local on purpose: callers such as ViashDockerPullElseBuild keep
  # their own errexit snapshot in a variable named 'save'. Assigning to it
  # here without 'local' overwrote that snapshot and left errexit disabled.
  local save out
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ "$VIASH_VERBOSITY" -ge "$VIASH_LOGCODE_INFO" ]; then
    # verbose: let docker print its own progress output
    docker pull "$1" && return 0 || return 1
  else
    # quiet: silence docker and report a failed pull as a warning
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image could be pushed
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?
# returns '1'
function ViashDockerPush {
  local push_opts push_status
  ViashNotice "Pushing image to '$1'"
  # disable errexit so a failed push can be reported instead of aborting
  push_opts=$-; set +e
  if [ "$VIASH_VERBOSITY" -ge "$VIASH_LOGCODE_INFO" ]; then
    docker push "$1"
    push_status=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    push_status=$?
  fi
  [[ $push_opts =~ e ]] && set -e
  # report on the image that was actually pushed ($1) rather than on the
  # global VSHD_ID, which is only set when called through ViashDockerSetup
  if [ $push_status -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $push_status
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : extra arguments, forwarded to ViashDockerBuild only
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  # The errexit snapshot deliberately does not use the name 'save':
  # ViashDockerPull assigns to 'save' itself and bash scoping is dynamic, so
  # a shared name would be overwritten and errexit would never be restored.
  local pull_else_build_opts pull_status
  pull_else_build_opts=$-; set +e
  ViashDockerPull "$1"
  pull_status=$?
  [[ $pull_else_build_opts =~ e ]] && set -e
  if [ $pull_status -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component filter filter_with_counts" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_with_counts-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "filter_with_counts 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer=*) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --do_subset) + [ -n "$VIASH_PAR_DO_SUBSET" ] && ViashError Bad arguments for option \'--do_subset\': \'$VIASH_PAR_DO_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DO_SUBSET=true + shift 1 + ;; + --obs_name_filter) + [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBS_NAME_FILTER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_filter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_name_filter=*) + [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter=*\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_NAME_FILTER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_name_filter) + [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_NAME_FILTER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_name_filter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_name_filter=*) + [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter=*\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_NAME_FILTER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_counts) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_counts=*) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_counts) + [ -n "$VIASH_PAR_MAX_COUNTS" ] && ViashError Bad arguments for option \'--max_counts\': \'$VIASH_PAR_MAX_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_counts=*) + [ -n "$VIASH_PAR_MAX_COUNTS" ] && ViashError Bad arguments for option \'--max_counts=*\': \'$VIASH_PAR_MAX_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_genes_per_cell) + [ -n "$VIASH_PAR_MIN_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--min_genes_per_cell\': \'$VIASH_PAR_MIN_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_GENES_PER_CELL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_genes_per_cell. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_genes_per_cell=*) + [ -n "$VIASH_PAR_MIN_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--min_genes_per_cell=*\': \'$VIASH_PAR_MIN_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_GENES_PER_CELL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_genes_per_cell) + [ -n "$VIASH_PAR_MAX_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--max_genes_per_cell\': \'$VIASH_PAR_MAX_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_GENES_PER_CELL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_genes_per_cell. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --max_genes_per_cell=*) + [ -n "$VIASH_PAR_MAX_GENES_PER_CELL" ] && ViashError Bad arguments for option \'--max_genes_per_cell=*\': \'$VIASH_PAR_MAX_GENES_PER_CELL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_GENES_PER_CELL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells_per_gene) + [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_PER_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_per_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells_per_gene=*) + [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene=*\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_PER_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + VIASH_DOCKER_SETUP_STRATEGY="$2" + # the strategy is a separate word: consume the flag and its value + shift 2 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + # flag and value are a single word: consume only that word + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the value is part of the current word, so strip the flag from $1 + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters.
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # (unset takes the variable name; passing the expanded empty value unset nothing) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # same as above: name the variable instead of expanding it + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_DO_SUBSET+x} ]; then + VIASH_PAR_DO_SUBSET="false" +fi +if [ -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then + VIASH_PAR_OBS_NAME_FILTER="filter_with_counts" +fi +if [ -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then + VIASH_PAR_VAR_NAME_FILTER="filter_with_counts" +fi + +# check whether required files exist +if [ !
-z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_DO_SUBSET" ]]; then + if ! [[ "$VIASH_PAR_DO_SUBSET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--do_subset' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MAX_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_GENES_PER_CELL" ]]; then + if ! [[ "$VIASH_PAR_MIN_GENES_PER_CELL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_genes_per_cell' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_GENES_PER_CELL" ]]; then + if ! [[ "$VIASH_PAR_MAX_GENES_PER_CELL" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_genes_per_cell' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS_PER_GENE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells_per_gene' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! 
-z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_counts:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_with_counts-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' + +import mudata as mu +import numpy as np +import sys +from operator import le, ge, gt + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'obs_name_filter': $( if [ ! 
-z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_counts': $( if [ ! -z ${VIASH_PAR_MAX_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MAX_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_CELL//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MAX_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MAX_GENES_PER_CELL//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cells_per_gene': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_PER_GENE//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input data") +mdata = mu.read_h5mu(par["input"]) + +mdata.var_names_make_unique() + +mod = par['modality'] +logger.info("Processing modality %s.", mod) +data = mdata.mod[mod] + +logger.info("\\tUnfiltered data: %s", data) + +logger.info("\\tComputing aggregations.") +n_counts_per_cell = np.ravel(np.sum(data.X, axis=1)) +n_cells_per_gene = np.sum(data.X > 0, axis=0) +n_genes_per_cell = np.sum(data.X > 0, axis=1) + +def apply_filter_to_mask(mask, base, filter, comparator): + new_filt = np.ravel(comparator(base, filter)) + 
num_removed = np.sum(np.invert(new_filt) & mask) + mask &= new_filt + return num_removed, mask + +# Filter genes +keep_genes = np.repeat(True, data.n_vars) +if par["min_cells_per_gene"] is not None: + num_removed, keep_genes = apply_filter_to_mask(keep_genes, + n_cells_per_gene, + par['min_cells_per_gene'], + ge) + logger.info("\\tRemoving %s genes with non-zero values in <%s cells.", + num_removed, par['min_cells_per_gene']) + +# Filter cells +filters = (("min_genes_per_cell", n_genes_per_cell, ge, "\\tRemoving %s cells with non-zero values in <%s genes."), + ("max_genes_per_cell", n_genes_per_cell, le, "\\tRemoving %s cells with non-zero values in >%s genes."), + ("min_counts", n_counts_per_cell, ge, "\\tRemoving %s cells with <%s total counts."), + ("max_counts", n_counts_per_cell, le, "\\tRemoving %s cells with >%s total counts."), + (0, np.sum(data[:,keep_genes].X, axis=1), gt, "\\tRemoving %s cells with %s counts")) + +keep_cells = np.repeat(True, data.n_obs) +for filter_name_or_value, base, comparator, message in filters: + try: + filter = par[filter_name_or_value] + except KeyError: + filter = filter_name_or_value + if filter is not None: + num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, filter, comparator) + logger.info(message, num_removed, filter) + +if par["obs_name_filter"] is not None: + data.obs[par["obs_name_filter"]] = keep_cells +if par["var_name_filter"] is not None: + data.var[par["var_name_filter"]] = keep_genes + +if par["do_subset"]: + mdata.mod[mod] = data[keep_cells, keep_genes] + +logger.info("\\tFiltered data: %s", data) +logger.info("Writing output data to %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/filter/filter_with_counts/setup_logger.py b/target/docker/filter/filter_with_counts/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/filter/filter_with_counts/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/filter/filter_with_hvg/.config.vsh.yaml b/target/docker/filter/filter_with_hvg/.config.vsh.yaml new file mode 100644 index 00000000000..f04dc6f5339 --- /dev/null +++ b/target/docker/filter/filter_with_hvg/.config.vsh.yaml @@ -0,0 +1,352 @@ +functionality: + name: "filter_with_hvg" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - 
name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + description: "use adata.layers[layer] for expression values instead of adata.X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_name_filter" + description: "In which .var slot to store a boolean array corresponding to which\ + \ observations should be filtered out." 
+ info: null + default: + - "filter_with_hvg" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--varm_name" + description: "In which .varm slot to store additional metadata." + info: null + default: + - "hvg" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--do_subset" + description: "Whether to subset before storing the output." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--flavor" + description: "Choose the flavor for identifying highly variable genes. For the\ + \ dispersion based methods\nin their default workflows, Seurat passes the cutoffs\ + \ whereas Cell Ranger passes n_top_genes.\n" + info: null + default: + - "seurat" + required: false + choices: + - "seurat" + - "cell_ranger" + - "seurat_v3" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_top_genes" + description: "Number of highly-variable genes to keep. Mandatory if flavor='seurat_v3'." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_mean" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." + info: null + default: + - 0.0125 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_mean" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." 
+ info: null + default: + - 3.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_disp" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_disp" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.\ + \ Default is +inf." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--span" + description: "The fraction of the data (cells) used when estimating the variance\ + \ in the loess model fit if flavor='seurat_v3'." + info: null + default: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_bins" + description: "Number of bins for binning the mean gene expression. Normalization\ + \ is done with respect to each bin. If just a single gene falls into a bin,\ + \ the normalized dispersion is artificially set to 1." + info: null + default: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch_key" + description: "If specified, highly-variable genes are selected within each batch\ + \ separately and merged. This simple \nprocess avoids the selection of batch-specific\ + \ genes and acts as a lightweight batch correction method. \nFor all flavors,\ + \ genes are first sorted by how many batches they are a HVG. For dispersion-based\ + \ flavors \nties are broken by normalized dispersion. 
If flavor = 'seurat_v3',\ + \ ties are broken by the median (across\nbatches) rank based on within-batch\ + \ normalized variance.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\n\ + Expects logarithmized data, except when flavor='seurat_v3' in which count data\ + \ is expected.\n\nDepending on flavor, this reproduces the R-implementations of\ + \ Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the\ + \ dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion\ + \ is obtained by scaling with the mean and standard deviation of the dispersions\ + \ for genes falling into a given bin for mean expression of genes. This means\ + \ that for each bin of mean expression, highly variable genes are selected.\n\n\ + For [Stuart19], a normalized variance for each gene is computed. First, the data\ + \ are standardized (i.e., z-score normalization per feature) with a regularized\ + \ standard deviation. Next, the normalized variance is computed as the variance\ + \ of each gene after the transformation. 
Genes are ranked by the normalized variance.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scikit-misc" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: 
"/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_hvg" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_hvg/filter_with_hvg" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/filter/filter_with_hvg/filter_with_hvg b/target/docker/filter/filter_with_hvg/filter_with_hvg new file mode 100755 index 00000000000..04341153a0f --- /dev/null +++ b/target/docker/filter/filter_with_hvg/filter_with_hvg @@ -0,0 +1,1407 @@ +#!/usr/bin/env bash + +# filter_with_hvg 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (contributor) +# * Robrecht Cannoodt (maintainer, contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events 
depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] ', printed only when VIASH_VERBOSITY >= $1. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="filter_with_hvg" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "filter_with_hvg 0.12.4" + echo "" + echo "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19]." + echo "" + echo "Expects logarithmized data, except when flavor='seurat_v3' in which count data" + echo "is expected." + echo "" + echo "Depending on flavor, this reproduces the R-implementations of Seurat [Satija15]," + echo "Cell Ranger [Zheng17], and Seurat v3 [Stuart19]." + echo "" + echo "For the dispersion-based methods ([Satija15] and [Zheng17]), the normalized" + echo "dispersion is obtained by scaling with the mean and standard deviation of the" + echo "dispersions for genes falling into a given bin for mean expression of genes." + echo "This means that for each bin of mean expression, highly variable genes are" + echo "selected." + echo "" + echo "For [Stuart19], a normalized variance for each gene is computed. First, the data" + echo "are standardized (i.e., z-score normalization per feature) with a regularized" + echo "standard deviation. Next, the normalized variance is computed as the variance of" + echo "each gene after the transformation. Genes are ranked by the normalized variance."
+ echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --layer" + echo " type: string" + echo " use adata.layers[layer] for expression values instead of adata.X." + echo "" + echo " --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --var_name_filter" + echo " type: string" + echo " default: filter_with_hvg" + echo " In which .var slot to store a boolean array corresponding to which" + echo " observations should be filtered out." + echo "" + echo " --varm_name" + echo " type: string" + echo " default: hvg" + echo " In which .varm slot to store additional metadata." + echo "" + echo " --do_subset" + echo " type: boolean_true" + echo " Whether to subset before storing the output." + echo "" + echo " --flavor" + echo " type: string" + echo " default: seurat" + echo " choices: [ seurat, cell_ranger, seurat_v3 ]" + echo " Choose the flavor for identifying highly variable genes. For the" + echo " dispersion based methods" + echo " in their default workflows, Seurat passes the cutoffs whereas Cell" + echo " Ranger passes n_top_genes." + echo "" + echo " --n_top_genes" + echo " type: integer" + echo " Number of highly-variable genes to keep. Mandatory if" + echo " flavor='seurat_v3'." + echo "" + echo " --min_mean" + echo " type: double" + echo " default: 0.0125" + echo " If n_top_genes is defined, this and all other cutoffs for the means and" + echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." 
+ echo "" + echo " --max_mean" + echo " type: double" + echo " default: 3.0" + echo " If n_top_genes is defined, this and all other cutoffs for the means and" + echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." + echo "" + echo " --min_disp" + echo " type: double" + echo " default: 0.5" + echo " If n_top_genes is defined, this and all other cutoffs for the means and" + echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." + echo "" + echo " --max_disp" + echo " type: double" + echo " If n_top_genes is defined, this and all other cutoffs for the means and" + echo " the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." + echo " Default is +inf." + echo "" + echo " --span" + echo " type: double" + echo " default: 0.3" + echo " The fraction of the data (cells) used when estimating the variance in" + echo " the loess model fit if flavor='seurat_v3'." + echo "" + echo " --n_bins" + echo " type: integer" + echo " default: 20" + echo " Number of bins for binning the mean gene expression. Normalization is" + echo " done with respect to each bin. If just a single gene falls into a bin," + echo " the normalized dispersion is artificially set to 1." + echo "" + echo " --obs_batch_key" + echo " type: string" + echo " If specified, highly-variable genes are selected within each batch" + echo " separately and merged. This simple" + echo " process avoids the selection of batch-specific genes and acts as a" + echo " lightweight batch correction method." + echo " For all flavors, genes are first sorted by how many batches they are a" + echo " HVG. For dispersion-based flavors" + echo " ties are broken by normalized dispersion. If flavor = 'seurat_v3', ties" + echo " are broken by the median (across" + echo " batches) rank based on within-batch normalized variance." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# exits with status 1 when the docker command is missing or 'docker version' fails +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # negate the command itself; inside [ ... ] it would only be parsed as test operands + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9 + +ENTRYPOINT [] + + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scikit-misc" + +LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component filter filter_with_hvg" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_with_hvg-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "filter_with_hvg 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer=*) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_name_filter) + [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_NAME_FILTER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_name_filter. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --var_name_filter=*) + [ -n "$VIASH_PAR_VAR_NAME_FILTER" ] && ViashError Bad arguments for option \'--var_name_filter=*\': \'$VIASH_PAR_VAR_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_NAME_FILTER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --varm_name) + [ -n "$VIASH_PAR_VARM_NAME" ] && ViashError Bad arguments for option \'--varm_name\': \'$VIASH_PAR_VARM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARM_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --varm_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --varm_name=*) + [ -n "$VIASH_PAR_VARM_NAME" ] && ViashError Bad arguments for option \'--varm_name=*\': \'$VIASH_PAR_VARM_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARM_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --do_subset) + [ -n "$VIASH_PAR_DO_SUBSET" ] && ViashError Bad arguments for option \'--do_subset\': \'$VIASH_PAR_DO_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DO_SUBSET=true + shift 1 + ;; + --flavor) + [ -n "$VIASH_PAR_FLAVOR" ] && ViashError Bad arguments for option \'--flavor\': \'$VIASH_PAR_FLAVOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FLAVOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --flavor. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --flavor=*) + [ -n "$VIASH_PAR_FLAVOR" ] && ViashError Bad arguments for option \'--flavor=*\': \'$VIASH_PAR_FLAVOR\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_FLAVOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_top_genes) + [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_TOP_GENES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_top_genes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_top_genes=*) + [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes=*\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_TOP_GENES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_mean) + [ -n "$VIASH_PAR_MIN_MEAN" ] && ViashError Bad arguments for option \'--min_mean\': \'$VIASH_PAR_MIN_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_MEAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_mean. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_mean=*) + [ -n "$VIASH_PAR_MIN_MEAN" ] && ViashError Bad arguments for option \'--min_mean=*\': \'$VIASH_PAR_MIN_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_MEAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_mean) + [ -n "$VIASH_PAR_MAX_MEAN" ] && ViashError Bad arguments for option \'--max_mean\': \'$VIASH_PAR_MAX_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_MEAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_mean. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_mean=*) + [ -n "$VIASH_PAR_MAX_MEAN" ] && ViashError Bad arguments for option \'--max_mean=*\': \'$VIASH_PAR_MAX_MEAN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MAX_MEAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_disp) + [ -n "$VIASH_PAR_MIN_DISP" ] && ViashError Bad arguments for option \'--min_disp\': \'$VIASH_PAR_MIN_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_DISP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_disp. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_disp=*) + [ -n "$VIASH_PAR_MIN_DISP" ] && ViashError Bad arguments for option \'--min_disp=*\': \'$VIASH_PAR_MIN_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_DISP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_disp) + [ -n "$VIASH_PAR_MAX_DISP" ] && ViashError Bad arguments for option \'--max_disp\': \'$VIASH_PAR_MAX_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DISP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_disp. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_disp=*) + [ -n "$VIASH_PAR_MAX_DISP" ] && ViashError Bad arguments for option \'--max_disp=*\': \'$VIASH_PAR_MAX_DISP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DISP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --span) + [ -n "$VIASH_PAR_SPAN" ] && ViashError Bad arguments for option \'--span\': \'$VIASH_PAR_SPAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --span. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --span=*) + [ -n "$VIASH_PAR_SPAN" ] && ViashError Bad arguments for option \'--span=*\': \'$VIASH_PAR_SPAN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SPAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_bins) + [ -n "$VIASH_PAR_N_BINS" ] && ViashError Bad arguments for option \'--n_bins\': \'$VIASH_PAR_N_BINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_BINS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_bins. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_bins=*) + [ -n "$VIASH_PAR_N_BINS" ] && ViashError Bad arguments for option \'--n_bins=*\': \'$VIASH_PAR_N_BINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_BINS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_batch_key) + [ -n "$VIASH_PAR_OBS_BATCH_KEY" ] && ViashError Bad arguments for option \'--obs_batch_key\': \'$VIASH_PAR_OBS_BATCH_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH_KEY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch_key. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_batch_key=*) + [ -n "$VIASH_PAR_OBS_BATCH_KEY" ] && ViashError Bad arguments for option \'--obs_batch_key=*\': \'$VIASH_PAR_OBS_BATCH_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_OBS_BATCH_KEY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            # the strategy is a separate word, so refuse a dangling flag before consuming two words
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2  # consume the flag and its strategy (was 'shift 1', which left the strategy behind as a positional argument)
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1  # the strategy is embedded in "$1" (was 'shift 2', which fails when this is the last argument)
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---volume)  # the '---v' alias was dropped here: it is already claimed by '---v|---verbose' earlier in this case statement
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")  # the value lives in "$1", not in the following argument
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`  # quoted so a value containing spaces reaches the helper as one argument
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty (by name: 'unset $VAR' expands to a bare 'unset' and removes nothing)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty (by name, so that '${VIASH_META_CPUS+x}'-style checks see it as absent)
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then + VIASH_PAR_VAR_NAME_FILTER="filter_with_hvg" +fi +if [ -z ${VIASH_PAR_VARM_NAME+x} ]; then + VIASH_PAR_VARM_NAME="hvg" +fi +if [ -z ${VIASH_PAR_DO_SUBSET+x} ]; then + VIASH_PAR_DO_SUBSET="false" +fi +if [ -z ${VIASH_PAR_FLAVOR+x} ]; then + VIASH_PAR_FLAVOR="seurat" +fi +if [ -z ${VIASH_PAR_MIN_MEAN+x} ]; then + VIASH_PAR_MIN_MEAN="0.0125" +fi +if [ -z ${VIASH_PAR_MAX_MEAN+x} ]; then + VIASH_PAR_MAX_MEAN="3.0" +fi +if [ -z ${VIASH_PAR_MIN_DISP+x} ]; then + VIASH_PAR_MIN_DISP="0.5" +fi +if [ -z ${VIASH_PAR_SPAN+x} ]; then + VIASH_PAR_SPAN="0.3" +fi +if [ -z ${VIASH_PAR_N_BINS+x} ]; then + VIASH_PAR_N_BINS="20" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_DO_SUBSET" ]]; then + if ! [[ "$VIASH_PAR_DO_SUBSET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--do_subset' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_TOP_GENES" ]]; then + if ! [[ "$VIASH_PAR_N_TOP_GENES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_top_genes' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_MEAN" ]]; then + if ! [[ "$VIASH_PAR_MIN_MEAN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_mean' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_MEAN" ]]; then + if ! [[ "$VIASH_PAR_MAX_MEAN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--max_mean' has to be a double. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_DISP" ]]; then + if ! [[ "$VIASH_PAR_MIN_DISP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_disp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_DISP" ]]; then + if ! [[ "$VIASH_PAR_MAX_DISP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--max_disp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SPAN" ]]; then + if ! [[ "$VIASH_PAR_SPAN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--span' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_BINS" ]]; then + if ! [[ "$VIASH_PAR_N_BINS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_bins' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_FLAVOR" ]; then + VIASH_PAR_FLAVOR_CHOICES=("seurat:cell_ranger:seurat_v3") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_FLAVOR_CHOICES[*]}:" =~ ":$VIASH_PAR_FLAVOR:" ]]; then + ViashError '--flavor' specified value of \'$VIASH_PAR_FLAVOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_hvg:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_with_hvg-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import scanpy as sc +import mudata as mu +import numpy as np +import sys +import re + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'varm_name': $( if [ ! -z ${VIASH_PAR_VARM_NAME+x} ]; then echo "r'${VIASH_PAR_VARM_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'do_subset': $( if [ ! 
-z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'flavor': $( if [ ! -z ${VIASH_PAR_FLAVOR+x} ]; then echo "r'${VIASH_PAR_FLAVOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_mean': $( if [ ! -z ${VIASH_PAR_MIN_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MIN_MEAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_mean': $( if [ ! -z ${VIASH_PAR_MAX_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MAX_MEAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_disp': $( if [ ! -z ${VIASH_PAR_MIN_DISP+x} ]; then echo "float(r'${VIASH_PAR_MIN_DISP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_disp': $( if [ ! -z ${VIASH_PAR_MAX_DISP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DISP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'span': $( if [ ! -z ${VIASH_PAR_SPAN+x} ]; then echo "float(r'${VIASH_PAR_SPAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_bins': $( if [ ! -z ${VIASH_PAR_N_BINS+x} ]; then echo "int(r'${VIASH_PAR_N_BINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'obs_batch_key': $( if [ ! -z ${VIASH_PAR_OBS_BATCH_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +mdata = mu.read_h5mu(par["input"]) +mdata.var_names_make_unique() + +mod = par['modality'] +logger.info(f"Processing modality '%s'", mod) +data = mdata.mod[mod] + +# Workaround for issue +# https://github.com/scverse/scanpy/issues/2239 +# https://github.com/scverse/scanpy/issues/2181 +if 
par['flavor'] != "seurat_v3": + # This component requires log normalized data when flavor is not seurat_v3 + # We assume that the data is correctly normalized but scanpy will look at + # .uns to check the transformations performed on the data. + # To prevent scanpy from automatically tranforming the counts when they are + # already transformed, we set the appropriate values to .uns. + if 'log1p' not in data.uns: + logger.warning("When flavor is not set to 'seurat_v3', " + "the input data for this component must be log-transformed. " + "However, the 'log1p' dictionairy in .uns has not been set. " + "This is fine if you did not log transform your data with scanpy." + "Otherwise, please check if you are providing log transformed " + "data using --layer.") + data.uns['log1p'] = {'base': None} + elif 'log1p' in data.uns and 'base' not in data.uns['log1p']: + data.uns['log1p']['base'] = None + +logger.info("\\tUnfiltered data: %s", data) + +logger.info("\\tComputing hvg") +# construct arguments +hvg_args = { + 'adata': data, + 'n_top_genes': par["n_top_genes"], + 'min_mean': par["min_mean"], + 'max_mean': par["max_mean"], + 'min_disp': par["min_disp"], + 'span': par["span"], + 'n_bins': par["n_bins"], + 'flavor': par["flavor"], + 'subset': False, + 'inplace': False, + 'layer': par['layer'], +} + +optional_parameters = { + "max_disp": "max_disp", + "obs_batch_key": "batch_key", + "n_top_genes": "n_top_genes" +} +# only add parameter if it's passed +for par_name, dest_name in optional_parameters.items(): + if par.get(par_name): + hvg_args[dest_name] = par[par_name] + +# scanpy does not do this check, although it is stated in the documentation +if par['flavor'] == "seurat_v3" and not par['n_top_genes']: + raise ValueError("When flavor is set to 'seurat_v3', you are required to set 'n_top_genes'.") + +if par["layer"] and not par['layer'] in data.layers: + raise ValueError(f"Layer '{par['layer']}' not found in layers for modality '{mod}'. 
" + f"Found layers are: {','.join(data.layers)}") +# call function +try: + out = sc.pp.highly_variable_genes(**hvg_args) + if par['obs_batch_key'] is not None: + assert (out.index == data.var.index).all(), "Expected output index values to be equivalent to the input index" +except ValueError as err: + if str(err) == "cannot specify integer \`bins\` when input data contains infinity": + err.args = ("Cannot specify integer \`bins\` when input data contains infinity. " + "Perhaps input data has not been log normalized?",) + if re.search("Bin edges must be unique:", str(err)): + raise RuntimeError("Scanpy failed to calculate hvg. The error " + "returned by scanpy (see above) could be the " + "result from trying to use this component on unfiltered data.") from err + raise err + +out.index = data.var.index +logger.info("\\tStoring output into .var") +if par.get("var_name_filter", None) is not None: + data.var[par["var_name_filter"]] = out["highly_variable"] + +if par.get("varm_name", None) is not None and 'mean_bin' in out: + # drop mean_bin as mudata/anndata doesn't support tuples + data.varm[par["varm_name"]] = out.drop("mean_bin", axis=1) + +if par["do_subset"]: + keep_feats = np.ravel(data.var[par["var_name_filter"]]) + mdata.mod[mod] = data[:,keep_feats] + +logger.info("Writing h5mu to file") +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/filter/filter_with_hvg/setup_logger.py b/target/docker/filter/filter_with_hvg/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/filter/filter_with_hvg/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/filter/filter_with_scrublet/.config.vsh.yaml b/target/docker/filter/filter_with_scrublet/.config.vsh.yaml new file mode 100644 index 00000000000..6e96116696f --- /dev/null +++ b/target/docker/filter/filter_with_scrublet/.config.vsh.yaml @@ -0,0 +1,304 @@ +functionality: + name: "filter_with_scrublet" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + 
organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_name_filter" + description: "In which .obs slot to store a boolean array corresponding to which\ + \ observations should be filtered out." + info: null + default: + - "filter_with_scrublet" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--do_subset" + description: "Whether to subset before storing the output." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--obs_name_doublet_score" + description: "Name of the doublet scores column in the obs slot of the returned\ + \ object." 
+ info: null + default: + - "scrublet_doublet_score" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_counts" + description: "The number of minimal UMI counts per cell that have to be present\ + \ for initial cell detection." + info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells" + description: "The number of cells in which UMIs for a gene were detected." + info: null + default: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_gene_variablity_percent" + description: "Used for gene filtering prior to PCA. Keep the most highly variable\ + \ genes (in the top min_gene_variability_pctl percentile), as measured by the\ + \ v-statistic [Klein et al., Cell 2015]." + info: null + default: + - 85.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_pca_components" + description: "Number of principal components to use during PCA dimensionality\ + \ reduction." + info: null + default: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--distance_metric" + description: "The distance metric used for computing similarities." 
+ info: null + default: + - "euclidean" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--allow_automatic_threshold_detection_fail" + description: "When scrublet fails to automatically determine the double score\ + \ threshold, \nallow the component to continue and set the output columns to\ + \ NA.\n" + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Doublet detection using the Scrublet method (Wolock, Lopez and Klein,\ + \ 2019).\nThe method tests for potential doublets by using the expression profiles\ + \ of\ncells to generate synthetic potential doubles which are tested against cells.\ + \ \nThe method returns a \"doublet score\" on which it calls for potential doublets.\n\ + \nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\ + \nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells\ + \ Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6%\ + \ ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000\ + \ ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n\ + \ ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + run_args: + - "--env NUMBA_CACHE_DIR=/tmp" + target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scrublet" + - "annoy==1.16.3" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_scrublet" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/filter_with_scrublet/filter_with_scrublet" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git 
a/target/docker/filter/filter_with_scrublet/filter_with_scrublet b/target/docker/filter/filter_with_scrublet/filter_with_scrublet new file mode 100755 index 00000000000..bc86e9f391a --- /dev/null +++ b/target/docker/filter/filter_with_scrublet/filter_with_scrublet @@ -0,0 +1,1260 @@ +#!/usr/bin/env bash + +# filter_with_scrublet 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries De Maeyer (contributor) +# * Robrecht Cannoodt (maintainer, contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# ViashQuote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory containing a bash file, 
following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory containing the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] ', printed only when $VIASH_VERBOSITY is at least $1. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. 
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="filter_with_scrublet" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "filter_with_scrublet 0.12.4" + echo "" + echo "Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019)." + echo "The method tests for potential doublets by using the expression profiles of" + echo "cells to generate synthetic potential doubles which are tested against cells." + echo "The method returns a \"doublet score\" on which it calls for potential doublets." 
+ echo "" + echo "For the source code please visit https://github.com/AllonKleinLab/scrublet." + echo "" + echo "For 10x we expect the doublet rates to be:" + echo " Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered" + echo " ~0.4% ~800 ~500" + echo " ~0.8% ~1,600 ~1,000" + echo " ~1.6% ~3,200 ~2,000" + echo " ~2.3% ~4,800 ~3,000" + echo " ~3.1% ~6,400 ~4,000" + echo " ~3.9% ~8,000 ~5,000" + echo " ~4.6% ~9,600 ~6,000" + echo " ~5.4% ~11,200 ~7,000" + echo " ~6.1% ~12,800 ~8,000" + echo " ~6.9% ~14,400 ~9,000" + echo " ~7.6% ~16,000 ~10,000" + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --obs_name_filter" + echo " type: string" + echo " default: filter_with_scrublet" + echo " In which .obs slot to store a boolean array corresponding to which" + echo " observations should be filtered out." + echo "" + echo " --do_subset" + echo " type: boolean_true" + echo " Whether to subset before storing the output." + echo "" + echo " --obs_name_doublet_score" + echo " type: string" + echo " default: scrublet_doublet_score" + echo " Name of the doublet scores column in the obs slot of the returned" + echo " object." + echo "" + echo " --min_counts" + echo " type: integer" + echo " default: 2" + echo " The number of minimal UMI counts per cell that have to be present for" + echo " initial cell detection." 
+ echo "" + echo " --min_cells" + echo " type: integer" + echo " default: 3" + echo " The number of cells in which UMIs for a gene were detected." + echo "" + echo " --min_gene_variablity_percent" + echo " type: double" + echo " default: 85.0" + echo " Used for gene filtering prior to PCA. Keep the most highly variable" + echo " genes (in the top min_gene_variability_pctl percentile), as measured by" + echo " the v-statistic [Klein et al., Cell 2015]." + echo "" + echo " --num_pca_components" + echo " type: integer" + echo " default: 30" + echo " Number of principal components to use during PCA dimensionality" + echo " reduction." + echo "" + echo " --distance_metric" + echo " type: string" + echo " default: euclidean" + echo " The distance metric used for computing similarities." + echo "" + echo " --allow_automatic_threshold_detection_fail" + echo " type: boolean_true" + echo " When scrublet fails to automatically determine the double score" + echo " threshold," + echo " allow the component to continue and set the output columns to NA." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether the docker CLI is on the PATH and the Docker daemon responds +# exits with status 1 after logging a critical message when either check fails +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : exit status of `docker push`
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # Report on the image that was actually pushed ($1). The global VSHD_ID is
  # only assigned inside ViashDockerSetup, so it may be empty or stale when
  # this function is called directly as in the examples above.
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2...               : extra arguments forwarded to ViashDockerBuild (e.g. --no-cache)
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  # a failed pull must not abort the script under errexit; fall back to a build
  save=$-; set +e
  ViashDockerPull "$1"
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}
# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
#
# Sets the globals VSHD_ID and VSHD_STRAT. Exits with status 1 on an
# unrecognised strategy.
#
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  VSHD_ID="$1"
  VSHD_STRAT="$2"
  # A `case` dispatch replaces the chain of `[ a -o b ]` tests: the `-o`
  # operator is marked obsolescent by POSIX, and the patterns read more easily.
  case "$VSHD_STRAT" in
    alwaysbuild|build|b)
      ViashDockerBuild "$VSHD_ID" --no-cache
      ;;
    alwayspull|pull|p)
      ViashDockerPull "$VSHD_ID"
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild "$VSHD_ID" --no-cache
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild "$VSHD_ID"
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild "$VSHD_ID"
      ;;
    ifneedbe*)
      # only act when the image is not available locally
      save=$-; set +e
      ViashDockerLocalTagCheck "$VSHD_ID"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $VSHD_ID already exists"
      else
        case "$VSHD_STRAT" in
          ifneedbebuild)
            ViashDockerBuild "$VSHD_ID" --no-cache
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild "$VSHD_ID"
            ;;
          ifneedbepull)
            ViashDockerPull "$VSHD_ID"
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild "$VSHD_ID" --no-cache
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild "$VSHD_ID"
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$VSHD_ID"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      # only push when the remote does not already have this tag
      save=$-; set +e
      ViashDockerRemoteTagCheck "$VSHD_ID"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$VSHD_ID' exists, doing nothing."
      else
        ViashNotice "Container '$VSHD_ID' does not yet exist."
        ViashDockerPush "$VSHD_ID"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
      ;;
  esac
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
#
# Exits with status 1 and names the first missing command when the check fails.
#
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  tag=$1
  shift 1
  # join the remaining arguments into one space-separated word list
  commands="$*"
  save=$-; set +e
  # the loop runs inside the container; it prints the first command that
  # `command -v` cannot resolve and stops with a non-zero status
  missing=$(docker run --rm --entrypoint=sh "$tag" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
  outCheck=$?
  [[ $save =~ e ]] && set -e
  if [ $outCheck -ne 0 ]; then
    ViashError "Docker container '$tag' does not contain command '$missing'."
    exit 1
  fi
}


######## End of helper functions for setting up Docker images for viash ########
# ViashDockerBuild: build a docker container
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2...               : extra arguments placed after the tag on the `docker build` command line
# exit code $?        : whether or not the image was built
function ViashDockerBuild {
  # create temporary directory to store dockerfile & optional resources in
  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_with_scrublet-XXXXXX")
  dockerfile="$tmpdir/Dockerfile"
  function clean_up {
    rm -rf "$tmpdir"
  }
  # NOTE(review): a later `trap ... EXIT` elsewhere in the wrapper replaces
  # this handler, in which case the temporary directory is not removed.
  trap clean_up EXIT

  # store dockerfile and resources
  ViashDockerfile > "$dockerfile"

  # Build the container
  ViashNotice "Building container '$1' with Dockerfile"
  ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'"
  save=$-; set +e
  # paths are quoted so that directories containing whitespace keep working
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile"
  else
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log"
  fi
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashError "Error occurred while building container '$1'"
    # in quiet mode the build output was captured; show it now
    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
      ViashError "Transcript: --------------------------------"
      cat "$tmpdir/docker_build.log"
      ViashError "End of transcript --------------------------"
    fi
    exit 1
  fi
  ViashDockerCheckCommands "$1" 'ps' 'bash'
}

# ViashAbsolutePath: generate absolute path from relative path
# borrowed from https://stackoverflow.com/a/21951256
# $1     : relative filename
# return : absolute path
# examples:
#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
#   ViashAbsolutePath /foo/bar/..     # returns /foo
function ViashAbsolutePath {
  local thePath
  if [[ ! "$1" =~ ^/ ]]; then
    thePath="$PWD/$1"
  else
    thePath="$1"
  fi
  # the normalisation runs in a subshell so the IFS change does not leak
  echo "$thePath" | (
    IFS=/
    # -r: keep backslashes in path components literal
    read -r -a parr
    declare -a outp
    for i in "${parr[@]}"; do
      case "$i" in
      ''|.) continue ;;
      ..)
        # drop the previous component, if there is one
        len=${#outp[@]}
        if ((len==0)); then
          continue
        else
          unset "outp[$((len-1))]"
        fi
        ;;
      *)
        len=${#outp[@]}
        outp[$len]="$i"
        ;;
      esac
    done
    # "${outp[*]}" joins the components with the first IFS character, '/'
    echo /"${outp[*]}"
  )
}
# ViashAutodetectMount: auto configuring docker mounts from parameters
# $1                  : The parameter value
# returns             : New parameter
# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker
# examples:
#   ViashAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
#   ViashAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
function ViashAutodetectMount {
  abs_path=$(ViashAbsolutePath "$1")
  if [ -d "$abs_path" ]; then
    # an existing directory is mounted itself
    mount_source="$abs_path"
    base_name=""
  else
    # anything else is reached through its parent directory
    mount_source=$(dirname "$abs_path")
    base_name=$(basename "$abs_path")
  fi
  mount_target="/viash_automount$mount_source"
  echo "$mount_target/$base_name"
}
function ViashAutodetectMountArg {
  abs_path=$(ViashAbsolutePath "$1")
  if [ -d "$abs_path" ]; then
    mount_source="$abs_path"
    base_name=""
  else
    mount_source=$(dirname "$abs_path")
    base_name=$(basename "$abs_path")
  fi
  mount_target="/viash_automount$mount_source"
  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
  echo "--volume=\"$mount_source:$mount_target\""
}
# ViashStripAutomount: remove a leading /viash_automount prefix from a path
# $1     : path, possibly starting with /viash_automount
# return : absolute path without the prefix
function ViashStripAutomount {
  abs_path=$(ViashAbsolutePath "$1")
  echo "${abs_path#/viash_automount}"
}
# ViashExtractFlags: Retain leading flag
# $1     : string with a possible leading flag
# return : leading flag
# examples:
#   ViashExtractFlags --foo=bar  # returns --foo
function ViashExtractFlags {
  # quoted so the value is not word-split or glob-expanded; printf avoids
  # echo treating a leading flag such as -n as one of its own options
  printf '%s\n' "$1" | sed 's/=.*//'
}
ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "filter_with_scrublet 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_name_filter) + [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_NAME_FILTER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_filter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_name_filter=*) + [ -n "$VIASH_PAR_OBS_NAME_FILTER" ] && ViashError Bad arguments for option \'--obs_name_filter=*\': \'$VIASH_PAR_OBS_NAME_FILTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_NAME_FILTER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --do_subset) + [ -n "$VIASH_PAR_DO_SUBSET" ] && ViashError Bad arguments for option \'--do_subset\': \'$VIASH_PAR_DO_SUBSET\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_DO_SUBSET=true + shift 1 + ;; + --obs_name_doublet_score) + [ -n "$VIASH_PAR_OBS_NAME_DOUBLET_SCORE" ] && ViashError Bad arguments for option \'--obs_name_doublet_score\': \'$VIASH_PAR_OBS_NAME_DOUBLET_SCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_NAME_DOUBLET_SCORE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_name_doublet_score. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_name_doublet_score=*) + [ -n "$VIASH_PAR_OBS_NAME_DOUBLET_SCORE" ] && ViashError Bad arguments for option \'--obs_name_doublet_score=*\': \'$VIASH_PAR_OBS_NAME_DOUBLET_SCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_NAME_DOUBLET_SCORE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_counts) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_counts=*) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --min_cells=*) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_gene_variablity_percent) + [ -n "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" ] && ViashError Bad arguments for option \'--min_gene_variablity_percent\': \'$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_gene_variablity_percent. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_gene_variablity_percent=*) + [ -n "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" ] && ViashError Bad arguments for option \'--min_gene_variablity_percent=*\': \'$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --num_pca_components) + [ -n "$VIASH_PAR_NUM_PCA_COMPONENTS" ] && ViashError Bad arguments for option \'--num_pca_components\': \'$VIASH_PAR_NUM_PCA_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_PCA_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_pca_components. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --num_pca_components=*) + [ -n "$VIASH_PAR_NUM_PCA_COMPONENTS" ] && ViashError Bad arguments for option \'--num_pca_components=*\': \'$VIASH_PAR_NUM_PCA_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_NUM_PCA_COMPONENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --distance_metric) + [ -n "$VIASH_PAR_DISTANCE_METRIC" ] && ViashError Bad arguments for option \'--distance_metric\': \'$VIASH_PAR_DISTANCE_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DISTANCE_METRIC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --distance_metric. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --distance_metric=*) + [ -n "$VIASH_PAR_DISTANCE_METRIC" ] && ViashError Bad arguments for option \'--distance_metric=*\': \'$VIASH_PAR_DISTANCE_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DISTANCE_METRIC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --allow_automatic_threshold_detection_fail) + [ -n "$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL" ] && ViashError Bad arguments for option \'--allow_automatic_threshold_detection_fail\': \'$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
# setting computational defaults

# helper function for parsing memory strings
# $1     : amount of memory with a unit, e.g. '2GB', '512m' or '10 kb'
#          (case-insensitive, whitespace is ignored)
# return : the amount in bytes, or nothing when the string has no valid unit
function ViashMemoryAsBytes {
  local memory=$(echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]')
  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
  if [[ $memory =~ $memory_regex ]]; then
    # split "<digits><unit>" into its two halves
    local number=${memory/[^0-9]*/}
    local symbol=${memory/*[0-9]/}

    # 10# forces base 10, so a leading zero is not read as an octal literal
    case $symbol in
      b)      memory_b=$(( 10#$number )) ;;
      kb|k)   memory_b=$(( 10#$number * 1024 )) ;;
      mb|m)   memory_b=$(( 10#$number * 1024 * 1024 )) ;;
      gb|g)   memory_b=$(( 10#$number * 1024 * 1024 * 1024 )) ;;
      tb|t)   memory_b=$(( 10#$number * 1024 * 1024 * 1024 * 1024 )) ;;
      pb|p)   memory_b=$(( 10#$number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}
# compute memory in different units
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  # quoted so a value such as '2 GB' reaches the parser as a single argument
  VIASH_META_MEMORY_B=$(ViashMemoryAsBytes "$VIASH_META_MEMORY")
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # `unset` takes the variable NAME; with a `$` it received the (empty)
    # value and the variable stayed defined as an empty string
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then + VIASH_PAR_OBS_NAME_FILTER="filter_with_scrublet" +fi +if [ -z ${VIASH_PAR_DO_SUBSET+x} ]; then + VIASH_PAR_DO_SUBSET="false" +fi +if [ -z ${VIASH_PAR_OBS_NAME_DOUBLET_SCORE+x} ]; then + VIASH_PAR_OBS_NAME_DOUBLET_SCORE="scrublet_doublet_score" +fi +if [ -z ${VIASH_PAR_MIN_COUNTS+x} ]; then + VIASH_PAR_MIN_COUNTS="2" +fi +if [ -z ${VIASH_PAR_MIN_CELLS+x} ]; then + VIASH_PAR_MIN_CELLS="3" +fi +if [ -z ${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT+x} ]; then + VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT="85.0" +fi +if [ -z ${VIASH_PAR_NUM_PCA_COMPONENTS+x} ]; then + VIASH_PAR_NUM_PCA_COMPONENTS="30" +fi +if [ -z ${VIASH_PAR_DISTANCE_METRIC+x} ]; then + VIASH_PAR_DISTANCE_METRIC="euclidean" +fi +if [ -z ${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL+x} ]; then + VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_DO_SUBSET" ]]; then + if ! [[ "$VIASH_PAR_DO_SUBSET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--do_subset' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" ]]; then + if ! [[ "$VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_gene_variablity_percent' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NUM_PCA_COMPONENTS" ]]; then + if ! [[ "$VIASH_PAR_NUM_PCA_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--num_pca_components' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL" ]]; then + if ! [[ "$VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--allow_automatic_threshold_detection_fail' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm --env NUMBA_CACHE_DIR=/tmp ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm --env NUMBA_CACHE_DIR=/tmp ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm --env NUMBA_CACHE_DIR=/tmp ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_filter_with_scrublet:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_with_scrublet-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import scrublet as scr +import mudata as mu +import numpy as np +import sys +import pandas as pd + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'obs_name_doublet_score': $( if [ ! 
-z ${VIASH_PAR_OBS_NAME_DOUBLET_SCORE+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_DOUBLET_SCORE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_gene_variablity_percent': $( if [ ! -z ${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT+x} ]; then echo "float(r'${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'num_pca_components': $( if [ ! -z ${VIASH_PAR_NUM_PCA_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_PCA_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'distance_metric': $( if [ ! -z ${VIASH_PAR_DISTANCE_METRIC+x} ]; then echo "r'${VIASH_PAR_DISTANCE_METRIC//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'allow_automatic_threshold_detection_fail': $( if [ ! -z ${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL+x} ]; then echo "r'${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par['input']) +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Processing modality '%s'.", mod) +data = mdata.mod[mod] + +logger.info("\\tRunning scrublet") +scrub = scr.Scrublet(data.X) + +doublet_scores, predicted_doublets = scrub.scrub_doublets( + min_counts=par["min_counts"], + min_cells=par["min_cells"], + 
min_gene_variability_pctl=par["min_gene_variablity_percent"], + n_prin_comps=par["num_pca_components"], + distance_metric=par["distance_metric"], + use_approx_neighbors=False +) + +try: + keep_cells = np.invert(predicted_doublets) +except TypeError: + if par['allow_automatic_threshold_detection_fail']: + # Scrublet might not throw an error and return None if it fails to detect doublets... + logger.info("\\tScrublet could not automatically detect the doublet score threshold. Setting output columns to NA.") + keep_cells = np.nan + doublet_scores = np.nan + else: + raise RuntimeError("Scrublet could not automatically detect the doublet score threshold. " + "--allow_automatic_threshold_detection_fail can be used to ignore this failure " + "and set the corresponding output columns to NA.") + +logger.info("\\tStoring output into .obs") +if par["obs_name_doublet_score"] is not None: + data.obs[par["obs_name_doublet_score"]] = doublet_scores + data.obs[par["obs_name_doublet_score"]] = data.obs[par["obs_name_doublet_score"]].astype("float64") +if par["obs_name_filter"] is not None: + data.obs[par["obs_name_filter"]] = keep_cells + data.obs[par["obs_name_filter"]] = data.obs[par["obs_name_filter"]].astype(pd.BooleanDtype()) + +if par["do_subset"]: + if pd.api.types.is_scalar(keep_cells) and pd.isna(keep_cells): + logger.warning("Not subsetting beacuse doublets were not predicted") + else: + mdata.mod[mod] = data[keep_cells, :] + +logger.info("Writing h5mu to %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/filter/filter_with_scrublet/setup_logger.py b/target/docker/filter/filter_with_scrublet/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/filter/filter_with_scrublet/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/filter/remove_modality/.config.vsh.yaml b/target/docker/filter/remove_modality/.config.vsh.yaml new file mode 100644 index 00000000000..4be0398ed3d --- /dev/null +++ b/target/docker/filter/remove_modality/.config.vsh.yaml @@ -0,0 +1,171 @@ +functionality: + name: "remove_modality" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + 
role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Remove a modality from a .h5mu file\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" 
+ upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/remove_modality" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/remove_modality/remove_modality" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/filter/remove_modality/remove_modality b/target/docker/filter/remove_modality/remove_modality new file mode 100755 index 00000000000..a81ff47b106 --- /dev/null +++ b/target/docker/filter/remove_modality/remove_modality @@ -0,0 +1,972 @@ +#!/usr/bin/env bash + +# remove_modality 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
+#
+# The component may contain files which fall under a different license. The
+# authors of this component should specify the license in the header of such
+# files, or include a separate license file detailing the licenses of all included
+# files.
+#
+# Component authors:
+# * Dries Schaumont (maintainer)
+
+set -e
+
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1 : unquoted string
+# return : possibly quoted string
+# examples:
+# ViashQuote --foo # returns --foo
+# ViashQuote bar # returns 'bar'
+# ViashQuote --foo=bar # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1 : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+# ViashRemoveFlags --foo=bar # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: print the directory containing a bash file, following symlinks
+# usage : ViashSourceDir ${BASH_SOURCE[0]}
+# $1 : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory that holds the (symlink-resolved) bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # relative link target: resolve against the link's own directory
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see
https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] ' (only when VIASH_VERBOSITY >= $1).
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="remove_modality" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "remove_modality 0.12.4" + echo "" + echo "Remove a modality from a .h5mu file" + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string, required parameter, multiple values allowed" + echo "" + echo " --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." 
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# Exits with status 1 when the docker executable is missing or 'docker version' fails.
+# examples:
+# ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then # run 'command -v' itself; inside '[ ]' it was parsed as test operands and never matched
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e # remember the errexit state so a failing 'docker version' does not abort the script
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# ViashDockerRemoteTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerRemoteTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# docker pull python:latest
+# ViashDockerLocalTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerLocalTagCheck sdaizudceahifu
+# echo $?
# returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q $1)" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# ViashDockerPull python:latest
+# echo $? # returns '0'
+# ViashDockerPull sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull $1 && return 0 || return 1
+  else
+    save=$-; set +e # a failed pull is reported through the return code, not by aborting
+    docker pull $1 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the push succeeded
+# examples:
+# ViashDockerPush python:latest
+# echo $? # returns '0'
+# ViashDockerPush sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push $1
+    out=$?
+  else
+    docker push $1 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded." # report the argument, not the caller's global VSHD_ID
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+# ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull $1
+  out=$?
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component filter remove_modality" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-remove_modality-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+          len=${#outp[@]}
+          if ((len==0)); then
+            continue
+          else
+            unset outp[$((len-1))]
+          fi
+          ;;
+        *)
+          len=${#outp[@]}
+          outp[$len]="$i"
+          ;;
+      esac
+    done
+    echo /"${outp[*]}"
+  )
+}
+# ViashAutodetectMount: auto configuring docker mounts from parameters
+# $1 : The parameter value
+# returns : New parameter
+# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker
+# examples:
+# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar'
+# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"'
+function ViashAutodetectMount {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  echo "$mount_target/$base_name"
+}
+function ViashAutodetectMountArg {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
+  echo "--volume=\"$mount_source:$mount_target\""
+}
+function ViashStripAutomount {
+  abs_path=$(ViashAbsolutePath "$1")
+  echo "${abs_path#/viash_automount}" # drop the mount prefix to recover the host path
+}
+# ViashExtractFlags: Retain leading flag
+# $1 : string with a possible leading flag
+# return : leading flag
+# examples:
+# ViashExtractFlags --foo=bar # returns --foo
+function ViashExtractFlags {
+  echo "$1" | sed 's/=.*//' # quoted like ViashRemoveFlags so the argument is not word-split or globbed
+}
+# initialise array of extra docker mount arguments
+VIASH_EXTRA_MOUNTS=()
+
+# initialise variables
+VIASH_POSITIONAL_ARGS=''
+VIASH_MODE='run'
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -h|--help)
+      ViashHelp
+      exit
+      ;;
+    ---v|---verbose)
+      let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
+      shift 1
+      ;;
+    ---verbosity)
+      VIASH_VERBOSITY="$2"
+      shift 2
+      ;;
+    ---verbosity=*)
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "remove_modality 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + if [ -z "$VIASH_PAR_MODALITY" ]; then + VIASH_PAR_MODALITY="$2" + else + VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + if [ -z "$VIASH_PAR_MODALITY" ]; then + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + else + VIASH_PAR_MODALITY="$VIASH_PAR_MODALITY:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+      VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
+      shift 1
+      ;;
+    --output_compression)
+      [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+      VIASH_PAR_OUTPUT_COMPRESSION="$2"
+      [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1
+      shift 2
+      ;;
+    --output_compression=*)
+      [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+      VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
+      shift 1
+      ;;
+    ---setup)
+      VIASH_MODE='docker_setup'
+      VIASH_DOCKER_SETUP_STRATEGY="$2"
+      shift 2 # consume the flag and its separate value (was 'shift 1', leaving the strategy as a positional arg)
+      ;;
+    ---setup=*)
+      VIASH_MODE='docker_setup'
+      VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+      shift 1 # flag and value are a single word (was 'shift 2', swallowing the next argument)
+      ;;
+    ---dockerfile)
+      ViashDockerfile
+      exit 0
+      ;;
+    ---volume) # the '---v' short form is already taken by '---v|---verbose' earlier in this case statement
+      VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+      shift 2
+      ;;
+    ---volume=*)
+      VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the value lives in "$1" for the '=' form (was "$2")
+      shift 1
+      ;;
+    ---debug)
+      VIASH_MODE='docker_debug'
+      shift 1
+      ;;
+    ---cpus)
+      [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+      VIASH_META_CPUS="$2"
+      [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+      shift 2
+      ;;
+    ---cpus=*)
+      [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + ViashError '--modality' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_remove_modality:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-remove_modality-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from mudata import read_h5mu, MuData + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + + +input_mudata = read_h5mu(par['input']) +new_mods = {mod_name: mod for mod_name, mod + in input_mudata.mod.items() + if mod_name not in par['modality']} + +new_mudata = MuData(new_mods) +new_mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/filter/subset_h5mu/.config.vsh.yaml b/target/docker/filter/subset_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..30470cb0a1c --- /dev/null +++ b/target/docker/filter/subset_h5mu/.config.vsh.yaml @@ -0,0 +1,187 @@ +functionality: + name: "subset_h5mu" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--number_of_observations" + description: "Number of observations to be selected from the h5mu file." 
+ info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Create a subset of a mudata file by selecting the first number of\ + \ observations\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + 
mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/subset_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/filter/subset_h5mu/subset_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/filter/subset_h5mu/setup_logger.py b/target/docker/filter/subset_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/filter/subset_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/filter/subset_h5mu/subset_h5mu b/target/docker/filter/subset_h5mu/subset_h5mu new file mode 100755 index 00000000000..5fd3e710500 --- /dev/null +++ b/target/docker/filter/subset_h5mu/subset_h5mu @@ -0,0 +1,994 @@ +#!/usr/bin/env bash + +# subset_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. 
+# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 
+VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. 
+function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="subset_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "subset_h5mu 0.12.4" + echo "" + echo "Create a subset of a mudata file by selecting the first number of observations" + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --number_of_observations" + echo " type: integer" + echo " example: 5" + echo " Number of observations to be selected from the h5mu file." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component filter subset_h5mu" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-subset_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "subset_h5mu 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --number_of_observations) + [ -n "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" ] && ViashError Bad arguments for option \'--number_of_observations\': \'$VIASH_PAR_NUMBER_OF_OBSERVATIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUMBER_OF_OBSERVATIONS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --number_of_observations. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --number_of_observations=*) + [ -n "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" ] && ViashError Bad arguments for option \'--number_of_observations=*\': \'$VIASH_PAR_NUMBER_OF_OBSERVATIONS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_NUMBER_OF_OBSERVATIONS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 # the strategy was read from $2, so consume both the flag and its value + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # flag and value share $1; shifting 2 would drop the next argument or fail under 'set -e' + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---volume) # no '---v' alias here: '---v' is already matched by the earlier ---verbose case + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # value lives in $1 (after the '='), not in $2 + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" ]]; then + if ! [[ "$VIASH_PAR_NUMBER_OF_OBSERVATIONS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--number_of_observations' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/filter_subset_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-subset_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'number_of_observations': $( if [ ! -z ${VIASH_PAR_NUMBER_OF_OBSERVATIONS+x} ]; then echo "int(r'${VIASH_PAR_NUMBER_OF_OBSERVATIONS//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +if __name__ == "__main__": + # read data + data = mudata.read(par["input"]) + + # subset data + if par["modality"]: + data.mod[par["modality"]] = data.mod[par["modality"]][:par["number_of_observations"]] + else: + data = data[:par["number_of_observations"]] + + # write data + data.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/integrate/harmonypy/.config.vsh.yaml b/target/docker/integrate/harmonypy/.config.vsh.yaml new file mode 100644 index 00000000000..c697eda8b02 --- /dev/null +++ b/target/docker/integrate/harmonypy/.config.vsh.yaml @@ -0,0 +1,240 @@ +functionality: + name: "harmonypy" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "Which .obsm slot to use as a starting PCA embedding." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_pca_integrated" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--theta" + description: "Diversity clustering penalty parameter. Specify for each variable\ + \ in group.by.vars. theta=0 does not encourage any diversity. Larger values\ + \ of theta result in more diverse clusters." + info: null + default: + - 2.0 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_covariates" + description: "The .obs field(s) that define the covariate(s) to regress out." 
+ info: null + example: + - "batch" + - "sample" + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Performs Harmony integration based as described in https://github.com/immunogenomics/harmony.\ + \ Based on an implementation in python from https://github.com/slowkow/harmonypy" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "harmonypy~=0.0.6" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: 
"memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/harmonypy" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/harmonypy/harmonypy" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/integrate/harmonypy/harmonypy b/target/docker/integrate/harmonypy/harmonypy new file mode 100755 index 00000000000..04916d61871 --- /dev/null +++ b/target/docker/integrate/harmonypy/harmonypy @@ -0,0 +1,1099 @@ +#!/usr/bin/env bash + +# harmonypy 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) +# * Robrecht Cannoodt (contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the 
verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so that a script path containing whitespace is passed as one argument)
VIASH_META_RESOURCES_DIR="$(ViashSourceDir "${BASH_SOURCE[0]}")"

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="harmonypy"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  echo "harmonypy 0.12.4"
  echo ""
  echo "Performs Harmony integration based as described in"
  echo "https://github.com/immunogenomics/harmony. Based on an implementation in python"
  echo "from https://github.com/slowkow/harmonypy"
  echo ""
  echo "Arguments:"
  echo " -i, --input"
  echo " type: file, required parameter, file must exist"
  echo " Input h5mu file"
  echo ""
  echo " -o, --output"
  echo " type: file, required parameter, output, file must exist"
  echo " Output h5mu file."
  echo ""
  echo " --output_compression"
  echo " type: string"
  echo " example: gzip"
  echo " choices: [ gzip, lzf ]"
  echo " The compression format to be used on the output h5mu object."
  echo ""
  echo " --modality"
  echo " type: string"
  echo " default: rna"
  echo ""
  echo " --obsm_input"
  echo " type: string"
  echo " default: X_pca"
  echo " Which .obsm slot to use as a starting PCA embedding."
  echo ""
  echo " --obsm_output"
  echo " type: string"
  echo " default: X_pca_integrated"
  echo " In which .obsm slot to store the resulting integrated embedding."
  echo ""
  echo " --theta"
  echo " type: double, multiple values allowed"
  echo " default: 2.0"
  echo " Diversity clustering penalty parameter. Specify for each variable in"
  echo " group.by.vars. theta=0 does not encourage any diversity. Larger values"
  echo " of theta result in more diverse clusters."
  echo ""
  echo " --obs_covariates"
  echo " type: string, required parameter, multiple values allowed"
  echo " example: batch:sample"
  echo " The .obs field(s) that define the covariate(s) to regress out."
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the `docker` command cannot be found
# or when `docker version` fails.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # Test the exit status of `command -v` directly. Wrapping it in `[ ... ]`
  # turns it into a malformed test expression that never reports a missing
  # docker binary.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so the exit status can be inspected
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?
# returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "harmonypy~=0.0.6" + +LABEL org.opencontainers.image.authors="Dries Schaumont, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component integrate harmonypy" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-harmonypy-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "harmonypy 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_input) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --obsm_input=*) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --theta) + if [ -z "$VIASH_PAR_THETA" ]; then + VIASH_PAR_THETA="$2" + else + VIASH_PAR_THETA="$VIASH_PAR_THETA:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --theta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --theta=*) + if [ -z "$VIASH_PAR_THETA" ]; then + VIASH_PAR_THETA=$(ViashRemoveFlags "$1") + else + VIASH_PAR_THETA="$VIASH_PAR_THETA:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --obs_covariates) + if [ -z "$VIASH_PAR_OBS_COVARIATES" ]; then + VIASH_PAR_OBS_COVARIATES="$2" + else + VIASH_PAR_OBS_COVARIATES="$VIASH_PAR_OBS_COVARIATES:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_covariates. Use "--help" to get more information on the parameters. 
&& exit 1
            shift 2
            ;;
        --obs_covariates=*)
            # repeated occurrences are accumulated into one ':'-separated value
            if [ -z "$VIASH_PAR_OBS_COVARIATES" ]; then
              VIASH_PAR_OBS_COVARIATES=$(ViashRemoveFlags "$1")
            else
              VIASH_PAR_OBS_COVARIATES="$VIASH_PAR_OBS_COVARIATES:"$(ViashRemoveFlags "$1")
            fi
            shift 1
            ;;
        ---setup)
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
            # flag and value are two separate words: consume both
            shift 2
            ;;
        ---setup=*)
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
            # flag and value are one word: consume only that word
            shift 1
            ;;
        ---dockerfile)
            ViashDockerfile
            exit 0
            ;;
        # NOTE: '---v' is already matched by the '---v|---verbose' pattern
        # earlier in this case statement, so only '---volume' reaches this arm.
        ---v|---volume)
            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
            shift 2
            ;;
        ---volume=*)
            # the value is embedded in the current word ($1), not the next one
            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
            shift 1
            ;;
        ---debug)
            VIASH_MODE='docker_debug'
            shift 1
            ;;
        ---cpus)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---cpus=*)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---memory)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---memory=*)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
  # quoted so the whole memory string reaches the parser as a single argument
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is derived from the previous one, rounding up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # (`unset` takes the variable NAME; `unset $VAR` would expand the empty
    # value and leave the variable set)
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi


# check whether required parameters exist
if [ -z ${VIASH_PAR_INPUT+x} ]; then
  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_PAR_OBS_COVARIATES+x} ]; then
  ViashError '--obs_covariates' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_CONFIG+x} ]; then
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
  ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then + VIASH_PAR_OBSM_INPUT="X_pca" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_pca_integrated" +fi +if [ -z ${VIASH_PAR_THETA+x} ]; then + VIASH_PAR_THETA="2.0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [ -n "$VIASH_PAR_THETA" ]; then + IFS=':' + set -f + for val in $VIASH_PAR_THETA; do + if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--theta' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_harmonypy:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-harmonypy-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata +from harmonypy import run_harmony + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'theta': $( if [ ! -z ${VIASH_PAR_THETA+x} ]; then echo "list(map(float, r'${VIASH_PAR_THETA//\'/\'\"\'\"r\'}'.split(':')))"; else echo None; fi ), + 'obs_covariates': $( if [ ! -z ${VIASH_PAR_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_OBS_COVARIATES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + + +def main(): + mdata = mudata.read(par["input"].strip()) + mod_name = par['modality'] + mod = mdata.mod[mod_name] + pca_embedding = mod.obsm[par['obsm_input']] + metadata = mod.obs + ho = run_harmony(pca_embedding, metadata, par['obs_covariates'], theta=par['theta']) + mod.obsm[par["obsm_output"]] = ho.Z_corr.T + mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/integrate/scanorama/.config.vsh.yaml b/target/docker/integrate/scanorama/.config.vsh.yaml new file mode 100644 index 00000000000..580c9fb886b --- /dev/null +++ b/target/docker/integrate/scanorama/.config.vsh.yaml @@ -0,0 +1,283 @@ +functionality: + name: "scanorama" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + 
description: "Output .h5mu file" + info: null + default: + - "output.h5ad" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: "Column name discriminating between your batches." + info: null + default: + - "batch" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "Basis obsm slot to run scanorama on." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "The name of the field in adata.obsm where the integrated embeddings\ + \ will be stored after running this function. Defaults to X_scanorama." + info: null + default: + - "X_scanorama" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--knn" + description: "Number of nearest neighbors to use for matching." + info: null + default: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--batch_size" + description: "The batch size used in the alignment vector computation. Useful\ + \ when integrating very large (>100k samples) datasets. Set to large value that\ + \ runs within available memory." + info: null + default: + - 5000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--sigma" + description: "Correction smoothing parameter on Gaussian kernel." 
+ info: null + default: + - 15.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--approx" + description: "Use approximate nearest neighbors with Python annoy; greatly speeds\ + \ up matching runtime." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alpha" + description: "Alignment score minimum cutoff" + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Use Scanorama to integrate different experiments.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scanorama" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: 
"memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scanorama" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scanorama/scanorama" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/integrate/scanorama/scanorama b/target/docker/integrate/scanorama/scanorama new file mode 100755 index 00000000000..6d35e1ce725 --- /dev/null +++ b/target/docker/integrate/scanorama/scanorama @@ -0,0 +1,1187 @@ +#!/usr/bin/env bash + +# scanorama 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+# * Dries De Maeyer (author)
+# * Dries Schaumont (maintainer)
+
+set -e
+
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put single quotes around non flag values
+# $1 : unquoted string
+# return : possibly quoted string (a bare flag is echoed unchanged)
+# examples:
+# ViashQuote --foo # returns --foo
+# ViashQuote bar # returns 'bar'
+# ViashQuote --foo=bar # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag (everything up to and including the first '=')
+# $1 : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+# ViashRemoveFlags --foo=bar # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory containing a bash file, following symlinks
+# usage : ViashSourceDir ${BASH_SOURCE[0]}
+# $1 : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory containing the bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level (printed only if VIASH_VERBOSITY >= $1)
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component (quoted so a script path containing whitespace is not word-split)
+VIASH_META_RESOURCES_DIR=$(ViashSourceDir "${BASH_SOURCE[0]}")
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="scanorama"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "scanorama 0.12.4"
+  echo ""
+  echo "Use Scanorama to integrate different experiments."
+  echo ""
+  echo "Arguments:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " Input h5mu file"
+  echo ""
+  echo " --modality"
+  echo " type: string"
+  echo " default: rna"
+  echo ""
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " default: output.h5ad"
+  echo " Output .h5mu file"
+  echo ""
+  echo " --output_compression"
+  echo " type: string"
+  echo " example: gzip"
+  echo " choices: [ gzip, lzf ]"
+  echo " The compression format to be used on the output h5mu object."
+  echo ""
+  echo " --obs_batch"
+  echo " type: string"
+  echo " default: batch"
+  echo " Column name discriminating between your batches."
+  echo ""
+  echo " --obsm_input"
+  echo " type: string"
+  echo " default: X_pca"
+  echo " Basis obsm slot to run scanorama on."
+  echo ""
+  echo " --obsm_output"
+  echo " type: string"
+  echo " default: X_scanorama"
+  echo " The name of the field in adata.obsm where the integrated embeddings will"
+  echo " be stored after running this function. Defaults to X_scanorama."
+  echo ""
+  echo " --knn"
+  echo " type: integer"
+  echo " default: 20"
+  echo " Number of nearest neighbors to use for matching."
+  echo ""
+  echo " --batch_size"
+  echo " type: integer"
+  echo " default: 5000"
+  echo " The batch size used in the alignment vector computation. Useful when"
+  echo " integrating very large (>100k samples) datasets. Set to large value that"
+  echo " runs within available memory."
+  echo ""
+  echo " --sigma"
+  echo " type: double"
+  echo " default: 15.0"
+  echo " Correction smoothing parameter on Gaussian kernel."
+  echo ""
+  echo " --approx"
+  echo " type: boolean"
+  echo " default: true"
+  echo " Use approximate nearest neighbors with Python annoy; greatly speeds up"
+  echo " matching runtime."
+  echo ""
+  echo " --alpha"
+  echo " type: double"
+  echo " default: 0.1"
+  echo " Alignment score minimum cutoff"
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# exits with status 1 when the docker CLI is not on PATH or 'docker version' fails
+# examples:
+# ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# ViashDockerRemoteTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerRemoteTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect "$1" > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# docker pull python:latest
+# ViashDockerLocalTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerLocalTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q "$1")" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image could be pulled
+# examples:
+# ViashDockerPull python:latest
+# echo $? # returns '0'
+# ViashDockerPull sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull "$1" && return 0 || return 1
+  else
+    save=$-; set +e
+    docker pull "$1" 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : exit status of 'docker push' (0 when the push succeeded)
+# examples:
+# ViashDockerPush python:latest
+# echo $? # returns '0'
+# ViashDockerPush sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push "$1"
+    out=$?
+  else
+    docker push "$1" 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`; any further arguments are forwarded to ViashDockerBuild
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+# ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull "$1"
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild "$@"
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps build-essential && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "scanorama" + +LABEL org.opencontainers.image.authors="Dries De Maeyer, Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component integrate scanorama" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scanorama-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scanorama 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_batch) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --obs_batch=*) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_input) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_input=*) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --knn) + [ -n "$VIASH_PAR_KNN" ] && ViashError Bad arguments for option \'--knn\': \'$VIASH_PAR_KNN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KNN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --knn. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --knn=*) + [ -n "$VIASH_PAR_KNN" ] && ViashError Bad arguments for option \'--knn=*\': \'$VIASH_PAR_KNN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_KNN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --batch_size) + [ -n "$VIASH_PAR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--batch_size\': \'$VIASH_PAR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BATCH_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --batch_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --batch_size=*) + [ -n "$VIASH_PAR_BATCH_SIZE" ] && ViashError Bad arguments for option \'--batch_size=*\': \'$VIASH_PAR_BATCH_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BATCH_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sigma) + [ -n "$VIASH_PAR_SIGMA" ] && ViashError Bad arguments for option \'--sigma\': \'$VIASH_PAR_SIGMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIGMA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sigma. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sigma=*) + [ -n "$VIASH_PAR_SIGMA" ] && ViashError Bad arguments for option \'--sigma=*\': \'$VIASH_PAR_SIGMA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SIGMA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --approx) + [ -n "$VIASH_PAR_APPROX" ] && ViashError Bad arguments for option \'--approx\': \'$VIASH_PAR_APPROX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_APPROX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --approx. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        --approx=*)
+            [ -n "$VIASH_PAR_APPROX" ] && ViashError Bad arguments for option \'--approx=*\': \'$VIASH_PAR_APPROX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_APPROX=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --alpha)
+            [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_ALPHA="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --alpha=*)
+            [ -n "$VIASH_PAR_ALPHA" ] && ViashError Bad arguments for option \'--alpha=*\': \'$VIASH_PAR_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_ALPHA=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            # The strategy is passed as a separate word, so both the flag and
+            # its value have to be consumed (was 'shift 1', which left the
+            # strategy behind to be parsed as a positional argument).
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---setup=*)
+            # Flag and value are a single word here, so only one argument is
+            # consumed (was 'shift 2', which fails under 'set -e' when this is
+            # the last argument and otherwise swallows the next option).
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            # The mount specification is embedded in "$1" (---volume=SPEC);
+            # "$2" is the next, unrelated argument.
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then + VIASH_PAR_OBS_BATCH="batch" +fi +if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then + VIASH_PAR_OBSM_INPUT="X_pca" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_scanorama" +fi +if [ -z ${VIASH_PAR_KNN+x} ]; then + VIASH_PAR_KNN="20" +fi +if [ -z ${VIASH_PAR_BATCH_SIZE+x} ]; then + VIASH_PAR_BATCH_SIZE="5000" +fi +if [ -z ${VIASH_PAR_SIGMA+x} ]; then + VIASH_PAR_SIGMA="15.0" +fi +if [ -z ${VIASH_PAR_APPROX+x} ]; then + VIASH_PAR_APPROX="true" +fi +if [ -z ${VIASH_PAR_ALPHA+x} ]; then + VIASH_PAR_ALPHA="0.1" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_KNN" ]]; then + if ! [[ "$VIASH_PAR_KNN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--knn' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_BATCH_SIZE" ]]; then + if ! [[ "$VIASH_PAR_BATCH_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--batch_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SIGMA" ]]; then + if ! [[ "$VIASH_PAR_SIGMA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--sigma' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_APPROX" ]]; then + if ! [[ "$VIASH_PAR_APPROX" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--approx' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALPHA" ]]; then + if ! 
[[ "$VIASH_PAR_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--alpha' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scanorama:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scanorama-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'knn': $( if [ ! -z ${VIASH_PAR_KNN+x} ]; then echo "int(r'${VIASH_PAR_KNN//\'/\'\"\'\"r\'}')"; else echo None; fi ),
+  'batch_size': $( if [ ! -z ${VIASH_PAR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_BATCH_SIZE//\'/\'\"\'\"r\'}')"; else echo None; fi ),
+  'sigma': $( if [ ! -z ${VIASH_PAR_SIGMA+x} ]; then echo "float(r'${VIASH_PAR_SIGMA//\'/\'\"\'\"r\'}')"; else echo None; fi ),
+  # The wrapper validates --approx against true/false/yes/no (any of three
+  # casings), so 'yes' has to count as true as well; comparing against 'true'
+  # alone silently turned an accepted 'yes' into False.
+  'approx': $( if [ ! -z ${VIASH_PAR_APPROX+x} ]; then echo "r'${VIASH_PAR_APPROX//\'/\'\"\'\"r\'}'.lower() in ('true', 'yes')"; else echo None; fi ),
+  'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi )
+}
+meta = {
+  'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ),
+  'temp_dir': $( if [ !
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +from scanpy.external.pp import scanorama_integrate +from mudata import read_h5mu + +mdata = read_h5mu(par["input"]) + +mod_name = par["modality"] +mod = mdata.mod[mod_name] + +# Integration. +scanorama_integrate(mod, + key=par["obs_batch"], + basis=par["obsm_input"], + adjusted_basis=par["obsm_output"], + knn=par["knn"], + alpha=par["alpha"], + sigma=par["sigma"], + approx=par["approx"], + batch_size=par["batch_size"] ) + +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/integrate/scarches/.config.vsh.yaml b/target/docker/integrate/scarches/.config.vsh.yaml new file mode 100644 index 00000000000..5db3a55381d --- /dev/null +++ b/target/docker/integrate/scarches/.config.vsh.yaml @@ -0,0 +1,331 @@ +functionality: + name: "scarches" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file to use as a query" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + description: "Path to the directory with reference model or a web link. 
For\ + \ HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--dataset_name" + description: "Name of query dataset to use as a batch name. If not set, name\ + \ of the input file is used" + info: null + default: + - "test_dataset" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--model_output" + description: "Output directory for model" + info: null + default: + - "model" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Early stopping arguments" + arguments: + - type: "boolean" + name: "--early_stopping" + description: "Whether to perform early stopping with respect to the validation\ + \ set." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--early_stopping_monitor" + description: "Metric logged during validation set epoch." + info: null + default: + - "elbo_validation" + required: false + choices: + - "elbo_validation" + - "reconstruction_loss_validation" + - "kl_local_validation" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--early_stopping_patience" + description: "Number of validation epochs with no improvement after which training\ + \ will be stopped." + info: null + default: + - 45 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--early_stopping_min_delta" + description: "Minimum change in the monitored quantity to qualify as an improvement,\ + \ i.e. an absolute change of less than min_delta, will count as no improvement." + info: null + default: + - 0.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset, defaults to (20000 / number\ + \ of cells) * 400 or 400; whichever is smallest." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--reduce_lr_on_plateau" + description: "Whether to monitor validation loss and reduce learning rate when\ + \ validation set `lr_scheduler_metric` plateaus." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_factor" + description: "Factor to reduce learning rate." 
+ info: null + default: + - 0.6 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_patience" + description: "Number of epochs with no improvement after which learning rate\ + \ will be reduced." + info: null + default: + - 30.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs reference mapping with scArches" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + - type: "file" + path: "resources_test/HLCA_reference_model/HLCA_reference_model.zip" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/pytorch:23.09-py3" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scvi-tools~=1.0.3" + - "pandas~=2.1.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" 
+ mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scarches" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scarches/scarches" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/integrate/scarches/scarches b/target/docker/integrate/scarches/scarches new file mode 100755 index 00000000000..bd92aa388ac --- /dev/null +++ b/target/docker/integrate/scarches/scarches @@ -0,0 +1,1568 @@ +#!/usr/bin/env bash + +# scarches 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Vladimir Shitov + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no 
something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="scarches" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "scarches 0.12.4" + echo "" + echo "Performs reference mapping with scArches" + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input h5mu file to use as a query" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " -r, --reference" + echo " type: file, required parameter, file must exist" + echo " Path to the directory with reference model or a web link. For HLCA use" + echo " https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" + echo "" + echo " --dataset_name" + echo " type: string" + echo " default: test_dataset" + echo " Name of query dataset to use as a batch name. If not set, name of the" + echo " input file is used" + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." 
+ echo "" + echo " --model_output" + echo " type: file, output, file must exist" + echo " default: model" + echo " Output directory for model" + echo "" + echo " --obsm_output" + echo " type: string" + echo " default: X_integrated_scanvi" + echo " In which .obsm slot to store the resulting integrated embedding." + echo "" + echo "Early stopping arguments:" + echo " --early_stopping" + echo " type: boolean" + echo " Whether to perform early stopping with respect to the validation set." + echo "" + echo " --early_stopping_monitor" + echo " type: string" + echo " default: elbo_validation" + echo " choices: [ elbo_validation, reconstruction_loss_validation," + echo "kl_local_validation ]" + echo " Metric logged during validation set epoch." + echo "" + echo " --early_stopping_patience" + echo " type: integer" + echo " default: 45" + echo " min: 1" + echo " Number of validation epochs with no improvement after which training" + echo " will be stopped." + echo "" + echo " --early_stopping_min_delta" + echo " type: double" + echo " default: 0.0" + echo " min: 0.0" + echo " Minimum change in the monitored quantity to qualify as an improvement," + echo " i.e. an absolute change of less than min_delta, will count as no" + echo " improvement." + echo "" + echo "Learning parameters:" + echo " --max_epochs" + echo " type: integer, required parameter" + echo " Number of passes through the dataset, defaults to (20000 / number of" + echo " cells) * 400 or 400; whichever is smallest." + echo "" + echo " --reduce_lr_on_plateau" + echo " type: boolean" + echo " default: true" + echo " Whether to monitor validation loss and reduce learning rate when" + echo " validation set \`lr_scheduler_metric\` plateaus." + echo "" + echo " --lr_factor" + echo " type: double" + echo " default: 0.6" + echo " min: 0.0" + echo " Factor to reduce learning rate." 
+ echo "" + echo " --lr_patience" + echo " type: double" + echo " default: 30.0" + echo " min: 0.0" + echo " Number of epochs with no improvement after which learning rate will be" + echo " reduced." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? 
# returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM nvcr.io/nvidia/pytorch:23.09-py3 + +ENTRYPOINT [] + + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scvi-tools~=1.0.3" "pandas~=2.1.0" + +LABEL org.opencontainers.image.authors="Vladimir Shitov" +LABEL org.opencontainers.image.description="Companion container for running component integrate scarches" +LABEL org.opencontainers.image.created="2024-01-31T09:08:31Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scarches-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scarches 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -r) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dataset_name) + [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DATASET_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dataset_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dataset_name=*) + [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name=*\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DATASET_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --model_output) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --model_output=*) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping) + [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping=*) + [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping=*\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_monitor) + [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_EARLY_STOPPING_MONITOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_monitor. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping_monitor=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor=*\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MONITOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_patience) + [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_PATIENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_patience. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping_patience=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience=*\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_PATIENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_min_delta) + [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MIN_DELTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_min_delta. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --early_stopping_min_delta=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta=*\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MIN_DELTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_epochs) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_epochs=*) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reduce_lr_on_plateau) + [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REDUCE_LR_ON_PLATEAU="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reduce_lr_on_plateau. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reduce_lr_on_plateau=*) + [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau=*\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REDUCE_LR_ON_PLATEAU=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lr_factor) + [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_FACTOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_factor. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lr_factor=*) + [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor=*\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_FACTOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lr_patience) + [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_PATIENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_patience. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lr_patience=*) + [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience=*\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LR_PATIENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scarches:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE+x} ]; then + ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then + ViashError '--max_epochs' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_DATASET_NAME+x} ]; then + VIASH_PAR_DATASET_NAME="test_dataset" +fi +if [ -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then + VIASH_PAR_MODEL_OUTPUT="model" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_integrated_scanvi" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then + VIASH_PAR_EARLY_STOPPING_MONITOR="elbo_validation" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then + VIASH_PAR_EARLY_STOPPING_PATIENCE="45" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then + VIASH_PAR_EARLY_STOPPING_MIN_DELTA="0.0" +fi +if [ -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then + VIASH_PAR_REDUCE_LR_ON_PLATEAU="true" +fi +if [ -z ${VIASH_PAR_LR_FACTOR+x} ]; then + VIASH_PAR_LR_FACTOR="0.6" +fi +if [ -z ${VIASH_PAR_LR_PATIENCE+x} ]; then + VIASH_PAR_LR_PATIENCE="30.0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_EARLY_STOPPING" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--early_stopping' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ]]; then + if ! 
[[ "$VIASH_PAR_EARLY_STOPPING_PATIENCE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--early_stopping_patience' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_EARLY_STOPPING_PATIENCE -lt 1 ]]; then + ViashError '--early_stopping_patience' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--early_stopping_min_delta' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_EARLY_STOPPING_MIN_DELTA '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_EARLY_STOPPING_MIN_DELTA -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--early_stopping_min_delta' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ]]; then + if ! [[ "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--reduce_lr_on_plateau' has to be a boolean. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LR_FACTOR" ]]; then + if ! [[ "$VIASH_PAR_LR_FACTOR" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--lr_factor' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_LR_FACTOR '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_LR_FACTOR -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--lr_factor' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_LR_PATIENCE" ]]; then + if ! [[ "$VIASH_PAR_LR_PATIENCE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--lr_patience' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_LR_PATIENCE '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_LR_PATIENCE -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--lr_patience' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! 
[[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! 
-z "$VIASH_PAR_EARLY_STOPPING_MONITOR" ]; then + VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES=("elbo_validation:reconstruction_loss_validation:kl_local_validation") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES[*]}:" =~ ":$VIASH_PAR_EARLY_STOPPING_MONITOR:" ]]; then + ViashError '--early_stopping_monitor' specified value of \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_MODEL_OUTPUT")" ) + VIASH_PAR_MODEL_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_MODEL_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_MODEL_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_scarches:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scarches:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scarches:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scarches-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import mudata +import scvi +from torch.cuda import is_available as cuda_is_available +try: + from torch.backends.mps import is_available as mps_is_available +except ModuleNotFoundError: + # Older pytorch versions + # MacOS GPUs + def mps_is_available(): + return False + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'dataset_name': $( if [ ! -z ${VIASH_PAR_DATASET_NAME+x} ]; then echo "r'${VIASH_PAR_DATASET_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'early_stopping_patience': $( if [ ! 
-z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def _read_model_name_from_registry(model_path) -> str: + """Read registry with information about the model, return the model name""" + registry = scvi.model.base.BaseModelClass.load_registry(model_path) + return registry["model_name"] + + +def _detect_base_model(model_path): + """Read from the model's file which scvi_tools model it contains""" + + names_to_models_map = { + "AUTOZI": scvi.model.AUTOZI, + "CondSCVI": scvi.model.CondSCVI, + "DestVI": scvi.model.DestVI, + "LinearSCVI": scvi.model.LinearSCVI, + "PEAKVI": scvi.model.PEAKVI, + "SCANVI": scvi.model.SCANVI, + "SCVI": scvi.model.SCVI, + "TOTALVI": scvi.model.TOTALVI, + "MULTIVI": scvi.model.MULTIVI, + "AmortizedLDA": 
scvi.model.AmortizedLDA, + "JaxSCVI": scvi.model.JaxSCVI, + } + + return names_to_models_map[_read_model_name_from_registry(model_path)] + + +def extract_file_name(file_path): + """Return the name of the file from path to this file + + Examples + -------- + >>> extract_file_name("resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu") + pbmc_1k_protein_v3_mms + """ + slash_position = file_path.rfind("/") + dot_position = file_path.rfind(".") + + return file_path[slash_position + 1: dot_position] + + +def map_to_existing_reference(adata_query, model_path, check_val_every_n_epoch=1): + """ + A function to map the query data to the reference atlas + + Input: + * adata_query: An AnnData object with the query + * model_path: The reference model directory + + Output: + * vae_query: the trained scvi_tools model + * adata_query: The AnnData object with the query preprocessed for the mapping to the reference + """ + model = _detect_base_model(model_path) + + try: + model.prepare_query_anndata(adata_query, model_path) + except ValueError: + logger.warning("ValueError thrown when preparing adata for mapping. 
Clearing .varm field to prevent it") + adata_query.varm.clear() + model.prepare_query_anndata(adata_query, model_path) + + # Load query data into the model + vae_query = model.load_query_data( + adata_query, + model_path, + freeze_dropout=True + ) + + # Train scArches model for query mapping + vae_query.train( + max_epochs=par["max_epochs"], + early_stopping=par['early_stopping'], + early_stopping_monitor=par['early_stopping_monitor'], + early_stopping_patience=par['early_stopping_patience'], + early_stopping_min_delta=par['early_stopping_min_delta'], + check_val_every_n_epoch=check_val_every_n_epoch, + use_gpu=(cuda_is_available() or mps_is_available()) + ) + + return vae_query, adata_query + + +def _convert_object_dtypes_to_strings(adata): + """Convert object dtypes in .var and .obs to string to prevent error when saving file""" + def convert_cols(df): + object_cols = df.columns[df.dtypes == "object"] + for col in object_cols: + df[col] = df[col].astype(str) + return df + + adata.var = convert_cols(adata.var) + adata.obs = convert_cols(adata.obs) + + return adata + + +def _get_model_path(model_path: str): + """Obtain path to the directory with reference model. If the proposed \`model_path\` is a .zip archive, unzip it. 
If necessary, convert model to the new format + + Parameters + ---------- + model_path : str + Path to a directory, where to search for the model or to a zip file containing the model + + Returns + ------- + Path to a directory with reference model in format of scvi-tools>=0.15 + """ + import os + import zipfile + import tempfile + from pathlib import Path + + if os.path.isdir(model_path) and "model.pt" in os.listdir(model_path): + # Probably, the \`model_path\` already contains model in the output format of scvi-tools>=0.15 + return model_path + + # The model either has old format or is a zip file downloaded from Zenodo + new_directory = Path(tempfile.TemporaryDirectory().name) + + if zipfile.is_zipfile(model_path): + with zipfile.ZipFile(model_path) as archive: + archive.extractall(new_directory) + model_dir = next(new_directory.glob("**/*.pt")).parent + + else: + model_dir = next(Path(model_path).glob("**/*.pt")).parent + + if "model_params.pt" in os.listdir(model_dir): + # The model is in the \`directory\`, but it was generated with scvi-tools<0.15 + # TODO: for new references (that could not be SCANVI based), we need to check the base class somehow. Reading registry does not work with models generated by scvi-tools<0.15 + # Here I assume that the reference model is for HLCA and thus is SCANVI based + converted_model_path = os.path.join(model_dir, "converted") + scvi.model.SCANVI.convert_legacy_save(model_dir, converted_model_path) + return converted_model_path + + elif "model.pt" in os.listdir(model_dir): + # Archive contained model in the new format, so just return the directory + return model_dir + + else: + raise ValueError("Cannot find model in the provided reference path. Please, provide a path or a link to the directory with reference model. 
For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip") + + +def main(): + + mdata_query = mudata.read(par["input"].strip()) + adata_query = mdata_query.mod[par["modality"]].copy() + + if "dataset" not in adata_query.obs.columns: + # Write name of the dataset as batch variable + if par["dataset_name"] is None: + logger.info("Detecting dataset name") + par["dataset_name"] = extract_file_name(par["input"]) + logger.info(f"Detected {par['dataset_name']}") + + adata_query.obs["dataset"] = par["dataset_name"] + + model_path = _get_model_path(par["reference"]) + vae_query, adata_query = map_to_existing_reference(adata_query, model_path=model_path) + model_name = _read_model_name_from_registry(model_path) + + # Save info about the used model + mdata_query.mod[par["modality"]].uns["integration_method"] = model_name + + logger.info("Trying to write latent representation") + output_key = par["obsm_output"].format(model_name=model_name) + mdata_query.mod[par["modality"]].obsm[output_key] = vae_query.get_latent_representation() + + logger.info("Converting dtypes") + mdata_query.mod[par["modality"]] = _convert_object_dtypes_to_strings(mdata_query.mod[par["modality"]]) + + logger.info("Updating mudata") + try: + mdata_query.update() # Without that error might be thrown during file saving + except KeyError: + # Sometimes this error is thrown, but then everything is magically fixed, and the file gets saved normally + # This is discussed here a bit: https://github.com/scverse/mudata/issues/27 + logger.warning("KeyError was thrown during updating mudata. Probably, the file is fixed after that, but be careful") + + logger.info("Saving h5mu file") + mdata_query.write_h5mu(par["output"].strip(), compression=par["output_compression"]) + + logger.info("Saving model") + vae_query.save(par["model_output"], overwrite=True) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" 
+ +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then + VIASH_PAR_MODEL_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_MODEL_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/integrate/scarches/setup_logger.py b/target/docker/integrate/scarches/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/integrate/scarches/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/integrate/scvi/.config.vsh.yaml b/target/docker/integrate/scvi/.config.vsh.yaml new file mode 100644 index 00000000000..511d72e19bd --- /dev/null +++ b/target/docker/integrate/scvi/.config.vsh.yaml @@ -0,0 +1,591 @@ +functionality: + name: "scvi" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Malte D. 
Luecken" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "malte.luecken@helmholtz-muenchen.de" + github: "LuckyMD" + orcid: "0000-0001-7464-7921" + linkedin: "malte-l%C3%BCcken-b8b21049" + twitter: "MDLuecken" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "Group Leader" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Matthias Beyens" + roles: + - "contributor" + info: + role: "Contributor" + links: + github: "MatthiasBeyens" + orcid: "0000-0003-3304-0706" + email: "matthias.beyens@gmail.com" + linkedin: "mbeyens" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. If None, X is used" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: "Column name discriminating between your batches." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_input" + description: ".var column containing highly variable genes. By default, do not\ + \ subset genes." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_labels" + description: "Key in adata.obs for label information. Categories will automatically\ + \ be \nconverted into integer categories and saved to adata.obs['_scvi_labels'].\n\ + If None, assigns the same label to all the data.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_size_factor" + description: "Key in adata.obs for size factor information. Instead of using\ + \ library size as a size factor,\nthe provided size factor column will be\ + \ used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_categorical_covariate" + description: "Keys in adata.obs that correspond to categorical data. These covariates\ + \ can be added in\naddition to the batch covariate and are also treated as\ + \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ + \ latent space).\nThus, these should not be used for biologically-relevant\ + \ factors that you do _not_ want to correct for.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_continuous_covariate" + description: "Keys in adata.obs that correspond to continuous data. 
These covariates\ + \ can be added in\naddition to the batch covariate and are also treated as\ + \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ + \ latent space). Thus, these should not be\nused for biologically-relevant\ + \ factors that you do _not_ want to correct for.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_model" + description: "Folder where the state of the trained model will be saved to." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_scvi_integrated" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "SCVI options" + arguments: + - type: "integer" + name: "--n_hidden_nodes" + description: "Number of nodes per hidden layer." + info: null + default: + - 128 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_dimensions_latent_space" + description: "Dimensionality of the latent space." 
+ info: null + default: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_hidden_layers" + description: "Number of hidden layers used for encoder and decoder neural-networks." + info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--dropout_rate" + description: "Dropout rate for the neural networks." + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--dispersion" + description: "Set the behavior for the dispersion for negative binomial distributions:\n\ + - gene: dispersion parameter of negative binomial is constant per gene across\ + \ cells\n- gene-batch: dispersion can differ between different batches\n-\ + \ gene-label: dispersion can differ between different labels\n- gene-cell:\ + \ dispersion can differ for every gene in every cell\n" + info: null + default: + - "gene" + required: false + choices: + - "gene" + - "gene-batch" + - "gene-label" + - "gene-cell" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--gene_likelihood" + description: "Model used to generate the expression data from a count-based\ + \ likelihood distribution.\n- nb: Negative binomial distribution\n- zinb:\ + \ Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n" + info: null + default: + - "nb" + required: false + choices: + - "nb" + - "zinb" + - "poisson" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variational auto-encoder model options" + arguments: + - type: "string" + name: "--use_layer_normalization" + description: "Neural networks for which to enable layer normalization. 
\n" + info: null + default: + - "both" + required: false + choices: + - "encoder" + - "decoder" + - "none" + - "both" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--use_batch_normalization" + description: "Neural networks for which to enable batch normalization. \n" + info: null + default: + - "none" + required: false + choices: + - "encoder" + - "decoder" + - "none" + - "both" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_false" + name: "--encode_covariates" + description: "Whether to concatenate covariates to expression in encoder" + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--deeply_inject_covariates" + description: "Whether to concatenate covariates into output of hidden layers\ + \ in encoder/decoder. \nThis option only applies when n_layers > 1. The covariates\ + \ are concatenated to\nthe input of subsequent hidden layers.\n" + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--use_observed_lib_size" + description: "Use observed library size for RNA as scaling factor in mean of\ + \ conditional distribution.\n" + info: null + direction: "input" + dest: "par" + - name: "Early stopping arguments" + arguments: + - type: "boolean" + name: "--early_stopping" + description: "Whether to perform early stopping with respect to the validation\ + \ set." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--early_stopping_monitor" + description: "Metric logged during validation set epoch." 
+ info: null + default: + - "elbo_validation" + required: false + choices: + - "elbo_validation" + - "reconstruction_loss_validation" + - "kl_local_validation" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--early_stopping_patience" + description: "Number of validation epochs with no improvement after which training\ + \ will be stopped." + info: null + default: + - 45 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--early_stopping_min_delta" + description: "Minimum change in the monitored quantity to qualify as an improvement,\ + \ i.e. an absolute change of less than min_delta, will count as no improvement." + info: null + default: + - 0.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset, defaults to (20000 / number\ + \ of cells) * 400 or 400; whichever is smallest." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--reduce_lr_on_plateau" + description: "Whether to monitor validation loss and reduce learning rate when\ + \ validation set `lr_scheduler_metric` plateaus." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_factor" + description: "Factor to reduce learning rate." + info: null + default: + - 0.6 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_patience" + description: "Number of epochs with no improvement after which learning rate\ + \ will be reduced." 
+ info: null + default: + - 30.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Data validition" + arguments: + - type: "integer" + name: "--n_obs_min_count" + description: "Minimum number of cells threshold ensuring that every obs_batch\ + \ category has sufficient observations (cells) for model training." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_var_min_count" + description: "Minimum number of genes threshold ensuring that every var_input\ + \ filter has sufficient observations (genes) for model training." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../../utils/subset_vars.py" + description: "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "../../utils/subset_vars.py" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/pytorch:23.06-py3" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libpng-dev" + - "libjpeg-dev" + interactive: false + - type: "docker" + run: + - "pip install \"jax[cuda]\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n" + 
- type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + - type: "python" + user: false + packages: + - "numba~=0.57.1" + - "scvi-tools~=1.0.0" + upgrade: false + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midcpu" + - "midmem" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scvi" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/scvi/scvi" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/integrate/scvi/scvi b/target/docker/integrate/scvi/scvi new file mode 100755 index 00000000000..2d1089faf51 --- 
/dev/null
+++ b/target/docker/integrate/scvi/scvi
@@ -0,0 +1,1912 @@
+#!/usr/bin/env bash
+
+# scvi 0.12.4
+#
+# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative
+# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
+#
+# The component may contain files which fall under a different license. The
+# authors of this component should specify the license in the header of such
+# files, or include a separate license file detailing the licenses of all included
+# files.
+#
+# Component authors:
+# * Malte D. Luecken (author)
+# * Dries Schaumont (maintainer)
+# * Matthias Beyens (contributor)
+
+set -e  # abort the wrapper as soon as any command fails
+
+if [ -z "$VIASH_TEMP" ]; then  # no explicit temp dir: take the first non-empty candidate below
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}  # last resort
+fi
+
+# define helper functions
+# ViashQuote: put single quotes around non-flag values
+# $1     : unquoted string
+# return : possibly quoted string (printed on stdout)
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then  # flag with value: quote only the part after the first '='
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then  # bare flag: printed unchanged
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag (printed on stdout)
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory of a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory containing the bash file
+# note    : SOURCE and DIR are not declared 'local' in this function
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do  # keep resolving while SOURCE is itself a symlink
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"  # relative link target: anchor it at the link's directory
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE  # default: show notice (5) and anything more severe
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then  # printed only when verbosity is at least the required level
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# output: Your input, prepended by '[warning] ' (emitted through ViashLog).
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# output: Your input, prepended by '[notice] ' (emitted through ViashLog).
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# output: Your input, prepended by '[info] ' (emitted through ViashLog).
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# output: Your input, prepended by '[debug] ' (emitted through ViashLog).
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component (argument quoted so paths containing spaces survive)
+VIASH_META_RESOURCES_DIR=`ViashSourceDir "${BASH_SOURCE[0]}"`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="scvi"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable (printed on stdout)
+function ViashHelp {
+  echo "scvi 0.12.4"
+  echo ""
+  echo "Performs scvi integration as done in the human lung cell atlas"
+  echo "https://github.com/LungCellAtlas/HLCA"
+  echo ""
+  echo "Inputs:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " Input h5mu file"
+  echo ""
+  echo " --modality"
+  echo " type: string"
+  echo " default: rna"
+  echo ""
+  echo " --input_layer"
+  echo " type: string"
+  echo " Input layer to use. If None, X is used"
+  echo ""
+  echo " --obs_batch"
+  echo " type: string"
+  echo " default: sample_id"
+  echo " Column name discriminating between your batches."
+  echo ""
+  echo " --var_input"
+  echo " type: string"
+  echo " .var column containing highly variable genes. By default, do not subset"
+  echo " genes."
+  echo ""
+  echo " --obs_labels"
+  echo " type: string"
+  echo " Key in adata.obs for label information. Categories will automatically be"
+  echo " converted into integer categories and saved to"
+  echo " adata.obs['_scvi_labels']."
+  echo " If None, assigns the same label to all the data."
+  echo ""
+  echo " --obs_size_factor"
+  echo " type: string"
+  echo " Key in adata.obs for size factor information. Instead of using library"
+  echo " size as a size factor,"
+  echo " the provided size factor column will be used as offset in the mean of"
+  echo " the likelihood."
+  echo " Assumed to be on linear scale."
+  echo ""
+  echo " --obs_categorical_covariate"
+  echo " type: string, multiple values allowed"
+  echo " Keys in adata.obs that correspond to categorical data. These covariates"
+  echo " can be added in"
+  echo " addition to the batch covariate and are also treated as nuisance factors"
+  echo " (i.e., the model tries to minimize their effects on the latent space)."
+  echo " Thus, these should not be used for biologically-relevant factors that"
+  echo " you do _not_ want to correct for."
+  echo ""
+  echo " --obs_continuous_covariate"
+  echo " type: string, multiple values allowed"
+  echo " Keys in adata.obs that correspond to continuous data. These covariates"
+  echo " can be added in"
+  echo " addition to the batch covariate and are also treated as nuisance factors"
+  echo " (i.e., the model tries to minimize their effects on the latent space)."
+  echo " Thus, these should not be"
+  echo " used for biologically-relevant factors that you do _not_ want to correct"
+  echo " for."
+  echo ""
+  echo "Outputs:"
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " Output h5mu file."
+  echo ""
+  echo " --output_model"
+  echo " type: file, output, file must exist"
+  echo " Folder where the state of the trained model will be saved to."
+  echo ""
+  echo " --output_compression"
+  echo " type: string"
+  echo " example: gzip"
+  echo " choices: [ gzip, lzf ]"
+  echo " The compression format to be used on the output h5mu object."
+  echo ""
+  echo " --obsm_output"
+  echo " type: string"
+  echo " default: X_scvi_integrated"
+  echo " In which .obsm slot to store the resulting integrated embedding."
+  echo ""
+  echo "SCVI options:"
+  echo " --n_hidden_nodes"
+  echo " type: integer"
+  echo " default: 128"
+  echo " Number of nodes per hidden layer."
+  echo ""
+  echo " --n_dimensions_latent_space"
+  echo " type: integer"
+  echo " default: 30"
+  echo " Dimensionality of the latent space."
+  echo ""
+  echo " --n_hidden_layers"
+  echo " type: integer"
+  echo " default: 2"
+  echo " Number of hidden layers used for encoder and decoder neural-networks."
+  echo ""
+  echo " --dropout_rate"
+  echo " type: double"
+  echo " default: 0.1"
+  echo " Dropout rate for the neural networks."
+  echo ""
+  echo " --dispersion"
+  echo " type: string"
+  echo " default: gene"
+  echo " choices: [ gene, gene-batch, gene-label, gene-cell ]"
+  echo " Set the behavior for the dispersion for negative binomial distributions:"
+  echo " - gene: dispersion parameter of negative binomial is constant per gene"
+  echo " across cells"
+  echo " - gene-batch: dispersion can differ between different batches"
+  echo " - gene-label: dispersion can differ between different labels"
+  echo " - gene-cell: dispersion can differ for every gene in every cell"
+  echo ""
+  echo " --gene_likelihood"
+  echo " type: string"
+  echo " default: nb"
+  echo " choices: [ nb, zinb, poisson ]"
+  echo " Model used to generate the expression data from a count-based likelihood"
+  echo " distribution."
+  echo " - nb: Negative binomial distribution"
+  echo " - zinb: Zero-inflated negative binomial distribution"
+  echo " - poisson: Poisson distribution"
+  echo ""
+  echo "Variational auto-encoder model options:"
+  echo " --use_layer_normalization"
+  echo " type: string"
+  echo " default: both"
+  echo " choices: [ encoder, decoder, none, both ]"
+  echo " Neural networks for which to enable layer normalization."
+  echo ""
+  echo " --use_batch_normalization"
+  echo " type: string"
+  echo " default: none"
+  echo " choices: [ encoder, decoder, none, both ]"
+  echo " Neural networks for which to enable batch normalization."
+  echo ""
+  echo " --encode_covariates"
+  echo " type: boolean_false"
+  echo " Whether to concatenate covariates to expression in encoder"
+  echo ""
+  echo " --deeply_inject_covariates"
+  echo " type: boolean_true"
+  echo " Whether to concatenate covariates into output of hidden layers in"
+  echo " encoder/decoder."
+  echo " This option only applies when n_layers > 1. The covariates are"
+  echo " concatenated to"
+  echo " the input of subsequent hidden layers."
+  echo ""
+  echo " --use_observed_lib_size"
+  echo " type: boolean_true"
+  echo " Use observed library size for RNA as scaling factor in mean of"
+  echo " conditional distribution."
+  echo ""
+  echo "Early stopping arguments:"
+  echo " --early_stopping"
+  echo " type: boolean"
+  echo " Whether to perform early stopping with respect to the validation set."
+  echo ""
+  echo " --early_stopping_monitor"
+  echo " type: string"
+  echo " default: elbo_validation"
+  echo " choices: [ elbo_validation, reconstruction_loss_validation,"
+  echo "kl_local_validation ]"
+  echo " Metric logged during validation set epoch."
+  echo ""
+  echo " --early_stopping_patience"
+  echo " type: integer"
+  echo " default: 45"
+  echo " min: 1"
+  echo " Number of validation epochs with no improvement after which training"
+  echo " will be stopped."
+  echo ""
+  echo " --early_stopping_min_delta"
+  echo " type: double"
+  echo " default: 0.0"
+  echo " min: 0.0"
+  echo " Minimum change in the monitored quantity to qualify as an improvement,"
+  echo " i.e. an absolute change of less than min_delta, will count as no"
+  echo " improvement."
+  echo ""
+  echo "Learning parameters:"
+  echo " --max_epochs"
+  echo " type: integer"
+  echo " Number of passes through the dataset, defaults to (20000 / number of"
+  echo " cells) * 400 or 400; whichever is smallest."
+  echo ""
+  echo " --reduce_lr_on_plateau"
+  echo " type: boolean"
+  echo " default: true"
+  echo " Whether to monitor validation loss and reduce learning rate when"
+  echo " validation set \`lr_scheduler_metric\` plateaus."
+  echo ""
+  echo " --lr_factor"
+  echo " type: double"
+  echo " default: 0.6"
+  echo " min: 0.0"
+  echo " Factor to reduce learning rate."
+  echo ""
+  echo " --lr_patience"
+  echo " type: double"
+  echo " default: 30.0"
+  echo " min: 0.0"
+  echo " Number of epochs with no improvement after which learning rate will be"
+  echo " reduced."
+  echo ""
+  echo "Data validition:"
+  echo " --n_obs_min_count"
+  echo " type: integer"
+  echo " default: 0"
+  echo " Minimum number of cells threshold ensuring that every obs_batch category"
+  echo " has sufficient observations (cells) for model training."
+  echo ""
+  echo " --n_var_min_count"
+  echo " type: integer"
+  echo " default: 0"
+  echo " Minimum number of genes threshold ensuring that every var_input filter"
+  echo " has sufficient observations (genes) for model training."
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# Exits with status 1 when the docker executable is missing or 'docker version' fails.
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then  # run 'command -v' itself; inside '[ ]' it was never executed
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e  # remember the shell flags, then let the probe below fail without aborting
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)  # value is not used in this function
+  out=$?
+  [[ $save =~ e ]] && set -e  # restore errexit only if it was on before
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1           : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $? # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect "$1" > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1           : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $? # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q "$1")" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1           : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+#   ViashDockerPull python:latest
+#   echo $? # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $?
# returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then  # verbose: let docker print its own progress
+    docker pull "$1" && return 0 || return 1
+  else
+    save=$-; set +e  # remember the shell flags, then tolerate a failing pull
+    docker pull "$1" 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e  # restore errexit only if it was on before
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1           : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the push succeeded (exit status of 'docker push')
+# examples:
+#   ViashDockerPush python:latest
+#   echo $? # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e  # remember the shell flags, then tolerate a failing push
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push "$1"
+    out=$?
+  else
+    docker push "$1" 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e  # restore errexit only if it was on before
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1               : image identifier with format `[registry/]image[:tag]`; all arguments are forwarded to ViashDockerBuild
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e  # a failed pull must fall through to the build instead of aborting
+  ViashDockerPull "$1"
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild "$@"
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM nvcr.io/nvidia/pytorch:23.06-py3 + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libpng-dev libjpeg-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install "jax[cuda]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +RUN pip install --upgrade pip && \ + pip install --no-cache-dir "numba~=0.57.1" "scvi-tools~=1.0.0" + +LABEL org.opencontainers.image.authors="Malte D. 
Luecken, Dries Schaumont, Matthias Beyens" +LABEL org.opencontainers.image.description="Companion container for running component integrate scvi" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scvi-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scvi 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_layer) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_layer=*) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_batch) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_batch=*) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_input) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_input=*) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_labels) + [ -n "$VIASH_PAR_OBS_LABELS" ] && ViashError Bad arguments for option \'--obs_labels\': \'$VIASH_PAR_OBS_LABELS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LABELS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_labels. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_labels=*) + [ -n "$VIASH_PAR_OBS_LABELS" ] && ViashError Bad arguments for option \'--obs_labels=*\': \'$VIASH_PAR_OBS_LABELS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_LABELS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_size_factor) + [ -n "$VIASH_PAR_OBS_SIZE_FACTOR" ] && ViashError Bad arguments for option \'--obs_size_factor\': \'$VIASH_PAR_OBS_SIZE_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_SIZE_FACTOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_size_factor. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --obs_size_factor=*) + [ -n "$VIASH_PAR_OBS_SIZE_FACTOR" ] && ViashError Bad arguments for option \'--obs_size_factor=*\': \'$VIASH_PAR_OBS_SIZE_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_SIZE_FACTOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_categorical_covariate) + if [ -z "$VIASH_PAR_OBS_CATEGORICAL_COVARIATE" ]; then + VIASH_PAR_OBS_CATEGORICAL_COVARIATE="$2" + else + VIASH_PAR_OBS_CATEGORICAL_COVARIATE="$VIASH_PAR_OBS_CATEGORICAL_COVARIATE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_categorical_covariate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_categorical_covariate=*) + if [ -z "$VIASH_PAR_OBS_CATEGORICAL_COVARIATE" ]; then + VIASH_PAR_OBS_CATEGORICAL_COVARIATE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OBS_CATEGORICAL_COVARIATE="$VIASH_PAR_OBS_CATEGORICAL_COVARIATE:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --obs_continuous_covariate) + if [ -z "$VIASH_PAR_OBS_CONTINUOUS_COVARIATE" ]; then + VIASH_PAR_OBS_CONTINUOUS_COVARIATE="$2" + else + VIASH_PAR_OBS_CONTINUOUS_COVARIATE="$VIASH_PAR_OBS_CONTINUOUS_COVARIATE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_continuous_covariate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_continuous_covariate=*) + if [ -z "$VIASH_PAR_OBS_CONTINUOUS_COVARIATE" ]; then + VIASH_PAR_OBS_CONTINUOUS_COVARIATE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OBS_CONTINUOUS_COVARIATE="$VIASH_PAR_OBS_CONTINUOUS_COVARIATE:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_model) + [ -n "$VIASH_PAR_OUTPUT_MODEL" ] && ViashError Bad arguments for option \'--output_model\': \'$VIASH_PAR_OUTPUT_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MODEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_model. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_model=*) + [ -n "$VIASH_PAR_OUTPUT_MODEL" ] && ViashError Bad arguments for option \'--output_model=*\': \'$VIASH_PAR_OUTPUT_MODEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MODEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_hidden_nodes) + [ -n "$VIASH_PAR_N_HIDDEN_NODES" ] && ViashError Bad arguments for option \'--n_hidden_nodes\': \'$VIASH_PAR_N_HIDDEN_NODES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_HIDDEN_NODES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_hidden_nodes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_hidden_nodes=*) + [ -n "$VIASH_PAR_N_HIDDEN_NODES" ] && ViashError Bad arguments for option \'--n_hidden_nodes=*\': \'$VIASH_PAR_N_HIDDEN_NODES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_N_HIDDEN_NODES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_dimensions_latent_space) + [ -n "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" ] && ViashError Bad arguments for option \'--n_dimensions_latent_space\': \'$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_DIMENSIONS_LATENT_SPACE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_dimensions_latent_space. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_dimensions_latent_space=*) + [ -n "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" ] && ViashError Bad arguments for option \'--n_dimensions_latent_space=*\': \'$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_DIMENSIONS_LATENT_SPACE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_hidden_layers) + [ -n "$VIASH_PAR_N_HIDDEN_LAYERS" ] && ViashError Bad arguments for option \'--n_hidden_layers\': \'$VIASH_PAR_N_HIDDEN_LAYERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_HIDDEN_LAYERS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_hidden_layers. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_hidden_layers=*) + [ -n "$VIASH_PAR_N_HIDDEN_LAYERS" ] && ViashError Bad arguments for option \'--n_hidden_layers=*\': \'$VIASH_PAR_N_HIDDEN_LAYERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_HIDDEN_LAYERS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dropout_rate) + [ -n "$VIASH_PAR_DROPOUT_RATE" ] && ViashError Bad arguments for option \'--dropout_rate\': \'$VIASH_PAR_DROPOUT_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DROPOUT_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dropout_rate. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dropout_rate=*) + [ -n "$VIASH_PAR_DROPOUT_RATE" ] && ViashError Bad arguments for option \'--dropout_rate=*\': \'$VIASH_PAR_DROPOUT_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DROPOUT_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --dispersion) + [ -n "$VIASH_PAR_DISPERSION" ] && ViashError Bad arguments for option \'--dispersion\': \'$VIASH_PAR_DISPERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DISPERSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dispersion. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dispersion=*) + [ -n "$VIASH_PAR_DISPERSION" ] && ViashError Bad arguments for option \'--dispersion=*\': \'$VIASH_PAR_DISPERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DISPERSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gene_likelihood) + [ -n "$VIASH_PAR_GENE_LIKELIHOOD" ] && ViashError Bad arguments for option \'--gene_likelihood\': \'$VIASH_PAR_GENE_LIKELIHOOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_LIKELIHOOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_likelihood. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --gene_likelihood=*) + [ -n "$VIASH_PAR_GENE_LIKELIHOOD" ] && ViashError Bad arguments for option \'--gene_likelihood=*\': \'$VIASH_PAR_GENE_LIKELIHOOD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_GENE_LIKELIHOOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --use_layer_normalization) + [ -n "$VIASH_PAR_USE_LAYER_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_layer_normalization\': \'$VIASH_PAR_USE_LAYER_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_LAYER_NORMALIZATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_layer_normalization. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --use_layer_normalization=*) + [ -n "$VIASH_PAR_USE_LAYER_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_layer_normalization=*\': \'$VIASH_PAR_USE_LAYER_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_LAYER_NORMALIZATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --use_batch_normalization) + [ -n "$VIASH_PAR_USE_BATCH_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_batch_normalization\': \'$VIASH_PAR_USE_BATCH_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_BATCH_NORMALIZATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_batch_normalization. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --use_batch_normalization=*) + [ -n "$VIASH_PAR_USE_BATCH_NORMALIZATION" ] && ViashError Bad arguments for option \'--use_batch_normalization=*\': \'$VIASH_PAR_USE_BATCH_NORMALIZATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_BATCH_NORMALIZATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --encode_covariates) + [ -n "$VIASH_PAR_ENCODE_COVARIATES" ] && ViashError Bad arguments for option \'--encode_covariates\': \'$VIASH_PAR_ENCODE_COVARIATES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ENCODE_COVARIATES=false + shift 1 + ;; + --deeply_inject_covariates) + [ -n "$VIASH_PAR_DEEPLY_INJECT_COVARIATES" ] && ViashError Bad arguments for option \'--deeply_inject_covariates\': \'$VIASH_PAR_DEEPLY_INJECT_COVARIATES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DEEPLY_INJECT_COVARIATES=true + shift 1 + ;; + --use_observed_lib_size) + [ -n "$VIASH_PAR_USE_OBSERVED_LIB_SIZE" ] && ViashError Bad arguments for option \'--use_observed_lib_size\': \'$VIASH_PAR_USE_OBSERVED_LIB_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_OBSERVED_LIB_SIZE=true + shift 1 + ;; + --early_stopping) + [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping=*) + [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping=*\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_monitor) + [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MONITOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_monitor. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --early_stopping_monitor=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor=*\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MONITOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_patience) + [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_PATIENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_patience. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping_patience=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience=*\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_PATIENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_min_delta) + [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MIN_DELTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_min_delta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping_min_delta=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta=*\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_EARLY_STOPPING_MIN_DELTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_epochs) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_epochs=*) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reduce_lr_on_plateau) + [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REDUCE_LR_ON_PLATEAU="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reduce_lr_on_plateau. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reduce_lr_on_plateau=*) + [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau=*\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REDUCE_LR_ON_PLATEAU=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lr_factor) + [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_FACTOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_factor. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --lr_factor=*) + [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor=*\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_FACTOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lr_patience) + [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_PATIENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_patience. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lr_patience=*) + [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience=*\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_PATIENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_obs_min_count) + [ -n "$VIASH_PAR_N_OBS_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_obs_min_count\': \'$VIASH_PAR_N_OBS_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_OBS_MIN_COUNT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_obs_min_count. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_obs_min_count=*) + [ -n "$VIASH_PAR_N_OBS_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_obs_min_count=*\': \'$VIASH_PAR_N_OBS_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_OBS_MIN_COUNT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_var_min_count) + [ -n "$VIASH_PAR_N_VAR_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_var_min_count\': \'$VIASH_PAR_N_VAR_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_N_VAR_MIN_COUNT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_var_min_count. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_var_min_count=*) + [ -n "$VIASH_PAR_N_VAR_MIN_COUNT" ] && ViashError Bad arguments for option \'--n_var_min_count=*\': \'$VIASH_PAR_N_VAR_MIN_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_VAR_MIN_COUNT=$(ViashRemoveFlags "$1") + shift 1 + ;; + # '---setup <strategy>': flag and value are two separate words, so both are consumed + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + # '---setup=<strategy>': flag and value are a single word, so only that word is consumed + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + # no '---v' alias here: '---v' is already matched by the '---v|---verbose' arm earlier in this case statement + ---volume) + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---volume. Use "--help" to get more information on the parameters. && exit 1 + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + # '---volume=<spec>': the mount spec is embedded in "$1", not in the following argument + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_scvi:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) 
;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + # quoted so that a value containing whitespace reaches ViashMemoryAsBytes as a single argument + VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # (unset takes the variable NAME; 'unset $VAR' with an empty VAR removes nothing) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # pass the variable name, not its (empty) expansion + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then + VIASH_PAR_OBS_BATCH="sample_id" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_scvi_integrated" +fi +if [ -z ${VIASH_PAR_N_HIDDEN_NODES+x} ]; then + VIASH_PAR_N_HIDDEN_NODES="128" +fi +if [ -z ${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE+x} ]; then + VIASH_PAR_N_DIMENSIONS_LATENT_SPACE="30" +fi +if [ -z ${VIASH_PAR_N_HIDDEN_LAYERS+x} ]; then + VIASH_PAR_N_HIDDEN_LAYERS="2" +fi +if [ -z ${VIASH_PAR_DROPOUT_RATE+x} ]; then + VIASH_PAR_DROPOUT_RATE="0.1" +fi +if [ -z ${VIASH_PAR_DISPERSION+x} ]; then + VIASH_PAR_DISPERSION="gene" +fi +if [ -z ${VIASH_PAR_GENE_LIKELIHOOD+x} ]; then + VIASH_PAR_GENE_LIKELIHOOD="nb" +fi +if [ -z ${VIASH_PAR_USE_LAYER_NORMALIZATION+x} ]; then + VIASH_PAR_USE_LAYER_NORMALIZATION="both" +fi +if [ -z ${VIASH_PAR_USE_BATCH_NORMALIZATION+x} ]; then + VIASH_PAR_USE_BATCH_NORMALIZATION="none" +fi +if [ -z ${VIASH_PAR_ENCODE_COVARIATES+x} ]; then + VIASH_PAR_ENCODE_COVARIATES="true" +fi +if [ -z ${VIASH_PAR_DEEPLY_INJECT_COVARIATES+x} ]; then + VIASH_PAR_DEEPLY_INJECT_COVARIATES="false" +fi +if [ -z ${VIASH_PAR_USE_OBSERVED_LIB_SIZE+x} ]; then + VIASH_PAR_USE_OBSERVED_LIB_SIZE="false" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then + VIASH_PAR_EARLY_STOPPING_MONITOR="elbo_validation" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then + VIASH_PAR_EARLY_STOPPING_PATIENCE="45" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then + VIASH_PAR_EARLY_STOPPING_MIN_DELTA="0.0" +fi +if [ -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then + VIASH_PAR_REDUCE_LR_ON_PLATEAU="true" +fi +if [ -z ${VIASH_PAR_LR_FACTOR+x} ]; then + VIASH_PAR_LR_FACTOR="0.6" +fi +if [ -z ${VIASH_PAR_LR_PATIENCE+x} ]; then + VIASH_PAR_LR_PATIENCE="30.0" +fi +if [ -z ${VIASH_PAR_N_OBS_MIN_COUNT+x} ]; then + 
VIASH_PAR_N_OBS_MIN_COUNT="0" +fi +if [ -z ${VIASH_PAR_N_VAR_MIN_COUNT+x} ]; then + VIASH_PAR_N_VAR_MIN_COUNT="0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_N_HIDDEN_NODES" ]]; then + if ! [[ "$VIASH_PAR_N_HIDDEN_NODES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_hidden_nodes' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" ]]; then + if ! [[ "$VIASH_PAR_N_DIMENSIONS_LATENT_SPACE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_dimensions_latent_space' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_HIDDEN_LAYERS" ]]; then + if ! [[ "$VIASH_PAR_N_HIDDEN_LAYERS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_hidden_layers' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DROPOUT_RATE" ]]; then + if ! [[ "$VIASH_PAR_DROPOUT_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--dropout_rate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ENCODE_COVARIATES" ]]; then + if ! [[ "$VIASH_PAR_ENCODE_COVARIATES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--encode_covariates' has to be a boolean_false. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DEEPLY_INJECT_COVARIATES" ]]; then + if ! [[ "$VIASH_PAR_DEEPLY_INJECT_COVARIATES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--deeply_inject_covariates' has to be a boolean_true. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_USE_OBSERVED_LIB_SIZE" ]]; then + if ! [[ "$VIASH_PAR_USE_OBSERVED_LIB_SIZE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--use_observed_lib_size' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EARLY_STOPPING" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--early_stopping' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING_PATIENCE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--early_stopping_patience' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + if [[ $VIASH_PAR_EARLY_STOPPING_PATIENCE -lt 1 ]]; then + ViashError '--early_stopping_patience' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--early_stopping_min_delta' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_EARLY_STOPPING_MIN_DELTA '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_EARLY_STOPPING_MIN_DELTA -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. 
+ exit 1 + fi + else + ViashWarning '--early_stopping_min_delta' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ]]; then + if ! [[ "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--reduce_lr_on_plateau' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LR_FACTOR" ]]; then + if ! [[ "$VIASH_PAR_LR_FACTOR" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--lr_factor' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_LR_FACTOR '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_LR_FACTOR -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--lr_factor' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_LR_PATIENCE" ]]; then + if ! [[ "$VIASH_PAR_LR_PATIENCE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--lr_patience' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! 
[[ `echo $VIASH_PAR_LR_PATIENCE '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_LR_PATIENCE -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--lr_patience' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_N_OBS_MIN_COUNT" ]]; then + if ! [[ "$VIASH_PAR_N_OBS_MIN_COUNT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_obs_min_count' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_VAR_MIN_COUNT" ]]; then + if ! [[ "$VIASH_PAR_N_VAR_MIN_COUNT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_var_min_count' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_DISPERSION" ]; then + VIASH_PAR_DISPERSION_CHOICES=("gene:gene-batch:gene-label:gene-cell") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_DISPERSION_CHOICES[*]}:" =~ ":$VIASH_PAR_DISPERSION:" ]]; then + ViashError '--dispersion' specified value of \'$VIASH_PAR_DISPERSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_GENE_LIKELIHOOD" ]; then + VIASH_PAR_GENE_LIKELIHOOD_CHOICES=("nb:zinb:poisson") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_GENE_LIKELIHOOD_CHOICES[*]}:" =~ ":$VIASH_PAR_GENE_LIKELIHOOD:" ]]; then + ViashError '--gene_likelihood' specified value of \'$VIASH_PAR_GENE_LIKELIHOOD\' is not in the list of allowed values. 
Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_USE_LAYER_NORMALIZATION" ]; then + VIASH_PAR_USE_LAYER_NORMALIZATION_CHOICES=("encoder:decoder:none:both") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_USE_LAYER_NORMALIZATION_CHOICES[*]}:" =~ ":$VIASH_PAR_USE_LAYER_NORMALIZATION:" ]]; then + ViashError '--use_layer_normalization' specified value of \'$VIASH_PAR_USE_LAYER_NORMALIZATION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_USE_BATCH_NORMALIZATION" ]; then + VIASH_PAR_USE_BATCH_NORMALIZATION_CHOICES=("encoder:decoder:none:both") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_USE_BATCH_NORMALIZATION_CHOICES[*]}:" =~ ":$VIASH_PAR_USE_BATCH_NORMALIZATION:" ]]; then + ViashError '--use_batch_normalization' specified value of \'$VIASH_PAR_USE_BATCH_NORMALIZATION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_EARLY_STOPPING_MONITOR" ]; then + VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES=("elbo_validation:reconstruction_loss_validation:kl_local_validation") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES[*]}:" =~ ":$VIASH_PAR_EARLY_STOPPING_MONITOR:" ]]; then + ViashError '--early_stopping_monitor' specified value of \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT_MODEL")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_MODEL")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_MODEL")" ) + VIASH_PAR_OUTPUT_MODEL=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_MODEL") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_MODEL" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_scvi:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scvi:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_scvi:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scvi-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from scanpy._utils import check_nonnegative_integers +import mudata +import scvi + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_labels': $( if [ ! -z ${VIASH_PAR_OBS_LABELS+x} ]; then echo "r'${VIASH_PAR_OBS_LABELS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_size_factor': $( if [ ! -z ${VIASH_PAR_OBS_SIZE_FACTOR+x} ]; then echo "r'${VIASH_PAR_OBS_SIZE_FACTOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_categorical_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CATEGORICAL_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CATEGORICAL_COVARIATE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'obs_continuous_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CONTINUOUS_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CONTINUOUS_COVARIATE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_model': $( if [ ! -z ${VIASH_PAR_OUTPUT_MODEL+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MODEL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'n_hidden_nodes': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_NODES+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_NODES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_dimensions_latent_space': $( if [ ! 
-z ${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE+x} ]; then echo "int(r'${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_hidden_layers': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_LAYERS+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_LAYERS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'dropout_rate': $( if [ ! -z ${VIASH_PAR_DROPOUT_RATE+x} ]; then echo "float(r'${VIASH_PAR_DROPOUT_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'dispersion': $( if [ ! -z ${VIASH_PAR_DISPERSION+x} ]; then echo "r'${VIASH_PAR_DISPERSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'gene_likelihood': $( if [ ! -z ${VIASH_PAR_GENE_LIKELIHOOD+x} ]; then echo "r'${VIASH_PAR_GENE_LIKELIHOOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'use_layer_normalization': $( if [ ! -z ${VIASH_PAR_USE_LAYER_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_LAYER_NORMALIZATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'use_batch_normalization': $( if [ ! -z ${VIASH_PAR_USE_BATCH_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_BATCH_NORMALIZATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'encode_covariates': $( if [ ! -z ${VIASH_PAR_ENCODE_COVARIATES+x} ]; then echo "r'${VIASH_PAR_ENCODE_COVARIATES//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'deeply_inject_covariates': $( if [ ! -z ${VIASH_PAR_DEEPLY_INJECT_COVARIATES+x} ]; then echo "r'${VIASH_PAR_DEEPLY_INJECT_COVARIATES//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'use_observed_lib_size': $( if [ ! -z ${VIASH_PAR_USE_OBSERVED_LIB_SIZE+x} ]; then echo "r'${VIASH_PAR_USE_OBSERVED_LIB_SIZE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'early_stopping_monitor': $( if [ ! 
-z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_obs_min_count': $( if [ ! -z ${VIASH_PAR_N_OBS_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_OBS_MIN_COUNT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_var_min_count': $( if [ ! -z ${VIASH_PAR_N_VAR_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_VAR_MIN_COUNT//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +import sys +sys.path.append(meta['resources_dir']) + +# START TEMPORARY WORKAROUND subset_vars +# reason: resources aren't available when using Nextflow fusion +# from subset_vars import subset_vars +def subset_vars(adata, subset_col): + return adata[:, adata.var[subset_col]].copy() + +# END TEMPORARY WORKAROUND subset_vars + +#TODO: optionally, move to qa +# https://github.com/openpipelines-bio/openpipeline/issues/435 +def check_validity_anndata(adata, layer, obs_batch, + n_obs_min_count, n_var_min_count): + assert check_nonnegative_integers( + adata.layers[layer] if layer else adata.X + ), f"Make sure input adata contains raw_counts" + + assert len(set(adata.var_names)) == len( + adata.var_names + ), f"Dataset contains multiple genes with same gene name." 
+ + # Ensure every obs_batch category has sufficient observations + assert min(adata.obs[[obs_batch]].value_counts()) > n_obs_min_count, \\ + f"Anndata has fewer than {n_obs_min_count} cells." + + assert adata.n_vars > n_var_min_count, \\ + f"Anndata has fewer than {n_var_min_count} genes." + + + +def main(): + mdata = mudata.read(par["input"].strip()) + adata = mdata.mod[par['modality']] + + if par['var_input']: + # Subset to HVG + adata_subset = subset_vars(adata, subset_col=par["var_input"]).copy() + else: + adata_subset = adata.copy() + + check_validity_anndata( + adata_subset, par['input_layer'], par['obs_batch'], + par["n_obs_min_count"], par["n_var_min_count"] + ) + # Set up the data + scvi.model.SCVI.setup_anndata( + adata_subset, + batch_key=par['obs_batch'], + layer=par['input_layer'], + labels_key=par['obs_labels'], + size_factor_key=par['obs_size_factor'], + categorical_covariate_keys=par['obs_categorical_covariate'], + continuous_covariate_keys=par['obs_continuous_covariate'], + ) + + # Set up the model + vae_uns = scvi.model.SCVI( + adata_subset, + n_hidden=par["n_hidden_nodes"], + n_latent=par["n_dimensions_latent_space"], + n_layers=par["n_hidden_layers"], + dropout_rate=par["dropout_rate"], + dispersion=par["dispersion"], + gene_likelihood=par["gene_likelihood"], + use_layer_norm=par["use_layer_normalization"], + use_batch_norm=par["use_batch_normalization"], + encode_covariates=par["encode_covariates"], # Default (True) is for better scArches performance -> maybe don't use this always? + deeply_inject_covariates=par["deeply_inject_covariates"], # Default (False) for better scArches performance -> maybe don't use this always? 
+ use_observed_lib_size=par["use_observed_lib_size"], # When size_factors are not passed + ) + + plan_kwargs = { + "reduce_lr_on_plateau": par['reduce_lr_on_plateau'], + "lr_patience": par['lr_patience'], + "lr_factor": par['lr_factor'], + } + + + # Train the model + vae_uns.train( + max_epochs=par['max_epochs'], + early_stopping=par['early_stopping'], + early_stopping_monitor=par['early_stopping_monitor'], + early_stopping_patience=par['early_stopping_patience'], + early_stopping_min_delta=par['early_stopping_min_delta'], + plan_kwargs=plan_kwargs, + check_val_every_n_epoch=1, + accelerator="auto", + ) + # Note: train_size=1.0 should give better results, but then can't do early_stopping on validation set + + # Get the latent output + adata.obsm[par['obsm_output']] = vae_uns.get_latent_representation() + + mdata.mod[par['modality']] = adata + mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) + if par["output_model"]: + vae_uns.save(par["output_model"], overwrite=True) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ]; then + VIASH_PAR_OUTPUT_MODEL=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_MODEL") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MODEL" ] && [ ! -e "$VIASH_PAR_OUTPUT_MODEL" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_MODEL' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/integrate/scvi/subset_vars.py b/target/docker/integrate/scvi/subset_vars.py new file mode 100644 index 00000000000..10011c8fcca --- /dev/null +++ b/target/docker/integrate/scvi/subset_vars.py @@ -0,0 +1,16 @@ +def subset_vars(adata, subset_col): + """Subset highly variable genes from AnnData object + + Parameters + ---------- + adata : AnnData + Annotated data object + subset_col : str + Name of the boolean column in `adata.var` that contains the information if features should be used or not + + Returns + ------- + AnnData + Copy of `adata` with subsetted features + """ + return adata[:, adata.var[subset_col]].copy() diff --git a/target/docker/integrate/totalvi/.config.vsh.yaml b/target/docker/integrate/totalvi/.config.vsh.yaml new file mode 100644 index 00000000000..40477f1eb6f --- /dev/null +++ b/target/docker/integrate/totalvi/.config.vsh.yaml @@ -0,0 +1,348 @@ +functionality: + name: "totalvi" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file with query data to integrate with reference." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + description: "Input h5mu file with reference data to train the TOTALVI model." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--force_retrain" + alternatives: + - "-f" + description: "If true, retrain the model and save it to reference_model_path" + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--query_modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--query_proteins_modality" + description: "Name of the modality in the input (query) h5mu file containing\ + \ protein data" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_proteins_modality" + description: "Name of the modality containing proteins in the reference" + info: null + default: + - "prot" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. If None, X is used" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: "Column name discriminating between your batches." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_input" + description: ".var column containing highly variable genes. By default, do not\ + \ subset genes." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_integrated_totalvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_normalized_rna_output" + description: "In which .obsm slot to store the normalized RNA from TOTALVI." + info: null + default: + - "X_totalvi_normalized_rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_normalized_protein_output" + description: "In which .obsm slot to store the normalized protein data from\ + \ TOTALVI." + info: null + default: + - "X_totalvi_normalized_protein" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference_model_path" + description: "Directory with the reference model. 
If not exists, trained model\ + \ will be saved there" + info: null + default: + - "totalvi_model_reference" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--query_model_path" + description: "Directory, where the query model will be saved" + info: null + default: + - "totalvi_model_query" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset" + info: null + default: + - 400 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_query_epochs" + description: "Number of passes through the dataset, when fine-tuning model for\ + \ query" + info: null + default: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--weight_decay" + description: "Weight decay, when fine-tuning model for query" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9" + target_organization: "openpipelines-bio" 
+ target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "torchmetrics~=0.11.0" + - "scvi-tools~=1.0.3" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/integrate/totalvi" + executable: 
"/home/runner/work/openpipeline/openpipeline/target/docker/integrate/totalvi/totalvi" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/integrate/totalvi/setup_logger.py b/target/docker/integrate/totalvi/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/integrate/totalvi/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/integrate/totalvi/totalvi b/target/docker/integrate/totalvi/totalvi new file mode 100755 index 00000000000..09760596b9a --- /dev/null +++ b/target/docker/integrate/totalvi/totalvi @@ -0,0 +1,1479 @@ +#!/usr/bin/env bash + +# totalvi 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Vladimir Shitov + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# ViashQuote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory containing a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory that contains the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no
something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="totalvi" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "totalvi 0.12.4" + echo "" + echo "Performs mapping to the reference by totalvi model:" + echo "https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input h5mu file with query data to integrate with reference."
+ echo "" + echo " -f, --force_retrain" + echo " type: boolean_true" + echo " If true, retrain the model and save it to reference_model_path" + echo "" + echo " --query_modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --query_proteins_modality" + echo " type: string" + echo " Name of the modality in the input (query) h5mu file containing protein" + echo " data" + echo "" + echo " --reference_modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --reference_proteins_modality" + echo " type: string" + echo " default: prot" + echo " Name of the modality containing proteins in the reference" + echo "" + echo " --input_layer" + echo " type: string" + echo " Input layer to use. If None, X is used" + echo "" + echo " --obs_batch" + echo " type: string" + echo " default: sample_id" + echo " Column name discriminating between your batches." + echo "" + echo " --var_input" + echo " type: string" + echo " .var column containing highly variable genes. By default, do not subset" + echo " genes." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output h5mu file." + echo "" + echo " --obsm_output" + echo " type: string" + echo " default: X_integrated_totalvi" + echo " In which .obsm slot to store the resulting integrated embedding." + echo "" + echo " --obsm_normalized_rna_output" + echo " type: string" + echo " default: X_totalvi_normalized_rna" + echo " In which .obsm slot to store the normalized RNA from TOTALVI." + echo "" + echo " --obsm_normalized_protein_output" + echo " type: string" + echo " default: X_totalvi_normalized_protein" + echo " In which .obsm slot to store the normalized protein data from TOTALVI." + echo "" + echo " --reference_model_path" + echo " type: file, output, file must exist" + echo " default: totalvi_model_reference" + echo " Directory with the reference model. 
If not exists, trained model will be" + echo " saved there" + echo "" + echo " --query_model_path" + echo " type: file, output, file must exist" + echo " default: totalvi_model_query" + echo " Directory, where the query model will be saved" + echo "" + echo "Learning parameters:" + echo " --max_epochs" + echo " type: integer" + echo " default: 400" + echo " Number of passes through the dataset" + echo "" + echo " --max_query_epochs" + echo " type: integer" + echo " default: 200" + echo " Number of passes through the dataset, when fine-tuning model for query" + echo "" + echo " --weight_decay" + echo " type: double" + echo " default: 0.0" + echo " Weight decay, when fine-tuning model for query" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check that the docker executable is on the PATH and that 'docker version' succeeds; otherwise log a critical message and exit 1 +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # run 'command -v' directly: inside '[ ... ]' it is a malformed test expression, so a missing docker binary was never detected + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $?
# returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image could be pulled +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the push succeeded +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded."
+ else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerfile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerfile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9 + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libopenblas-dev liblapack-dev gfortran && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "torchmetrics~=0.11.0" "scvi-tools~=1.0.3" + +LABEL org.opencontainers.image.authors="Vladimir Shitov" +LABEL org.opencontainers.image.description="Companion container for running component integrate totalvi" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-totalvi-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "totalvi 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -r) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --force_retrain) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + -f) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + --query_modality) + [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --query_modality=*) + [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality=*\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --query_proteins_modality) + [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_PROTEINS_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_proteins_modality. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --query_proteins_modality=*) + [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality=*\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_modality) + [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_modality=*) + [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality=*\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_proteins_modality) + [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_PROTEINS_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_proteins_modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_proteins_modality=*) + [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality=*\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_layer) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_layer=*) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_batch) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_batch=*) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_input) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --var_input=*) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_normalized_rna_output) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_rna_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_normalized_rna_output=*) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_normalized_protein_output) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_protein_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_normalized_protein_output=*) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_model_path) + [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_MODEL_PATH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_model_path. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_model_path=*) + [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path=*\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --query_model_path) + [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_MODEL_PATH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_model_path. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --query_model_path=*) + [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path=*\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_MODEL_PATH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_epochs) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --max_epochs=*) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_query_epochs) + [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_QUERY_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_query_epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_query_epochs=*) + [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs=*\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_QUERY_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --weight_decay) + [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WEIGHT_DECAY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --weight_decay. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --weight_decay=*) + [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay=*\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_WEIGHT_DECAY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        # ---setup <strategy>: the strategy is the NEXT word, so both words are consumed.
+        # (Previously only one word was shifted, leaving the strategy behind as a positional argument.)
+        ---setup)
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2
+            ;;
+        # ---setup=<strategy>: the value is embedded in this single word, so only one word is consumed.
+        # (Previously 'shift 2' was used, which fails under 'set -e' when this is the last argument.)
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        # ---volume=<spec>: strip the flag from the CURRENT word ($1); the next word is unrelated.
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1
+            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        *) # positional arg or unknown option
+            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
+            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
+            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+            shift # past argument
+            ;;
+    esac
+done
+
+# parse positional parameters
+eval set -- $VIASH_POSITIONAL_ARGS
+
+
+ViashDockerInstallationCheck
+
+if [ $VIASH_MODE == "docker_setup" ]; then
+  ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY"
+  exit 0
+fi
+ViashDockerSetup 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0' ifneedbepullelsecachedbuild
+
+if [ $VIASH_MODE == "docker_debug" ]; then
+  ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0'"
+  docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0'
+  exit 0
+fi
+
+# setting computational defaults
+
+# helper function for parsing memory strings
+# ViashMemoryAsBytes: convert a memory string to a number of bytes
+# $1     : amount followed by a unit (b, k/kb, m/mb, g/gb, t/tb, p/pb);
+#          case and whitespace are ignored, units are powers of 1024
+# return : the number of bytes on stdout, or nothing when $1 does not match
+function ViashMemoryAsBytes {
+  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
+  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
+  # keep the result local so the helper does not leak a global 'memory_b'
+  local memory_b
+  if [[ $memory =~ $memory_regex ]]; then
+    local number=${memory/[^0-9]*/}
+    local symbol=${memory/*[0-9]/}
+
+    case $symbol in
+      b) memory_b=$number ;;
+      kb|k) memory_b=$(( $number * 1024 )) ;;
+      mb|m) memory_b=$(( $number * 1024 * 1024 )) ;;
+      gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
+      tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  # quote the argument so a value such as "2 GB" reaches the helper as one word
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each step rounds up to the next whole unit
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # (unset takes the variable NAME; with a leading dollar sign the empty value was
+    # expanded and nothing was unset, so the script header later rendered int(r''))
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # same as above: pass the variable name, not its (empty) value
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_REFERENCE+x} ]; then
+  ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then + VIASH_PAR_FORCE_RETRAIN="false" +fi +if [ -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then + VIASH_PAR_QUERY_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then + VIASH_PAR_REFERENCE_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then + VIASH_PAR_REFERENCE_PROTEINS_MODALITY="prot" +fi +if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then + VIASH_PAR_OBS_BATCH="sample_id" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_integrated_totalvi" +fi +if [ -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then + VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="X_totalvi_normalized_rna" +fi +if [ -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then + VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="X_totalvi_normalized_protein" +fi +if [ -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then + VIASH_PAR_REFERENCE_MODEL_PATH="totalvi_model_reference" +fi +if [ -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then + VIASH_PAR_QUERY_MODEL_PATH="totalvi_model_query" +fi +if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then + VIASH_PAR_MAX_EPOCHS="400" +fi +if [ -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then + VIASH_PAR_MAX_QUERY_EPOCHS="200" +fi +if [ -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then + VIASH_PAR_WEIGHT_DECAY="0.0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then + if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--force_retrain' has to be a boolean_true. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MAX_QUERY_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_query_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WEIGHT_DECAY" ]]; then + if ! [[ "$VIASH_PAR_WEIGHT_DECAY" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--weight_decay' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && [ ! -d "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" +fi +if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ] && [ ! -d "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE_MODEL_PATH")" ) + VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE_MODEL_PATH") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_REFERENCE_MODEL_PATH" ) +fi +if [ ! 
-z "$VIASH_PAR_QUERY_MODEL_PATH" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_QUERY_MODEL_PATH")" )
+  VIASH_PAR_QUERY_MODEL_PATH=$(ViashAutodetectMount "$VIASH_PAR_QUERY_MODEL_PATH")
+  VIASH_CHOWN_VARS+=( "$VIASH_PAR_QUERY_MODEL_PATH" )
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" )
+  VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" )
+  VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" )
+  VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG")
+fi
+if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" )
+  VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR")
+fi
+
+# get unique mounts
+# (entries are echoed one per line and de-duplicated with sort -u; the result is
+# re-split on whitespace by the array assignment)
+VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u))
+
+# change file ownership
+# ViashPerformChown: runs chown inside the component image, with the same mounts,
+# to hand every path collected in VIASH_CHOWN_VARS to the calling user and group.
+# Does nothing when VIASH_CHOWN_VARS is empty. Registered below as the EXIT trap.
+function ViashPerformChown {
+  if (( ${#VIASH_CHOWN_VARS[@]} )); then
+    # errexit is switched off around the call so a failing chown does not abort the trap
+    set +e
+    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
+    set -e
+  fi
+}
+trap ViashPerformChown EXIT
+
+# helper function for filling in extra docker args
+# (the memory limit is passed to docker in megabytes, using the rounded-up MB value)
+VIASH_EXTRA_DOCKER_ARGS=""
+if [ ! -z "$VIASH_META_MEMORY_MB" ]; then
+  VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m"
+fi
+if [ !
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/integrate_totalvi:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-totalvi-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from typing import Tuple + +import os +import sys +import mudata +from anndata import AnnData # For type hints +from mudata import MuData # For type hints +import numpy as np +import scvi +from scipy.sparse import issparse + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'query_modality': $( if [ ! -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'query_proteins_modality': $( if [ ! -z ${VIASH_PAR_QUERY_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_proteins_modality': $( if [ ! 
-z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_normalized_rna_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_normalized_protein_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_model_path': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'query_model_path': $( if [ ! -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_QUERY_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_query_epochs': $( if [ ! -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_QUERY_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'weight_decay': $( if [ ! 
-z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then echo "float(r'${VIASH_PAR_WEIGHT_DECAY//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi )
+}
+
+### VIASH END
+
+sys.path.append(meta["resources_dir"])
+# START TEMPORARY WORKAROUND setup_logger
+# reason: resources aren't available when using Nextflow fusion
+# from setup_logger import setup_logger
+def setup_logger():
+    """Configure the root logger to write INFO-level messages to stdout and return it."""
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
+# END TEMPORARY WORKAROUND setup_logger
+logger = setup_logger()
+
+def align_proteins_names(adata_reference: AnnData, mdata_query: MuData, adata_query: AnnData, reference_proteins_key: str, query_proteins_key: str) -> AnnData:
+    """Make sure that proteins are located in the same .obsm slot in reference and query.
+
+    The query protein matrix is stored under reference_proteins_key. If the query has
+    no protein modality (no key given, or the key is not a modality of mdata_query),
+    a zero matrix with one column per reference protein is stored instead.
+    adata_query is modified in place and also returned.
+    """
+    proteins_reference = adata_reference.obsm[reference_proteins_key]
+
+    # If query has no protein data, put matrix of zeros
+    if not query_proteins_key or query_proteins_key not in mdata_query.mod:
+        adata_query.obsm[reference_proteins_key] = np.zeros((adata_query.n_obs, proteins_reference.shape[1]))
+    else:
+        # Make sure that proteins expression has the same key in query and reference
+        adata_query.obsm[reference_proteins_key] = adata_query.obsm[query_proteins_key]
+
+    return adata_query
+
+
+def extract_proteins_to_anndata(mdata: MuData, rna_modality_key, protein_modality_key, input_layer, hvg_var_key=None) -> AnnData:
+    """Build the AnnData that TOTALVI expects: RNA data with protein counts in an .obsm slot.
+
+    Args:
+        mdata: MuData holding the RNA modality and, optionally, the protein modality.
+        rna_modality_key: name of the RNA modality; a copy of it is the returned object.
+        protein_modality_key: name of the protein modality; when it is present its
+            matrix is stored in .obsm under this same key.
+        input_layer: where to read protein counts from; None means .X.
+        hvg_var_key: optional boolean .var column used to subset the genes.
+
+    Returns:
+        A new AnnData; the input MuData is not modified.
+
+    Raises:
+        KeyError: if input_layer is given but found in neither .obsm nor .layers.
+    """
+    adata: AnnData = mdata.mod[rna_modality_key].copy()
+
+    if hvg_var_key:
+        selected_genes = adata.var_names[adata.var[hvg_var_key]]
+        adata = adata[:, selected_genes].copy()
+
+    if protein_modality_key in mdata.mod:
+        # Put the proteins modality into .obsm slot
+        proteins_reference_adata = mdata.mod[protein_modality_key].copy()
+
+        if input_layer is None:
+            proteins = proteins_reference_adata.X
+        elif input_layer in proteins_reference_adata.obsm:
+            # Historical lookup location, checked first so existing inputs behave as before
+            proteins = proteins_reference_adata.obsm[input_layer]
+        else:
+            # The argument names a layer, so fall back to .layers instead of failing
+            proteins = proteins_reference_adata.layers[input_layer]
+
+        if issparse(proteins):
+            proteins = proteins.toarray()
+
+        adata.obsm[protein_modality_key] = proteins
+
+    return adata
+
+
+def build_reference_model(adata_reference: AnnData, max_train_epochs: int = 400) -> scvi.model.TOTALVI:
+    """Train a TOTALVI model on the reference and save it to par["reference_model_path"]."""
+
+    vae_reference = scvi.model.TOTALVI(adata_reference, use_layer_norm="both", use_batch_norm="none")
+    vae_reference.train(max_train_epochs)
+
+    # This is only reached when no usable model exists or a retrain was requested
+    # (see is_retraining_model). The target directory may already exist in both cases,
+    # so allow overwriting rather than failing after training has finished.
+    vae_reference.save(par["reference_model_path"], overwrite=True)
+
+    return vae_reference
+
+def is_retraining_model() -> bool:
+    """Decide, whether reference model should be trained. It happens when no model exists or force_retrain flag is on"""
+
+    # A model counts as existing only if the directory holds a model.pt file
+    trained_model_exists = os.path.isdir(par["reference_model_path"]) and ("model.pt" in os.listdir(par["reference_model_path"]))
+    return not trained_model_exists or par["force_retrain"]
+
+
+def map_query_to_reference(mdata_reference: MuData, mdata_query: MuData, adata_query: AnnData) -> Tuple[scvi.model.TOTALVI, AnnData]:
+    """Build model on the provided reference if necessary, and map query to the reference.
+
+    Returns the model trained on the query together with the query AnnData after it
+    has been aligned to the reference.
+    """
+
+    adata_reference: AnnData = extract_proteins_to_anndata(mdata_reference, rna_modality_key=par["reference_modality"], protein_modality_key=par["reference_proteins_modality"],
+                                                           input_layer=par["input_layer"], hvg_var_key=par["var_input"])
+
+    scvi.model.TOTALVI.setup_anndata(
+        adata_reference,
+        batch_key=par["obs_batch"],
+        protein_expression_obsm_key=par["reference_proteins_modality"]
+    )
+
+    if is_retraining_model():
+        vae_reference = build_reference_model(adata_reference, max_train_epochs=par["max_epochs"])
+    else:
+        vae_reference = scvi.model.TOTALVI.load(dir_path=par["reference_model_path"], adata=adata_reference)
+
+    adata_query: AnnData = align_proteins_names(adata_reference, mdata_query, adata_query, reference_proteins_key=par["reference_proteins_modality"],
+                                                query_proteins_key=par["query_proteins_modality"])
+
+    # Reorder genes and pad missing genes with 0s
+    scvi.model.TOTALVI.prepare_query_anndata(adata_query, vae_reference)
+
+    # Train the model for query
+    vae_query = scvi.model.TOTALVI.load_query_data(
+        adata_query,
+        vae_reference
+    )
+    vae_query.train(par["max_query_epochs"], plan_kwargs=dict(weight_decay=par["weight_decay"]))
+
+    return vae_query, adata_query
+
+def main():
+    """Map the query onto the reference with TOTALVI and write the annotated query and the query model."""
+    mdata_query = mudata.read(par["input"].strip())
+    adata_query = extract_proteins_to_anndata(mdata_query,
+                                              rna_modality_key=par["query_modality"],
+                                              protein_modality_key=par["query_proteins_modality"],
+                                              input_layer=par["input_layer"],
+                                              hvg_var_key=par["var_input"])
+
+    # Strip once so the suffix check and the read below look at the same path
+    reference_path = par["reference"].strip()
+    if reference_path.endswith(".h5mu"):
+        logger.info("Reading reference")
+        mdata_reference = mudata.read(reference_path)
+
+        logger.info("Mapping query to the reference")
+        vae_query, adata_query = map_query_to_reference(mdata_reference, mdata_query, adata_query)
+    else:
+        raise ValueError("Incorrect format of reference, please provide a .h5mu file")
+
+    # NOTE(review): adata_query is a copy made by extract_proteins_to_anndata and is not
+    # written below, so this marker does not appear to reach the output file - confirm intent.
+    adata_query.uns["integration_method"] = "totalvi"
+
+    logger.info("Getting the latent representation of query")
+    mdata_query.mod[par["query_modality"]].obsm[par["obsm_output"]] = vae_query.get_latent_representation()
+
+    norm_rna, norm_protein = vae_query.get_normalized_expression()
+    mdata_query.mod[par["query_modality"]].obsm[par["obsm_normalized_rna_output"]] = norm_rna.to_numpy()
+
+    if par["query_proteins_modality"] in mdata_query.mod:
+        mdata_query.mod[par["query_proteins_modality"]].obsm[par["obsm_normalized_protein_output"]] = norm_protein.to_numpy()
+
+    logger.info("Updating mdata")
+    mdata_query.update()
+
+    logger.info("Saving updated query data")
+    mdata_query.write_h5mu(par["output"].strip())
+
+    logger.info("Saving query model")
+    vae_query.save(par["query_model_path"], overwrite=True)
+
+if __name__ == "__main__":
+    main()
+VIASHMAIN
+python -B "\$tempscript" &
+wait "\$!"
+
+VIASHEOF
+
+
+# strip viash automount from file paths
+if [ ! -z "$VIASH_PAR_INPUT" ]; then
+  VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT")
+fi
+if [ ! -z "$VIASH_PAR_REFERENCE" ]; then
+  VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE")
+fi
+if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
+  VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT")
+fi
+if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ]; then
+  VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashStripAutomount "$VIASH_PAR_REFERENCE_MODEL_PATH")
+fi
+if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ]; then
+  VIASH_PAR_QUERY_MODEL_PATH=$(ViashStripAutomount "$VIASH_PAR_QUERY_MODEL_PATH")
+fi
+if [ !
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && [ ! -e "$VIASH_PAR_REFERENCE_MODEL_PATH" ]; then + ViashError "Output file '$VIASH_PAR_REFERENCE_MODEL_PATH' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ] && [ ! -e "$VIASH_PAR_QUERY_MODEL_PATH" ]; then + ViashError "Output file '$VIASH_PAR_QUERY_MODEL_PATH' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/interactive/run_cellxgene/.config.vsh.yaml b/target/docker/interactive/run_cellxgene/.config.vsh.yaml new file mode 100644 index 00000000000..60c08874498 --- /dev/null +++ b/target/docker/interactive/run_cellxgene/.config.vsh.yaml @@ -0,0 +1,83 @@ +functionality: + name: "run_cellxgene" + namespace: "interactive" + version: "0.12.4" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Directory to mount" + info: null + default: + - "." 
+ must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--port" + alternatives: + - "-p" + description: "Port to use" + info: null + default: + - "5005" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:focal" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + run_args: + - "$( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' )" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + - type: "apt" + packages: + - "build-essential libxml2-dev python3-dev python3-pip zlib1g-dev python3-requests\ + \ python3-aiohttp python-is-python3" + interactive: false + - type: "python" + user: false + packages: + - "cellxgene" + upgrade: true + entrypoint: [] + cmd: null +info: + config: "/home/runner/work/openpipeline/openpipeline/src/interactive/run_cellxgene/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cellxgene" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cellxgene/run_cellxgene" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/interactive/run_cellxgene/run_cellxgene b/target/docker/interactive/run_cellxgene/run_cellxgene new file mode 100755 index 
00000000000..85f9bbd4977 --- /dev/null +++ b/target/docker/interactive/run_cellxgene/run_cellxgene @@ -0,0 +1,901 @@ +#!/usr/bin/env bash + +# run_cellxgene 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 
2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. 
+# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="run_cellxgene" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "run_cellxgene 0.12.4" + echo "" + echo "" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, file must exist" + echo " default: ." + echo " Directory to mount" + echo "" + echo " -p, --port" + echo " type: string" + echo " default: 5005" + echo " Port to use" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # test the exit status of 'command -v' itself, not a '[ ... ]' expression + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+ exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ubuntu:focal + +ENTRYPOINT [] + + +RUN apt update && apt upgrade -y +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential libxml2-dev python3-dev python3-pip zlib1g-dev python3-requests python3-aiohttp python-is-python3 && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "cellxgene" + +LABEL org.opencontainers.image.description="Companion container for running component interactive run_cellxgene" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" 
+LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-run_cellxgene-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "run_cellxgene 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --port) + [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --port. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --port=*) + [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port=*\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -p) + [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'-p\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -p. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1; shift 2 # the strategy was read from $2, so drop both the flag and its value + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # flag and value share one word + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the value is embedded in $1, not in the next argument + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0'" + docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + 
esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty (pass the variable name; expanding the empty value would unset nothing) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty (pass the variable name, not its empty value) +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_INPUT+x} ]; then + VIASH_PAR_INPUT="." +fi +if [ -z ${VIASH_PAR_PORT+x} ]; then + VIASH_PAR_PORT="5005" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist."
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm $( echo $VIASH_PAR_PORT | sed 's/..*/-p &:&/' ) ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cellxgene:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-run_cellxgene-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_PORT+x} ]; then echo "${VIASH_PAR_PORT}" | sed "s#'#'\"'\"'#g;s#.*#par_port='&'#" ; else echo "# par_port="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +export LC_ALL=C.UTF-8 +export LANG=C.UTF-8 + +/usr/local/bin/cellxgene launch -p $VIASH_PAR_PORT --host 0.0.0.0 -v $VIASH_PAR_INPUT +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +exit 0 diff --git a/target/docker/interactive/run_cirrocumulus/.config.vsh.yaml b/target/docker/interactive/run_cirrocumulus/.config.vsh.yaml new file mode 100644 index 00000000000..f36ed6d29c3 --- /dev/null +++ b/target/docker/interactive/run_cirrocumulus/.config.vsh.yaml @@ -0,0 +1,85 @@ +functionality: + name: "run_cirrocumulus" + namespace: "interactive" + version: "0.12.4" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Directory to mount" + info: null + default: + - "." + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--port" + alternatives: + - "-p" + description: "Port to use" + info: null + default: + - "5005" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:focal" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + run_args: + - "-p $VIASH_PAR_PORT:$VIASH_PAR_PORT" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + - type: "apt" + packages: + - "build-essential libxml2-dev python3-dev python3-pip zlib1g-dev libhdf5-dev\ + \ python-is-python3" + interactive: false + - type: "python" + user: false + packages: + - "requests" + - "aiohttp" + 
- "cirrocumulus" + upgrade: true + entrypoint: [] + cmd: null +info: + config: "/home/runner/work/openpipeline/openpipeline/src/interactive/run_cirrocumulus/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cirrocumulus" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/interactive/run_cirrocumulus/run_cirrocumulus" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/interactive/run_cirrocumulus/run_cirrocumulus b/target/docker/interactive/run_cirrocumulus/run_cirrocumulus new file mode 100755 index 00000000000..04ed627d274 --- /dev/null +++ b/target/docker/interactive/run_cirrocumulus/run_cirrocumulus @@ -0,0 +1,901 @@ +#!/usr/bin/env bash + +# run_cirrocumulus 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
set -e

# Resolve the temporary directory: keep $VIASH_TEMP when it is already set,
# otherwise take the first non-empty candidate below, ending at /tmp.
if [ -z "$VIASH_TEMP" ]; then
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
  VIASH_TEMP=${VIASH_TEMP:-/tmp}
fi

# define helper functions
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string; single quotes embedded in the value are
#          rewritten as '"'"' so the result stays one word when eval'd
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
#   ViashQuote "it's"     # returns 'it'"'"'s'
function ViashQuote {
  local single_quote="'"
  local quote_escape="'\"'\"'"
  # close the quote, emit a double-quoted ', reopen the quote
  local escaped=${1//$single_quote/$quote_escape}
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # --flag=value: only the value part (after the first '=') gets quoted
    echo "$escaped" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: returned untouched
    echo "$1"
  else
    echo "'$escaped'"
  fi
}
# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# return : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}
# ViashSourceDir: return the path of a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute path of the directory containing the bash file
# note    : changes the working directory of the calling shell, so call it
#           inside a command substitution.
function ViashSourceDir {
  # locals, so the caller's SOURCE / DIR variables are never overwritten
  local source_path="$1"
  local source_dir
  while [ -h "$source_path" ]; do
    source_dir="$( cd -P "$( dirname "$source_path" )" >/dev/null 2>&1 && pwd )"
    source_path="$(readlink "$source_path")"
    # a relative link target is resolved against the directory of the link
    [[ $source_path != /* ]] && source_path="$source_dir/$source_path"
  done
  cd -P "$( dirname "$source_path" )" >/dev/null 2>&1 && pwd
}
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
# default verbosity: messages up to and including 'notice' are shown
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE

# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] '.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  # [[ ]] evaluates both operands arithmetically, so an empty verbosity
  # counts as 0 instead of raising a 'unary operator expected' test error
  if [[ "$VIASH_VERBOSITY" -ge "$required_level" ]]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog $VIASH_LOGCODE_ERROR error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# output: Your input, prepended by '[debug] ', emitted through ViashLog.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so a checkout path containing spaces survives word splitting)
VIASH_META_RESOURCES_DIR="$(ViashSourceDir "${BASH_SOURCE[0]}")"

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="run_cirrocumulus"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  echo "run_cirrocumulus 0.12.4"
  echo ""
  echo ""
  echo ""
  echo "Arguments:"
  echo " -i, --input"
  echo " type: file, file must exist"
  echo " default: ."
  echo " Directory to mount"
  echo ""
  echo " -p, --port"
  echo " type: string"
  echo " default: 5005"
  echo " Port to use"
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
# Exits the script with status 1 when the docker executable cannot be found
# or when `docker version` fails.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  # locals, so a caller's own save/out bookkeeping is not overwritten
  local save out

  ViashDebug "Checking whether Docker is installed"
  # NOTE: this used to be `[ ! command -v docker ... ]`, which hands the words
  # to `test` instead of running `command`; that is a malformed expression
  # which is never true, so a missing docker was never reported.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so the failure can be reported properly
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  # `docker images -q` prints nothing when no local image matches
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  # locals: callers keep their own errexit state in a variable that is also
  # called `save`; assigning it here would stop them from restoring `set -e`
  local save out

  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # verbose enough: let docker print its own progress output
    docker pull "$1" && return 0 || return 1
  else
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?
#                     : whether or not the push succeeded
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  # own locals: does not touch the caller's save/out variables
  local errexit_state push_status

  ViashNotice "Pushing image to '$1'"
  # temporarily disable errexit so a failed push can be reported
  errexit_state=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    push_status=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    push_status=$?
  fi
  [[ $errexit_state =~ e ]] && set -e
  # report on the image that was actually passed in, not on the global
  # VSHD_ID (which is empty unless ViashDockerSetup ran first)
  if [ $push_status -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $push_status
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : extra arguments forwarded to ViashDockerBuild (e.g. --no-cache)
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  # Distinctly named locals: ViashDockerPull may assign `save` and `out`
  # itself, which previously left errexit disabled after this function.
  local errexit_state pull_status

  errexit_state=$-; set +e
  ViashDockerPull "$1"
  pull_status=$?
  [[ $errexit_state =~ e ]] && set -e
  if [ $pull_status -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ubuntu:focal + +ENTRYPOINT [] + + +RUN apt update && apt upgrade -y +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential libxml2-dev python3-dev python3-pip zlib1g-dev libhdf5-dev python-is-python3 && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "requests" "aiohttp" "cirrocumulus" + +LABEL org.opencontainers.image.description="Companion container for running component interactive run_cirrocumulus" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL 
org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-run_cirrocumulus-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! 
"$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + 
ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "run_cirrocumulus 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --port) + [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --port. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --port=*) + [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'--port=*\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_PORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -p) + [ -n "$VIASH_PAR_PORT" ] && ViashError Bad arguments for option \'-p\': \'$VIASH_PAR_PORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -p. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0'" + docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 
1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_INPUT+x} ]; then + VIASH_PAR_INPUT="." 
+fi +if [ -z ${VIASH_PAR_PORT+x} ]; then + VIASH_PAR_PORT="5005" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm -p $VIASH_PAR_PORT:$VIASH_PAR_PORT ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interactive_run_cirrocumulus:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-run_cirrocumulus-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_PORT+x} ]; then echo "${VIASH_PAR_PORT}" | sed "s#'#'\"'\"'#g;s#.*#par_port='&'#" ; else echo "# par_port="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +export LC_ALL=C.UTF-8 +export LANG=C.UTF-8 + +/usr/local/bin/cirro launch --port $VIASH_PAR_PORT --host 0.0.0.0 $VIASH_PAR_INPUT +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +exit 0 diff --git a/target/docker/interpret/lianapy/.config.vsh.yaml b/target/docker/interpret/lianapy/.config.vsh.yaml new file mode 100644 index 00000000000..56962ccb2fc --- /dev/null +++ b/target/docker/interpret/lianapy/.config.vsh.yaml @@ -0,0 +1,313 @@ +functionality: + name: "lianapy" + namespace: "interpret" + version: "0.12.4" + authors: + - name: "Mauro Saporita" + roles: + - "author" + info: + role: "Contributor" + links: + email: "maurosaporita@gmail.com" + github: "mauro-saporita" + linkedin: "mauro-saporita-930b06a5" + organizations: + - name: "Ardigen" + href: "https://ardigen.com" + role: "Lead Nextflow Developer" + - name: "Povilas Gibas" + roles: + - "author" + info: + role: "Contributor" + links: + email: "povilasgibas@gmail.com" + github: "PoGibas" + linkedin: "povilas-gibas" + organizations: + - name: "Ardigen" + href: "https://ardigen.com" + role: "Bioinformatician" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + default: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + description: "Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--groupby" + description: "The key of the observations grouping to consider." + info: null + default: + - "bulk_labels" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--resource_name" + description: "Name of the resource to be loaded and use for ligand-receptor inference." + info: null + default: + - "consensus" + required: false + choices: + - "baccin2019" + - "cellcall" + - "cellchatdb" + - "cellinker" + - "cellphonedb" + - "celltalkdb" + - "connectomedb2020" + - "consensus" + - "embrace" + - "guide2pharma" + - "hpmr" + - "icellnet" + - "italk" + - "kirouac2010" + - "lrdb" + - "mouseconsensus" + - "ramilowski2015" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--gene_symbol" + description: "Column name in var DataFrame in which gene symbol are stored." + info: null + default: + - "gene_symbol" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--expr_prop" + description: "Minimum expression proportion for the ligands/receptors (and their\ + \ subunits) in the corresponding cell identities. 
Set to '0', to return unfiltered\ + \ results." + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells" + description: "Minimum cells per cell identity ('groupby') to be considered for\ + \ downstream analysis." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--aggregate_method" + description: "Method aggregation approach, one of ['mean', 'rra'], where 'mean'\ + \ represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde et\ + \ al., 2014) of the interactions." + info: null + default: + - "rra" + required: false + choices: + - "mean" + - "rra" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--return_all_lrs" + description: "Bool whether to return all LRs, or only those that surpass the 'expr_prop'\ + \ threshold. Those interactions that do not pass the 'expr_prop' threshold will\ + \ be assigned to the *worst* score of the ones that do. 'False' by default." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_perms" + description: "Number of permutations for the permutation test. Note that this\ + \ is relevant only for permutation-based methods - e.g. 
'CellPhoneDB" + info: null + default: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Performs LIANA integration based as described in https://github.com/saezlab/liana-py" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "liana~=0.1.9" + - "numpy~=1.24.3" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: 
"memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/interpret/lianapy" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/interpret/lianapy/lianapy" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/interpret/lianapy/lianapy b/target/docker/interpret/lianapy/lianapy new file mode 100755 index 00000000000..aa23191428d --- /dev/null +++ b/target/docker/interpret/lianapy/lianapy @@ -0,0 +1,1259 @@ +#!/usr/bin/env bash + +# lianapy 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+# * Mauro Saporita (author)
+# * Povilas Gibas (author)
+
+set -e
+
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put single quotes around non-flag values
+# $1     : unquoted string
+# return : '--flag' unchanged, '--flag=value' with the value quoted, anything else quoted whole
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove a leading '--flag=' prefix
+# $1     : string with a possible leading flag
+# return : string without the leading flag (unchanged when there is no '--flag=' prefix)
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory containing a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory that holds the (symlink-resolved) bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level; the message is printed only when $VIASH_VERBOSITY >= $1
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="lianapy" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "lianapy 0.12.4" + echo "" + echo "Performs LIANA integration based as described in" + echo "https://github.com/saezlab/liana-py" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input h5mu file" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " default: gzip" + echo " choices: [ gzip, lzf ]" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --layer" + echo " type: string" + echo " Layer in anndata.AnnData.layers to use. If None, use" + echo " mudata.mod[modality].X." + echo "" + echo " --groupby" + echo " type: string" + echo " default: bulk_labels" + echo " The key of the observations grouping to consider." 
+ echo "" + echo " --resource_name" + echo " type: string" + echo " default: consensus" + echo " choices: [ baccin2019, cellcall, cellchatdb, cellinker, cellphonedb," + echo "celltalkdb, connectomedb2020, consensus, embrace, guide2pharma, hpmr, icellnet," + echo "italk, kirouac2010, lrdb, mouseconsensus, ramilowski2015 ]" + echo " Name of the resource to be loaded and use for ligand-receptor inference." + echo "" + echo " --gene_symbol" + echo " type: string" + echo " default: gene_symbol" + echo " Column name in var DataFrame in which gene symbol are stored." + echo "" + echo " --expr_prop" + echo " type: double" + echo " default: 0.1" + echo " Minimum expression proportion for the ligands/receptors (and their" + echo " subunits) in the corresponding cell identities. Set to '0', to return" + echo " unfiltered results." + echo "" + echo " --min_cells" + echo " type: integer" + echo " default: 5" + echo " Minimum cells per cell identity ('groupby') to be considered for" + echo " downstream analysis." + echo "" + echo " --aggregate_method" + echo " type: string" + echo " default: rra" + echo " choices: [ mean, rra ]" + echo " Method aggregation approach, one of ['mean', 'rra'], where 'mean'" + echo " represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde" + echo " et al., 2014) of the interactions." + echo "" + echo " --return_all_lrs" + echo " type: boolean" + echo " default: false" + echo " Bool whether to return all LRs, or only those that surpass the" + echo " 'expr_prop' threshold. Those interactions that do not pass the" + echo " 'expr_prop' threshold will be assigned to the *worst* score of the ones" + echo " that do. 'False' by default." + echo "" + echo " --n_perms" + echo " type: integer" + echo " default: 100" + echo " Number of permutations for the permutation test. Note that this is" + echo " relevant only for permutation-based methods - e.g. 
'CellPhoneDB" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q $1)" ]
+}
+
+# ViashDockerPull: pull a Docker image
+# Pull output is shown only when $VIASH_VERBOSITY is at least $VIASH_LOGCODE_INFO.
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image could be pulled
+# examples:
+#   ViashDockerPull python:latest
+#   echo $? # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull $1 && return 0 || return 1
+  else
+    save=$-; set +e
+    docker pull $1 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+# Push output is shown only when $VIASH_VERBOSITY is at least $VIASH_LOGCODE_INFO.
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : exit status of `docker push` (0 when the push succeeded)
+# examples:
+#   ViashDockerPush python:latest
+#   echo $? # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push $1
+    out=$?
+  else
+    docker push $1 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull $1
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild $@
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+# Exits with status 1 on an unrecognised strategy.
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild $VSHD_ID
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
+    save=$-; set +e
+    ViashDockerLocalTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
+    save=$-; set +e
+    ViashDockerRemoteTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+# Logs the first missing command and exits with status 1 when one is not found.
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $@ : commands to verify being present
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM python:3.10-slim
+
+ENTRYPOINT []
+
+
+RUN apt-get update && \
+  DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \
+  rm -rf /var/lib/apt/lists/*
+
+RUN pip install --upgrade pip && \
+  pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "liana~=0.1.9" "numpy~=1.24.3"
+
+LABEL org.opencontainers.image.authors="Mauro Saporita, Povilas Gibas"
+LABEL org.opencontainers.image.description="Companion container for running component interpret lianapy"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-lianapy-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "lianapy 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --layer=*) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --groupby) + [ -n "$VIASH_PAR_GROUPBY" ] && ViashError Bad arguments for option \'--groupby\': \'$VIASH_PAR_GROUPBY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GROUPBY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --groupby. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --groupby=*) + [ -n "$VIASH_PAR_GROUPBY" ] && ViashError Bad arguments for option \'--groupby=*\': \'$VIASH_PAR_GROUPBY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GROUPBY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --resource_name) + [ -n "$VIASH_PAR_RESOURCE_NAME" ] && ViashError Bad arguments for option \'--resource_name\': \'$VIASH_PAR_RESOURCE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RESOURCE_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --resource_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --resource_name=*) + [ -n "$VIASH_PAR_RESOURCE_NAME" ] && ViashError Bad arguments for option \'--resource_name=*\': \'$VIASH_PAR_RESOURCE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RESOURCE_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gene_symbol) + [ -n "$VIASH_PAR_GENE_SYMBOL" ] && ViashError Bad arguments for option \'--gene_symbol\': \'$VIASH_PAR_GENE_SYMBOL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_SYMBOL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gene_symbol. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --gene_symbol=*) + [ -n "$VIASH_PAR_GENE_SYMBOL" ] && ViashError Bad arguments for option \'--gene_symbol=*\': \'$VIASH_PAR_GENE_SYMBOL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENE_SYMBOL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --expr_prop) + [ -n "$VIASH_PAR_EXPR_PROP" ] && ViashError Bad arguments for option \'--expr_prop\': \'$VIASH_PAR_EXPR_PROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPR_PROP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --expr_prop. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --expr_prop=*) + [ -n "$VIASH_PAR_EXPR_PROP" ] && ViashError Bad arguments for option \'--expr_prop=*\': \'$VIASH_PAR_EXPR_PROP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXPR_PROP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells=*) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --aggregate_method) + [ -n "$VIASH_PAR_AGGREGATE_METHOD" ] && ViashError Bad arguments for option \'--aggregate_method\': \'$VIASH_PAR_AGGREGATE_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_AGGREGATE_METHOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --aggregate_method. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --aggregate_method=*) + [ -n "$VIASH_PAR_AGGREGATE_METHOD" ] && ViashError Bad arguments for option \'--aggregate_method=*\': \'$VIASH_PAR_AGGREGATE_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_AGGREGATE_METHOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --return_all_lrs) + [ -n "$VIASH_PAR_RETURN_ALL_LRS" ] && ViashError Bad arguments for option \'--return_all_lrs\': \'$VIASH_PAR_RETURN_ALL_LRS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RETURN_ALL_LRS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --return_all_lrs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --return_all_lrs=*) + [ -n "$VIASH_PAR_RETURN_ALL_LRS" ] && ViashError Bad arguments for option \'--return_all_lrs=*\': \'$VIASH_PAR_RETURN_ALL_LRS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RETURN_ALL_LRS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_perms) + [ -n "$VIASH_PAR_N_PERMS" ] && ViashError Bad arguments for option \'--n_perms\': \'$VIASH_PAR_N_PERMS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_PERMS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_perms. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_perms=*) + [ -n "$VIASH_PAR_N_PERMS" ] && ViashError Bad arguments for option \'--n_perms=*\': \'$VIASH_PAR_N_PERMS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_N_PERMS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then + VIASH_PAR_OUTPUT_COMPRESSION="gzip" +fi +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_GROUPBY+x} ]; then + VIASH_PAR_GROUPBY="bulk_labels" +fi +if [ -z ${VIASH_PAR_RESOURCE_NAME+x} ]; then + VIASH_PAR_RESOURCE_NAME="consensus" +fi +if [ -z ${VIASH_PAR_GENE_SYMBOL+x} ]; then + VIASH_PAR_GENE_SYMBOL="gene_symbol" +fi +if [ -z ${VIASH_PAR_EXPR_PROP+x} ]; then + VIASH_PAR_EXPR_PROP="0.1" +fi +if [ -z ${VIASH_PAR_MIN_CELLS+x} ]; then + VIASH_PAR_MIN_CELLS="5" +fi +if [ -z ${VIASH_PAR_AGGREGATE_METHOD+x} ]; then + VIASH_PAR_AGGREGATE_METHOD="rra" +fi +if [ -z ${VIASH_PAR_RETURN_ALL_LRS+x} ]; then + VIASH_PAR_RETURN_ALL_LRS="false" +fi +if [ -z ${VIASH_PAR_N_PERMS+x} ]; then + VIASH_PAR_N_PERMS="100" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_EXPR_PROP" ]]; then + if ! [[ "$VIASH_PAR_EXPR_PROP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--expr_prop' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_RETURN_ALL_LRS" ]]; then + if ! [[ "$VIASH_PAR_RETURN_ALL_LRS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--return_all_lrs' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_PERMS" ]]; then + if ! [[ "$VIASH_PAR_N_PERMS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_perms' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_RESOURCE_NAME" ]; then + VIASH_PAR_RESOURCE_NAME_CHOICES=("baccin2019:cellcall:cellchatdb:cellinker:cellphonedb:celltalkdb:connectomedb2020:consensus:embrace:guide2pharma:hpmr:icellnet:italk:kirouac2010:lrdb:mouseconsensus:ramilowski2015") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_RESOURCE_NAME_CHOICES[*]}:" =~ ":$VIASH_PAR_RESOURCE_NAME:" ]]; then + ViashError '--resource_name' specified value of \'$VIASH_PAR_RESOURCE_NAME\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_AGGREGATE_METHOD" ]; then + VIASH_PAR_AGGREGATE_METHOD_CHOICES=("mean:rra") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_AGGREGATE_METHOD_CHOICES[*]}:" =~ ":$VIASH_PAR_AGGREGATE_METHOD:" ]]; then + ViashError '--aggregate_method' specified value of \'$VIASH_PAR_AGGREGATE_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/interpret_lianapy:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-lianapy-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import liana +import mudata +# TODO: Remove when grouping labels exist +# For sign/PCA/ +import numpy as np + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'groupby': $( if [ ! -z ${VIASH_PAR_GROUPBY+x} ]; then echo "r'${VIASH_PAR_GROUPBY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resource_name': $( if [ ! -z ${VIASH_PAR_RESOURCE_NAME+x} ]; then echo "r'${VIASH_PAR_RESOURCE_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'gene_symbol': $( if [ ! 
-z ${VIASH_PAR_GENE_SYMBOL+x} ]; then echo "r'${VIASH_PAR_GENE_SYMBOL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'expr_prop': $( if [ ! -z ${VIASH_PAR_EXPR_PROP+x} ]; then echo "float(r'${VIASH_PAR_EXPR_PROP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'aggregate_method': $( if [ ! -z ${VIASH_PAR_AGGREGATE_METHOD+x} ]; then echo "r'${VIASH_PAR_AGGREGATE_METHOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'return_all_lrs': $( if [ ! -z ${VIASH_PAR_RETURN_ALL_LRS+x} ]; then echo "r'${VIASH_PAR_RETURN_ALL_LRS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'n_perms': $( if [ ! -z ${VIASH_PAR_N_PERMS+x} ]; then echo "int(r'${VIASH_PAR_N_PERMS//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + + +def main(): + + # Get input data + mdata = mudata.read(par['input'].strip()) + mod = mdata.mod[par['modality']] + + # Add dummy grouping labels when they do not exist + if par['groupby'] not in mod.obs: + foo = mod.obsm.to_df().iloc[:, 0] + mod.obs[par['groupby']] = np.sign(foo).astype('category') + + # Solve gene labels + orig_gene_label = mod.var.index + mod.var_names = mod.var[par['gene_symbol']].astype(str) + mod.var_names_make_unique() + + liana.mt.rank_aggregate( + adata = mod, + groupby = par['groupby'], + resource_name = par["resource_name"], + expr_prop = par["expr_prop"], + min_cells = par["min_cells"], + aggregate_method = par["aggregate_method"], + return_all_lrs = par["return_all_lrs"], + layer = par["layer"], + n_perms = par["n_perms"], + verbose = True, + inplace = True, + use_raw = False + ) + + # Return original gene labels + mod.var_names = orig_gene_label + + # TODO: make sure compression is needed + mdata.write_h5mu(par['output'].strip(), compression=par['output_compression']) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/labels_transfer/knn/.config.vsh.yaml b/target/docker/labels_transfer/knn/.config.vsh.yaml new file mode 100644 index 00000000000..2e9921112e2 --- /dev/null +++ b/target/docker/labels_transfer/knn/.config.vsh.yaml @@ -0,0 +1,379 @@ +functionality: + name: "knn" + namespace: "labels_transfer" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + roles: + - "author" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Input dataset (query) arguments" + arguments: + - type: "file" + name: "--input" + description: "The query data to transfer the labels to. Should be a .h5mu file." + info: + label: "Query" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." 
+ required: true + slots: + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's inference,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + required: false + description: "The embedding to use for the classifier's inference.\ + \ Override using the `--input_obsm_features` argument. If not\ + \ provided, the `.X` slot will be used instead.\nMake sure that\ + \ embedding was obtained in the same way as the reference embedding\ + \ (e.g. by the same model or preprocessing).\n" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to use." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's inference.\ + \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ + \ was obtained in the same way as the reference embedding (e.g. by the same\ + \ model or preprocessing).\n" + info: null + example: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference dataset arguments" + arguments: + - type: "file" + name: "--reference" + description: "The reference data to train classifiers on." 
+ info: + label: "Reference" + file_format: + type: "h5ad" + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's training,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + description: "The embedding to use for the classifier's training. Override\ + \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ + \ was obtained in the same way as the query embedding (e.g. by the same\ + \ model or preprocessing).\n" + required: true + obs: + - type: "string" + name: "targets" + multiple: true + example: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + description: "The target labels to transfer. Override using the `--reference_obs_targets`\ + \ argument." + required: true + example: + - "https://zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ + Make sure that embedding was obtained in the same way as the query embedding\ + \ (e.g. by the same model or preprocessing).\n" + info: null + default: + - "X_integrated_scanvi" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_targets" + description: "The `.obs` key of the target labels to transfer."
+ info: null + default: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The query data in .h5mu format with predicted labels transferred\ + \ from the reference." + info: + label: "Output data" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + obs: + - type: "string" + name: "predictions" + description: "The predicted labels. Override using the `--output_obs_predictions`\ + \ argument." + required: true + - type: "double" + name: "uncertainty" + description: "The uncertainty of the predicted labels. Override using\ + \ the `--output_obs_uncertainty` argument." + required: false + obsm: + - type: "double" + name: "X_integrated_scanvi" + description: "The embedding used for the classifier's inference. Could\ + \ have any name, specified by `input_obsm_features` argument." + required: false + uns: + - type: "string" + name: "parameters" + example: "labels_transfer" + description: "Additional information about the parameters used for\ + \ the label transfer."
+ required: true + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_predictions" + description: "In which `.obs` slots to store the predicted information.\nIf\ + \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_uncertainty" + description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ + If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_uns_parameters" + description: "The `.uns` key to store additional information about the parameters\ + \ used for the label transfer." 
+ info: null + default: + - "labels_transfer" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--n_neighbors" + alternatives: + - "-k" + description: "Number of nearest neighbors to use for classification" + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../utils/helper.py" + - type: "file" + path: "../../utils/setup_logger.py" + description: "Performs label transfer from reference to query using KNN classifier" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: + method_id: "KNN_pynndescent" + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "pynndescent~=0.5.8" + - "numba~=0.56.4" + - "numpy~=1.23.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + 
directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/knn" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/knn/knn" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/labels_transfer/knn/helper.py b/target/docker/labels_transfer/knn/helper.py new file mode 100644 index 00000000000..a90bf59efdb --- /dev/null +++ b/target/docker/labels_transfer/knn/helper.py @@ -0,0 +1,32 @@ +def check_arguments(par): + # check output .obs predictions + if not par["output_obs_predictions"]: + par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] + assert len(par["output_obs_predictions"]) == 
len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" + + # check output .obs uncertainty + if not par["output_obs_uncertainty"]: + par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] + assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" + + return par + +def get_reference_features(adata_reference, par, logger): + if par["reference_obsm_features"] is None: + logger.info("Using .X of reference data") + train_data = adata_reference.X + else: + logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") + train_data = adata_reference.obsm[par["reference_obsm_features"]] + + return train_data + +def get_query_features(adata, par, logger): + if par["input_obsm_features"] is None: + logger.info("Using .X of query data") + query_data = adata.X + else: + logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") + query_data = adata.obsm[par["input_obsm_features"]] + + return query_data \ No newline at end of file diff --git a/target/docker/labels_transfer/knn/knn b/target/docker/labels_transfer/knn/knn new file mode 100755 index 00000000000..46373358882 --- /dev/null +++ b/target/docker/labels_transfer/knn/knn @@ -0,0 +1,1258 @@ +#!/usr/bin/env bash + +# knn 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Vladimir Shitov (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] ', printed only when VIASH_VERBOSITY >= $1.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2  # drop level and tag so that "$@" holds only the message words
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="knn" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "knn 0.12.4" + echo "" + echo "Performs label transfer from reference to query using KNN classifier" + echo "" + echo "Input dataset (query) arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " The query data to transfer the labels to. Should be a .h5mu file." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " Which modality to use." + echo "" + echo " --input_obsm_features" + echo " type: string" + echo " example: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's inference." + echo " If not provided, the \`.X\` slot will be used instead." + echo " Make sure that embedding was obtained in the same way as the reference" + echo " embedding (e.g. by the same model or preprocessing)." + echo "" + echo "Reference dataset arguments:" + echo " --reference" + echo " type: file, file must exist" + echo " example:" + echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + echo " The reference data to train classifiers on." + echo "" + echo " --reference_obsm_features" + echo " type: string, required parameter" + echo " default: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's training." 
+ echo " Make sure that embedding was obtained in the same way as the query" + echo " embedding (e.g. by the same model or preprocessing)." + echo "" + echo " --reference_obs_targets" + echo " type: string, multiple values allowed" + echo " default:" + echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" + echo " The \`.obs\` key of the target labels to tranfer." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " The query data in .h5mu format with predicted labels transfered from the" + echo " reference." + echo "" + echo " --output_obs_predictions" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the predicted information." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_pred\"\` suffix." + echo "" + echo " --output_obs_uncertainty" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the uncertainty of the predictions." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_uncertainty\"\` suffix." + echo "" + echo " --output_uns_parameters" + echo " type: string" + echo " default: labels_transfer" + echo " The \`.uns\` key to store additional information about the parameters used" + echo " for the label transfer." 
+ echo "" + echo "Learning parameters:" + echo " -k, --n_neighbors" + echo " type: integer, required parameter" + echo " Number of nearest neighbors to use for classification" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? 
# returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libopenblas-dev liblapack-dev gfortran && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "scanpy~=1.9.5" "pynndescent~=0.5.8" "numba~=0.56.4" "numpy~=1.23.5" + +LABEL org.opencontainers.image.authors="Vladimir Shitov" +LABEL org.opencontainers.image.description="Companion container for running component labels_transfer knn" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-knn-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "knn 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_obsm_features) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --input_obsm_features=*) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obsm_features) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obsm_features=*) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obs_targets) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS="$2" + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obs_targets=*) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_obs_predictions) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_obs_predictions=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_obs_uncertainty) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_obs_uncertainty=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_uns_parameters) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_uns_parameters=*) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_neighbors) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_N_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_neighbors=*) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1") + shift 1 + ;; + -k) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'-k\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -k. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
# setting computational defaults

# helper function for parsing memory strings
#
# Usage:   ViashMemoryAsBytes <size>
# Input:   a number followed by a unit (b, k/kb, m/mb, g/gb, t/tb, p/pb);
#          case and embedded whitespace are ignored, e.g. "2GB", "512 mb".
# Output:  the size in bytes on stdout (units are powers of 1024).
#          Prints nothing when the input does not match that pattern, so
#          callers detect a bad value by testing for an empty result.
function ViashMemoryAsBytes {
  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
  if [[ $memory =~ $memory_regex ]]; then
    # everything before the first non-digit is the number,
    # everything after the last digit is the unit
    local number=${memory/[^0-9]*/}
    local symbol=${memory/*[0-9]/}
    # keep the result out of the caller's namespace
    local memory_b

    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( $number * 1024 )) ;;
      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}
# compute memory in different units
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  # quoted so that a value such as "2 GB" reaches the helper as one argument
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # (unset takes the variable NAME; "unset $VAR" on an empty variable
    # unsets nothing and leaves it set-but-empty for the ${VAR+x} checks
    # further down)
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS="ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" +fi +if [ -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then + VIASH_PAR_OUTPUT_UNS_PARAMETERS="labels_transfer" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then + if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
# change file ownership
#
# Runs `chown --recursive` inside the component's container on every path
# collected in VIASH_CHOWN_VARS, handing ownership to the uid:gid of the
# user who invoked this wrapper. Does nothing when VIASH_CHOWN_VARS is empty.
# The same unique mount arguments used for the main run are passed so the
# paths resolve inside the container.
function ViashPerformChown {
  if (( ${#VIASH_CHOWN_VARS[@]} )); then
    # errexit is switched off around the call so a failing chown does not
    # abort the wrapper; it is switched back on unconditionally afterwards
    set +e
    # NOTE(review): eval is presumably needed because the mount arguments
    # carry their own embedded quotes — confirm against ViashAutodetectMountArg.
    # The arrays are expanded unquoted, so paths containing whitespace look
    # like they would be word-split here.
    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/labels_transfer_knn:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
    set -e
  fi
}
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import warnings + +import mudata +import numpy as np +import scanpy as sc +from scipy.sparse import issparse +import pynndescent +import numba + + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'n_neighbors': $( if [ ! 
-z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
def distances_to_affinities(distances):
    """Turn neighbour distances into per-row weights that sum to 1.

    For every row, each distance ``d`` is mapped to ``exp(-d / s)`` with
    ``s = (2 / std(row)) ** 2`` and the row is then divided by its sum.

    Args:
        distances: 2-D array with one row per observation and one column
            per neighbour.

    Returns:
        Array of the same shape holding the normalised weights.
    """
    stds = np.std(distances, axis=1)
    # A row without spread (a single neighbour, or all neighbours at the
    # same distance) has std == 0. The division then yields inf, which maps
    # every distance in that row to exp(-0) == 1, i.e. uniform weights.
    # That result is the intended one, so only the divide-by-zero
    # RuntimeWarning is silenced; the values are unchanged.
    with np.errstate(divide="ignore"):
        scales = (2.0 / stds) ** 2
    scales = scales.reshape(-1, 1)
    distances_tilda = np.exp(-np.true_divide(distances, scales))

    return distances_tilda / np.sum(distances_tilda, axis=1, keepdims=True)
def main(par):
    """Transfer labels from a reference dataset to the query with weighted KNN.

    Reads the query MuData (``par["input"]``) and the reference
    (``par["reference"]``), builds a pynndescent index on the reference
    features, looks up ``par["n_neighbors"]`` neighbours for every query
    observation and, for each entry of ``par["reference_obs_targets"]``,
    writes the predicted label and its uncertainty to the matching
    ``par["output_obs_predictions"]`` / ``par["output_obs_uncertainty"]``
    columns of the query modality. The parameters used are recorded under
    ``.uns[par["output_uns_parameters"]]`` and the result is written to
    ``par["output"]``.

    Args:
        par: the Viash parameter dictionary; it is passed through
            ``check_arguments`` before use.
    """
    logger = setup_logger()

    logger.info("Checking arguments")
    par = check_arguments(par)

    logger.info("Reading input (query) data")
    mdata = mudata.read(par["input"])
    adata = mdata.mod[par["modality"]]

    logger.info("Reading reference data")
    adata_reference = sc.read(par["reference"], backup_url=par["reference"])

    # fetch feature data
    train_data = get_reference_features(adata_reference, par, logger)
    query_data = get_query_features(adata, par, logger)

    # The index below is always built from a dense array, so the query
    # matrix has to be dense as well; previously only the reference was
    # converted and a sparse query matrix was handed to the index as-is.
    if issparse(train_data):
        warnings.warn("Converting sparse matrix to dense. This may consume a lot of memory.")
        train_data = train_data.toarray()
    if issparse(query_data):
        warnings.warn("Converting sparse matrix to dense. This may consume a lot of memory.")
        query_data = query_data.toarray()

    logger.debug(f"Shape of train data: {train_data.shape}")

    logger.info("Building NN index")
    ref_nn_index = pynndescent.NNDescent(train_data, n_neighbors=par["n_neighbors"])
    ref_nn_index.prepare()

    ref_neighbors, ref_distances = ref_nn_index.query(query_data, k=par["n_neighbors"])

    weights = distances_to_affinities(ref_distances)

    # extend an existing parameter record instead of replacing it
    output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {})

    # for each annotation level, get prediction and uncertainty

    for obs_tar, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]):
        logger.info(f"Predicting labels for {obs_tar}")
        # category codes of every neighbour, one row per query observation
        ref_cats = adata_reference.obs[obs_tar].cat.codes.to_numpy()[ref_neighbors]
        prediction, uncertainty = weighted_prediction(weights, ref_cats)
        # map the winning codes back to their category labels
        prediction = np.asarray(adata_reference.obs[obs_tar].cat.categories)[prediction]

        adata.obs[obs_pred], adata.obs[obs_unc] = prediction, uncertainty

        # Write information about labels transfer to uns
        output_uns_parameters[obs_tar] = {
            "method": "KNN_pynndescent",
            "n_neighbors": par["n_neighbors"],
            "reference": par["reference"]
        }

    adata.uns[par["output_uns_parameters"]] = output_uns_parameters

    mdata.mod[par['modality']] = adata
    mdata.update()
    mdata.write_h5mu(par['output'].strip())
def setup_logger():
    """Configure the root logger to print INFO and above to stdout.

    The call is idempotent: when the root logger already has a stream
    handler writing to the current ``sys.stdout``, no second handler is
    added, so calling this more than once does not duplicate log lines.

    Returns:
        The root ``logging.Logger``.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Handlers on the root logger persist for the life of the process;
    # only attach ours when an equivalent one is not there yet.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
"vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Input dataset (query) arguments" + arguments: + - type: "file" + name: "--input" + description: "The query data to transfer the labels to. Should be a .h5mu file." + info: + label: "Query" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + slots: + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's inference,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + required: false + description: "The embedding to use for the classifier's inference.\ + \ Override using the `--input_obsm_features` argument. If not\ + \ provided, the `.X` slot will be used instead.\nMake sure that\ + \ embedding was obtained in the same way as the reference embedding\ + \ (e.g. by the same model or preprocessing).\n" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to use." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's inference.\ + \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ + \ was obtained in the same way as the reference embedding (e.g. 
by the same\ + \ model or preprocessing).\n" + info: null + example: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference dataset arguments" + arguments: + - type: "file" + name: "--reference" + description: "The reference data to train classifiers on." + info: + label: "Reference" + file_format: + type: "h5ad" + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's training,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + description: "The embedding to use for the classifier's training. Override\ + \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ + \ was obtained in the same way as the query embedding (e.g. by the same\ + \ model or preprocessing).\n" + required: true + obs: + - type: "string" + name: "targets" + multiple: true + example: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + description: "The target labels to transfer. Override using the `--reference_obs_targets`\ + \ argument." + required: true + example: + - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ + Make sure that embedding was obtained in the same way as the query embedding\ + \ (e.g. 
by the same model or preprocessing).\n" + info: null + default: + - "X_integrated_scanvi" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_targets" + description: "The `.obs` key of the target labels to tranfer." + info: null + default: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The query data in .h5mu format with predicted labels transfered\ + \ from the reference." + info: + label: "Output data" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + obs: + - type: "string" + name: "predictions" + description: "The predicted labels. Override using the `--output_obs_predictions`\ + \ argument." + required: true + - type: "double" + name: "uncertainty" + description: "The uncertainty of the predicted labels. Override using\ + \ the `--output_obs_uncertainty` argument." + required: false + obsm: + - type: "double" + name: "X_integrated_scanvi" + description: "The embedding used for the classifier's inference. Could\ + \ have any name, specified by `input_obsm_features` argument.\"" + required: false + uns: + - type: "string" + name: "parameters" + example: "labels_tranfer" + description: "Additional information about the parameters used for\ + \ the label transfer." 
+ required: true + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_predictions" + description: "In which `.obs` slots to store the predicted information.\nIf\ + \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_uncertainty" + description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ + If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_uns_parameters" + description: "The `.uns` key to store additional information about the parameters\ + \ used for the label transfer." + info: null + default: + - "labels_transfer" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Execution arguments" + arguments: + - type: "boolean_true" + name: "--force_retrain" + alternatives: + - "-f" + description: "Retrain models on the reference even if model_output directory\ + \ already has trained classifiers. WARNING! It will rewrite existing classifiers\ + \ for targets in the model_output directory!" + info: null + direction: "input" + dest: "par" + - type: "boolean" + name: "--use_gpu" + description: "Use GPU during models training and inference (recommended)." 
+ info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--verbosity" + alternatives: + - "-v" + description: "The verbosity level for evaluation of the classifier from the\ + \ range [0,2]" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--model_output" + description: "Output directory for model" + info: null + default: + - "model" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "double" + name: "--learning_rate" + alternatives: + - "--eta" + description: "Step size shrinkage used in update to prevents overfitting. Range:\ + \ [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_split_loss" + alternatives: + - "--gamma" + description: "Minimum loss reduction required to make a further partition on\ + \ a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_depth" + alternatives: + - "-d" + description: "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_child_weight" + description: "Minimum sum of instance weight (hessian) needed in a child. 
See\ + \ https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_delta_step" + description: "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--subsample" + description: "Subsample ratio of the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sampling_method" + description: "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - "uniform" + required: false + choices: + - "uniform" + - "gradient_based" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bytree" + description: "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bylevel" + description: "Subsample ratio of columns for each level. Range (0, 1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bynode" + description: "Subsample ratio of columns for each node (split). Range (0, 1].\ + \ See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--reg_lambda" + alternatives: + - "--lambda" + description: "L2 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--reg_alpha" + alternatives: + - "--alpha" + description: "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--scale_pos_weight" + description: "Control the balance of positive and negative weights, useful for\ + \ unbalanced classes. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../utils/helper.py" + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs label transfer from reference to query using XGBoost classifier" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: + method_id: "XGBClassifier" + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "xgboost~=1.7.1" + - "scikit-learn~=1.1.1" + - "numpy~=1.23.5" + - "pandas~=1.4.4" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + 
labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/xgboost" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/labels_transfer/xgboost/xgboost" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/labels_transfer/xgboost/helper.py b/target/docker/labels_transfer/xgboost/helper.py new file mode 100644 index 00000000000..a90bf59efdb --- /dev/null +++ b/target/docker/labels_transfer/xgboost/helper.py @@ -0,0 +1,32 @@ +def check_arguments(par): + # check output .obs predictions + if not par["output_obs_predictions"]: + par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] + assert len(par["output_obs_predictions"]) == len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" + + # check output .obs 
uncertainty + if not par["output_obs_uncertainty"]: + par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] + assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" + + return par + +def get_reference_features(adata_reference, par, logger): + if par["reference_obsm_features"] is None: + logger.info("Using .X of reference data") + train_data = adata_reference.X + else: + logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") + train_data = adata_reference.obsm[par["reference_obsm_features"]] + + return train_data + +def get_query_features(adata, par, logger): + if par["input_obsm_features"] is None: + logger.info("Using .X of query data") + query_data = adata.X + else: + logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") + query_data = adata.obsm[par["input_obsm_features"]] + + return query_data \ No newline at end of file diff --git a/target/docker/labels_transfer/xgboost/setup_logger.py b/target/docker/labels_transfer/xgboost/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/labels_transfer/xgboost/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/labels_transfer/xgboost/xgboost b/target/docker/labels_transfer/xgboost/xgboost new file mode 100755 index 00000000000..cf2ad39a785 --- /dev/null +++ b/target/docker/labels_transfer/xgboost/xgboost @@ -0,0 +1,2013 @@ +#!/usr/bin/env bash + +# xgboost 0.12.4 +# +# This wrapper script is 
auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Vladimir Shitov (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" 
>/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="xgboost" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "xgboost 0.12.4" + echo "" + echo "Performs label transfer from reference to query using XGBoost classifier" + echo "" + echo "Input dataset (query) arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " The query data to transfer the labels to. Should be a .h5mu file." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " Which modality to use." + echo "" + echo " --input_obsm_features" + echo " type: string" + echo " example: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's inference." + echo " If not provided, the \`.X\` slot will be used instead." + echo " Make sure that embedding was obtained in the same way as the reference" + echo " embedding (e.g. by the same model or preprocessing)." 
+ echo "" + echo "Reference dataset arguments:" + echo " --reference" + echo " type: file, file must exist" + echo " example:" + echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + echo " The reference data to train classifiers on." + echo "" + echo " --reference_obsm_features" + echo " type: string, required parameter" + echo " default: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's training." + echo " Make sure that embedding was obtained in the same way as the query" + echo " embedding (e.g. by the same model or preprocessing)." + echo "" + echo " --reference_obs_targets" + echo " type: string, multiple values allowed" + echo " default:" + echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" + echo " The \`.obs\` key of the target labels to tranfer." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " The query data in .h5mu format with predicted labels transfered from the" + echo " reference." + echo "" + echo " --output_obs_predictions" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the predicted information." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_pred\"\` suffix." + echo "" + echo " --output_obs_uncertainty" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the uncertainty of the predictions." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_uncertainty\"\` suffix." 
+ echo "" + echo " --output_uns_parameters" + echo " type: string" + echo " default: labels_transfer" + echo " The \`.uns\` key to store additional information about the parameters used" + echo " for the label transfer." + echo "" + echo "Execution arguments:" + echo " -f, --force_retrain" + echo " type: boolean_true" + echo " Retrain models on the reference even if model_output directory already" + echo " has trained classifiers. WARNING! It will rewrite existing classifiers" + echo " for targets in the model_output directory!" + echo "" + echo " --use_gpu" + echo " type: boolean" + echo " default: false" + echo " Use GPU during models training and inference (recommended)." + echo "" + echo " -v, --verbosity" + echo " type: integer" + echo " default: 1" + echo " The verbosity level for evaluation of the classifier from the range" + echo " [0,2]" + echo "" + echo " --model_output" + echo " type: file, output, file must exist" + echo " default: model" + echo " Output directory for model" + echo "" + echo "Learning parameters:" + echo " --eta, --learning_rate" + echo " type: double" + echo " default: 0.3" + echo " Step size shrinkage used in update to prevents overfitting. Range:" + echo " [0,1]. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --gamma, --min_split_loss" + echo " type: double" + echo " default: 0.0" + echo " Minimum loss reduction required to make a further partition on a leaf" + echo " node of the tree. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " -d, --max_depth" + echo " type: integer" + echo " default: 6" + echo " Maximum depth of a tree. 
See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --min_child_weight" + echo " type: integer" + echo " default: 1" + echo " Minimum sum of instance weight (hessian) needed in a child. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --max_delta_step" + echo " type: double" + echo " default: 0.0" + echo " Maximum delta step we allow each leaf output to be. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --subsample" + echo " type: double" + echo " default: 1.0" + echo " Subsample ratio of the training instances. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --sampling_method" + echo " type: string" + echo " default: uniform" + echo " choices: [ uniform, gradient_based ]" + echo " The method to use to sample the training instances. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --colsample_bytree" + echo " type: double" + echo " default: 1.0" + echo " Fraction of columns to be subsampled. Range (0, 1]. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --colsample_bylevel" + echo " type: double" + echo " default: 1.0" + echo " Subsample ratio of columns for each level. Range (0, 1]. 
See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --colsample_bynode" + echo " type: double" + echo " default: 1.0" + echo " Subsample ratio of columns for each node (split). Range (0, 1]. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --lambda, --reg_lambda" + echo " type: double" + echo " default: 1.0" + echo " L2 regularization term on weights. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --alpha, --reg_alpha" + echo " type: double" + echo " default: 0.0" + echo " L1 regularization term on weights. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --scale_pos_weight" + echo " type: double" + echo " default: 1.0" + echo " Control the balance of positive and negative weights, useful for" + echo " unbalanced classes. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libopenblas-dev liblapack-dev gfortran && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "scanpy~=1.9.5" "xgboost~=1.7.1" "scikit-learn~=1.1.1" "numpy~=1.23.5" "pandas~=1.4.4" + +LABEL org.opencontainers.image.authors="Vladimir Shitov" +LABEL org.opencontainers.image.description="Companion container for running component 
labels_transfer xgboost" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-xgboost-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! 
"$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + 
ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "xgboost 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_obsm_features) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --input_obsm_features=*) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obsm_features) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obsm_features=*) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obs_targets) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS="$2" + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obs_targets=*) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_obs_predictions) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_obs_predictions=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_obs_uncertainty) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_obs_uncertainty=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_uns_parameters) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_uns_parameters=*) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --force_retrain) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + -f) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + --use_gpu) + [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_GPU="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_gpu. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --use_gpu=*) + [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu=*\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_GPU=$(ViashRemoveFlags "$1") + shift 1 + ;; + --verbosity) + [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERBOSITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --verbosity. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --verbosity=*) + [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity=*\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERBOSITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + -v) + [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'-v\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERBOSITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -v. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --model_output) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --model_output=*) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --learning_rate) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --learning_rate=*) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --eta) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--eta\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --eta. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --min_split_loss) + [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLIT_LOSS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_split_loss. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_split_loss=*) + [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss=*\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLIT_LOSS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gamma) + [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--gamma\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLIT_LOSS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gamma. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_depth) + [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DEPTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_depth. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_depth=*) + [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth=*\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DEPTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + -d) + [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'-d\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MAX_DEPTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -d. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_child_weight) + [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CHILD_WEIGHT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_child_weight. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_child_weight=*) + [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight=*\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CHILD_WEIGHT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_delta_step) + [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DELTA_STEP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_delta_step. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_delta_step=*) + [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step=*\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DELTA_STEP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --subsample) + [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --subsample=*) + [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample=*\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_method) + [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_METHOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_method. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_method=*) + [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method=*\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_METHOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --colsample_bytree) + [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYTREE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bytree. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --colsample_bytree=*) + [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree=*\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYTREE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --colsample_bylevel) + [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_COLSAMPLE_BYLEVEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bylevel. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --colsample_bylevel=*) + [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel=*\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYLEVEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --colsample_bynode) + [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYNODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bynode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --colsample_bynode=*) + [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode=*\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYNODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reg_lambda) + [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_LAMBDA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_lambda. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reg_lambda=*) + [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda=*\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REG_LAMBDA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lambda) + [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_LAMBDA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lambda. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reg_alpha) + [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_ALPHA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_alpha. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reg_alpha=*) + [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha=*\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_ALPHA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alpha) + [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_ALPHA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scale_pos_weight) + [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCALE_POS_WEIGHT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scale_pos_weight. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        --scale_pos_weight=*)
+            [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight=*\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_SCALE_POS_WEIGHT=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        # ---- platform options (triple dash) -------------------------------
+        ---setup)
+            # '---setup <strategy>': the strategy is a separate word, so both the
+            # flag and its value have to be consumed. A bare trailing '---setup'
+            # is still accepted and leaves the strategy empty.
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            if [ $# -ge 2 ]; then
+                shift 2
+            else
+                shift 1
+            fi
+            ;;
+        ---setup=*)
+            # '---setup=<strategy>': flag and value are one word
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        ---dockerfile)
+            # print the Dockerfile and stop
+            ViashDockerfile
+            exit 0
+            ;;
+        # NOTE: '---v' is already matched by the '---v|---verbose' arm earlier in
+        # this case statement, so in practice only '---volume' reaches this arm.
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            # the value is part of "$1" ('---volume=src:dst'), not of the next word
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1
+            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        *)  # positional arg or unknown option
+            # The collected positional args are re-parsed with `eval` below, so each
+            # one is shell-escaped with printf %q. Wrapping it in plain single quotes
+            # is not enough: an argument that itself contains a single quote would
+            # end the quoting and be evaluated as shell code.
+            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS $(printf '%q' "$1")"
+            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+            shift # past argument
+            ;;
+    esac
+done
+
+# parse positional parameters
+# (the operand is quoted so the escaped arguments reach eval unmodified, without
+# an extra round of word splitting and pathname expansion)
+eval "set -- $VIASH_POSITIONAL_ARGS"
+
+
+ViashDockerInstallationCheck
+
+# ---setup: run ViashDockerSetup with the strategy given on the command line, then stop
+if [ "$VIASH_MODE" == "docker_setup" ]; then
+  ViashDockerSetup 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY"
+  exit 0
+fi
+ViashDockerSetup 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0' ifneedbepullelsecachedbuild
+
+# ---debug: open an interactive bash in the container with $(pwd) mounted at /pwd, then stop
+if [ "$VIASH_MODE" == "docker_debug" ]; then
+  ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0'"
+  docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0'
+  exit 0
+fi
+
+# setting computational defaults
+
+# helper function for parsing memory strings
+# $1     : amount of memory: digits followed by a unit (b, k/kb, m/mb, g/gb,
+#          t/tb, p/pb); upper/lower case and embedded whitespace are ignored
+# return : the amount in bytes (units are powers of 1024), or nothing at all
+#          when $1 does not have that form (a unit is mandatory)
+# examples:
+#   ViashMemoryAsBytes 2GB    # returns 2147483648
+#   ViashMemoryAsBytes 100b   # returns 100
+function ViashMemoryAsBytes {
+  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
+  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
+  if [[ $memory =~ $memory_regex ]]; then
+    # leading digits / trailing unit of the already validated string
+    local number=${memory/[^0-9]*/}
+    local symbol=${memory/*[0-9]/}
+
+    case $symbol in
+      b)      memory_b=$number ;;
+      kb|k)   memory_b=$(( $number * 1024 )) ;;
+      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
+      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
+      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ !
-z ${VIASH_META_MEMORY+x} ]; then
+  # quoted, so a value such as "2 GB" reaches the helper as one argument
+  # (ViashMemoryAsBytes removes the whitespace itself)
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # (`unset` takes the variable NAME; with a leading `$` it would receive the
+    # empty value and unset nothing)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+# (`${VAR+x}` is empty only when VAR is unset; a variable that is set but empty passes)
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then
+  ViashError '--reference_obsm_features' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ "${VIASH_META_TEMP_DIR+x}" != "x" ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+
+# filling in defaults
+# `${VAR=value}` assigns only when VAR is unset; a value supplied on the command
+# line (even an empty one) is kept. `:` merely evaluates the expansion.
+: "${VIASH_PAR_MODALITY=rna}"
+: "${VIASH_PAR_REFERENCE_OBS_TARGETS=ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level}"
+: "${VIASH_PAR_OUTPUT_UNS_PARAMETERS=labels_transfer}"
+: "${VIASH_PAR_FORCE_RETRAIN=false}"
+: "${VIASH_PAR_USE_GPU=false}"
+: "${VIASH_PAR_VERBOSITY=1}"
+: "${VIASH_PAR_MODEL_OUTPUT=model}"
+: "${VIASH_PAR_LEARNING_RATE=0.3}"
+: "${VIASH_PAR_MIN_SPLIT_LOSS=0.0}"
+: "${VIASH_PAR_MAX_DEPTH=6}"
+: "${VIASH_PAR_MIN_CHILD_WEIGHT=1}"
+: "${VIASH_PAR_MAX_DELTA_STEP=0.0}"
+: "${VIASH_PAR_SUBSAMPLE=1.0}"
+: "${VIASH_PAR_SAMPLING_METHOD=uniform}"
+: "${VIASH_PAR_COLSAMPLE_BYTREE=1.0}"
+: "${VIASH_PAR_COLSAMPLE_BYLEVEL=1.0}"
+: "${VIASH_PAR_COLSAMPLE_BYNODE=1.0}"
+: "${VIASH_PAR_REG_LAMBDA=1.0}"
+: "${VIASH_PAR_REG_ALPHA=0.0}"
+: "${VIASH_PAR_SCALE_POS_WEIGHT=1.0}"
+
+# check whether required files exist
+if [ !
-z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then + if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--force_retrain' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_USE_GPU" ]]; then + if ! [[ "$VIASH_PAR_USE_GPU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--use_gpu' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_VERBOSITY" ]]; then + if ! [[ "$VIASH_PAR_VERBOSITY" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--verbosity' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then + if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ]]; then + if ! [[ "$VIASH_PAR_MIN_SPLIT_LOSS" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_split_loss' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_DEPTH" ]]; then + if ! [[ "$VIASH_PAR_MAX_DEPTH" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_depth' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ]]; then + if ! 
[[ "$VIASH_PAR_MIN_CHILD_WEIGHT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_child_weight' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_DELTA_STEP" ]]; then + if ! [[ "$VIASH_PAR_MAX_DELTA_STEP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--max_delta_step' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SUBSAMPLE" ]]; then + if ! [[ "$VIASH_PAR_SUBSAMPLE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--subsample' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ]]; then + if ! [[ "$VIASH_PAR_COLSAMPLE_BYTREE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--colsample_bytree' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ]]; then + if ! [[ "$VIASH_PAR_COLSAMPLE_BYLEVEL" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--colsample_bylevel' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ]]; then + if ! [[ "$VIASH_PAR_COLSAMPLE_BYNODE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--colsample_bynode' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_REG_LAMBDA" ]]; then + if ! [[ "$VIASH_PAR_REG_LAMBDA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--reg_lambda' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_REG_ALPHA" ]]; then + if ! 
[[ "$VIASH_PAR_REG_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--reg_alpha' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ]]; then + if ! [[ "$VIASH_PAR_SCALE_POS_WEIGHT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--scale_pos_weight' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value belongs to a set of choices +if [ ! -z "$VIASH_PAR_SAMPLING_METHOD" ]; then + VIASH_PAR_SAMPLING_METHOD_CHOICES=("uniform:gradient_based") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_SAMPLING_METHOD_CHOICES[*]}:" =~ ":$VIASH_PAR_SAMPLING_METHOD:" ]]; then + ViashError '--sampling_method' specified value of \'$VIASH_PAR_SAMPLING_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_MODEL_OUTPUT")" ) + VIASH_PAR_MODEL_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_MODEL_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_MODEL_OUTPUT" ) +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/labels_transfer_xgboost:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-xgboost-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import json +import os +from typing import Optional +import yaml +from pathlib import Path + +import mudata +import numpy as np +import scanpy as sc +import pandas as pd +import xgboost as xgb +from sklearn.model_selection import train_test_split +from sklearn.metrics import classification_report +from sklearn.preprocessing import LabelEncoder + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obs_targets': $( if [ ! 
-z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'use_gpu': $( if [ ! -z ${VIASH_PAR_USE_GPU+x} ]; then echo "r'${VIASH_PAR_USE_GPU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'verbosity': $( if [ ! -z ${VIASH_PAR_VERBOSITY+x} ]; then echo "int(r'${VIASH_PAR_VERBOSITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_split_loss': $( if [ ! -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then echo "float(r'${VIASH_PAR_MIN_SPLIT_LOSS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_depth': $( if [ ! -z ${VIASH_PAR_MAX_DEPTH+x} ]; then echo "int(r'${VIASH_PAR_MAX_DEPTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_child_weight': $( if [ ! 
-z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then echo "int(r'${VIASH_PAR_MIN_CHILD_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_delta_step': $( if [ ! -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DELTA_STEP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sampling_method': $( if [ ! -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then echo "r'${VIASH_PAR_SAMPLING_METHOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'colsample_bytree': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYTREE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'colsample_bylevel': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYLEVEL//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'colsample_bynode': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYNODE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'reg_lambda': $( if [ ! -z ${VIASH_PAR_REG_LAMBDA+x} ]; then echo "float(r'${VIASH_PAR_REG_LAMBDA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'reg_alpha': $( if [ ! -z ${VIASH_PAR_REG_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_REG_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scale_pos_weight': $( if [ ! -z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then echo "float(r'${VIASH_PAR_SCALE_POS_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +from helper import check_arguments, get_reference_features, get_query_features +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +# read config arguments +config = yaml.safe_load(Path(meta["config"]).read_text()) + +# look for training params for method +argument_groups = { grp["name"]: grp["arguments"] for grp in config["functionality"]["argument_groups"] } +training_arg_names = [ arg["name"].replace("--", "") for arg in argument_groups["Learning parameters"] ] +training_params = { arg_name: par[arg_name] for arg_name in training_arg_names } + +def encode_labels(y): + labels_encoder = LabelEncoder() + labels_encoder.fit(y) + + return labels_encoder.transform(y), labels_encoder + + +def get_model_eval(xgb_model, X_test, y_test, labels_encoder): + preds = xgb_model.predict(X_test) + + cr = classification_report(labels_encoder.inverse_transform(y_test), + labels_encoder.inverse_transform(preds), + output_dict=True) + cr_df = pd.DataFrame(cr).transpose() + + return cr_df + + +def train_test_split_adata(adata, labels): + train_data = pd.DataFrame(data=adata.X, index=adata.obs_names) + + X_train, X_test, y_train, y_test = train_test_split( + train_data, labels, test_size=0.2, random_state=42, stratify=labels) + + return X_train, X_test, y_train, y_test + + +def 
train_xgb_model(X_train, y_train, gpu=True) -> xgb.XGBClassifier: + n_classes = len(np.unique(y_train)) + objective = "binary:logistic" if n_classes == 2 else "multi:softprob" + + tree_method = "gpu_hist" if gpu else "hist" + xgbc = xgb.XGBClassifier(tree_method=tree_method, objective=objective, **training_params) + xgbc.fit(X_train, y_train) + + return xgbc + + +def build_classifier(X, y, labels_encoder, label_key, eval_verbosity: Optional[int] = 1, gpu=True) -> xgb.XGBClassifier: + # Adata prep + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) + #Note: Do we need a new train-test split for each classifier? + + # Model training + xgb_model = train_xgb_model(X_train, y_train, gpu=gpu) + + # Model eval + if eval_verbosity != 0: + cr_df = get_model_eval(xgb_model, X_test, y_test, labels_encoder) + + if eval_verbosity == 2: + print(cr_df) + + else: + overall_accuracy = cr_df["support"]["accuracy"] + low_prec_key = cr_df.precision.idxmin() + low_prec_val = cr_df.precision.min() + low_rec_key = cr_df.recall.idxmin() + low_rec_val = cr_df.recall.min() + low_f1_key = cr_df["f1-score"].idxmin() + low_f1_val = cr_df["f1-score"].min() + + print("") + print(f"Summary stats for {label_key} model:") + print(f"Overall accuracy: {overall_accuracy}") + print(f"Min. precision: {low_prec_key}: {low_prec_val}") + print(f"Min. Recall: {low_rec_key}: {low_rec_val}") + print(f"Min. F1-score: {low_f1_key}: {low_f1_val}") + print("") + + return xgb_model + + +def build_ref_classifiers(adata_reference, targets, model_path, + eval_verbosity: Optional[int] = 1, gpu: Optional[bool] = True) -> None: + """ + This function builds xgboost classifiers on a reference embedding for a designated number of + adata_reference.obs columns. Classifier .xgb files and a model_info.json file is written to the \`model_path\` + directory. Model evaluation is printed to stdout. 
+ + Inputs: + * \`adata_reference\`: The AnnData object that was used to train the reference model + * \`model_path\`: The reference model directory where the classifiers will also be stored + * \`eval_verbosity\`: The verbosity level for evaluation of the classifier from the range [0;2]. + * \`gpu\`: Boolean indicating whether a gpu is available for classifier training + * \`targets\`: The \`adata_reference.obs\` column names for which a classifier is trained + + Example: + \`\`\` + >>> adata + AnnData object with n_obs x n_vars = 700 x 765 + obs: "ann_finest_level", "ann_level_1" + + >>> os.listdir("/path/to/model") + model_params.pt* + + >>> build_ref_classifiers(adata, ["ann_finest_level", "ann_level_1"], "path/to/model", eval_verbosity=1, gpu=True) + >>> os.listdir("/path/to/model") + classifier_ann_finest_level.xgb* model_info.json* + classifier_ann_level_1.xgb* model_params.pt* + \`\`\` + """ + + # Check inputs + if not isinstance(eval_verbosity, int): + raise TypeError("\`eval_verbosity\` should be an integer between 0 and 2.") + + if eval_verbosity < 0 or eval_verbosity > 2: + raise ValueError("\`eval_verbosity\` should be an integer between 0 and 2.") + + train_data = get_reference_features(adata_reference, par, logger) + + if not os.path.exists(model_path): + os.makedirs(model_path, exist_ok=True) + + # Map from name of classifier to file names + classifiers = dict() + + for label, obs_pred in zip(targets, par["output_obs_predictions"]): + if label not in adata_reference.obs: + raise ValueError(f"{label} is not in the \`adata\` object passed!") + + filename = "classifier_" + label + ".xgb" + + labels, labels_encoder = encode_labels(adata_reference.obs[label]) + logger.info(f"Classes: {labels_encoder.classes_}") + + logger.info(f"Building classifier for {label}...") + xgb_model = build_classifier( + X=train_data, + y=labels, + labels_encoder=labels_encoder, + label_key=label, + eval_verbosity=eval_verbosity, + gpu=gpu + ) + + # Save classifier + logger.info("Saving model") + xgb_model.save_model(os.path.join(model_path, filename)) + + # Store classifier info + classifiers[label] 
= { + "filename": filename, + "labels": labels_encoder.classes_.tolist(), + "obs_column": obs_pred, + "model_params": training_params, + } + + # Store model_info.json file + model_info = { + "classifier_info": classifiers + } + + logger.info("Writing model_info to the file") + # Read previous file if it exists + if os.path.exists(model_path + "/model_info.json"): + logger.info("Old model_info file found, updating") + with open(model_path + "/model_info.json", "r") as f: + old_model_info = json.loads(f.read()) + + for key in old_model_info: + if key in model_info: + old_model_info[key].update(model_info[key]) + json_string = json.dumps(old_model_info, indent=4) + + else: + logger.info("Creating a new file") + json_string = json.dumps(model_info, indent=4) + + with open(model_path + "/model_info.json", "w") as f: + f.write(json_string) + + +def project_labels( + query_dataset, + cell_type_classifier_model: xgb.XGBClassifier, + annotation_column_name='label_pred', + uncertainty_column_name='label_uncertainty', + uncertainty_thresh=None # Note: currently not passed to predict function +): + """ + A function that projects predicted labels onto the query dataset, along with uncertainty scores. + Performs in-place update of the adata object, adding columns to the \`obs\` DataFrame. 
+ + Input: + * \`query_dataset\`: The query \`AnnData\` object + * \`cell_type_classifier_model\`: The \`xgb.XGBClassifier\` used to predict labels and class probabilities + * \`annotation_column_name\`: Column name in \`query_dataset.obs\` where to store the predicted labels + * \`uncertainty_column_name\`: Column name in \`query_dataset.obs\` where to store the uncertainty scores + * \`uncertainty_thresh\`: Optional threshold between 0 and 1; cells whose uncertainty is not below it are called 'Unknown' in an extra \`<annotation_column_name>_filtered\` column + + Output: + The passed \`AnnData\` object, which is modified in place and also returned + + """ + + if (uncertainty_thresh is not None) and (uncertainty_thresh < 0 or uncertainty_thresh > 1): + raise ValueError(f'\`uncertainty_thresh\` must be \`None\` or between 0 and 1.') + + query_data = get_query_features(query_dataset, par, logger) + + # Predict labels and probabilities + query_dataset.obs[annotation_column_name] = cell_type_classifier_model.predict(query_data) + + logger.info("Predicting probabilities") + probs = cell_type_classifier_model.predict_proba(query_data) + + # Format probabilities + df_probs = pd.DataFrame(probs, columns=cell_type_classifier_model.classes_, index=query_dataset.obs_names) + query_dataset.obs[uncertainty_column_name] = 1 - df_probs.max(1) + + # Note: this is here in case we want to propose a set of values for the user to accept to seed the + # manual curation of predicted labels + if uncertainty_thresh is not None: + logger.info("Marking uncertain predictions") + query_dataset.obs[annotation_column_name + "_filtered"] = [ + val if query_dataset.obs[uncertainty_column_name][i] < uncertainty_thresh + else "Unknown" for i, val in enumerate(query_dataset.obs[annotation_column_name])] + + return query_dataset + + +def predict( + query_dataset, + cell_type_classifier_model_path, + annotation_column_name: str, + prediction_column_name: str, + uncertainty_column_name: str, + models_info, + use_gpu: bool = False +) -> pd.DataFrame: + """ + Returns the query dataset with the prediction and uncertainty columns added to its \`obs\` + """ + + tree_method = "gpu_hist" if use_gpu else "hist" + + labels = 
models_info["classifier_info"][annotation_column_name]["labels"] + + objective = "binary:logistic" if len(labels) == 2 else "multi:softprob" + cell_type_classifier_model = xgb.XGBClassifier(tree_method=tree_method, objective=objective) + + logger.info("Loading model") + cell_type_classifier_model.load_model(fname=cell_type_classifier_model_path) + + logger.info("Predicting labels") + project_labels(query_dataset, + cell_type_classifier_model, + annotation_column_name=prediction_column_name, + uncertainty_column_name=uncertainty_column_name) + + logger.info("Converting labels from numbers to classes") + labels_encoder = LabelEncoder() + labels_encoder.classes_ = np.array(labels) + query_dataset.obs[prediction_column_name] = labels_encoder.inverse_transform(query_dataset.obs[prediction_column_name]) + + return query_dataset + + +def main(par): + logger.info("Checking arguments") + par = check_arguments(par) + + mdata = mudata.read(par["input"].strip()) + adata = mdata.mod[par["modality"]] + + adata_reference = sc.read(par["reference"], backup_url=par["reference"]) + + # If classifiers for targets are in the model_output directory, simply open them and run (unless \`force_retrain\` is set) + # If some classifiers are missing, train and save them first + # Predict and save the query data + + targets_to_train = [] + + for obs_target in par["reference_obs_targets"]: + if not os.path.exists(par["model_output"]) or f"classifier_{obs_target}.xgb" not in os.listdir(par["model_output"]) or par["force_retrain"]: + logger.info(f"Classifier for {obs_target} added to a training schedule") + targets_to_train.append(obs_target) + else: + logger.info(f"Found classifier for {obs_target}, no retraining required") + + build_ref_classifiers(adata_reference, targets_to_train, model_path=par["model_output"], + gpu=par["use_gpu"], eval_verbosity=par["verbosity"]) + + output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) + + with open(par["model_output"] + "/model_info.json", 
"r") as f: + models_info = json.loads(f.read()) + + for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): + logger.info(f"Predicting {obs_target}") + + adata = predict(query_dataset=adata, + cell_type_classifier_model_path=os.path.join(par["model_output"], "classifier_" + obs_target + ".xgb"), + annotation_column_name=obs_target, + prediction_column_name=obs_pred, + uncertainty_column_name=obs_unc, + models_info=models_info, + use_gpu=par["use_gpu"]) + + if obs_target in targets_to_train: + # Save information about the transfer to .uns + output_uns_parameters[obs_target] = { + "method": "XGBClassifier", + **training_params + } + + adata.uns[par["output_uns_parameters"]] = output_uns_parameters + + logger.info("Updating mdata") + mdata.mod[par['modality']] = adata + mdata.update() + + logger.info("Writing output") + mdata.write_h5mu(par['output'].strip()) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ]; then + VIASH_PAR_MODEL_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_MODEL_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/bd_rhapsody/.config.vsh.yaml b/target/docker/mapping/bd_rhapsody/.config.vsh.yaml new file mode 100644 index 00000000000..6ad28624ee3 --- /dev/null +++ b/target/docker/mapping/bd_rhapsody/.config.vsh.yaml @@ -0,0 +1,417 @@ +functionality: + name: "bd_rhapsody" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "string" + name: "--mode" + description: "Whether to run a whole transcriptome analysis (WTA) or a targeted\ + \ analysis." + info: null + example: + - "wta" + required: true + choices: + - "wta" + - "targeted" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to your read files in the FASTQ.GZ format. You may specify\ + \ as many R1/R2 read pairs as you want." 
+ info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + - "--reference_genome" + description: "Refence to map to. For `--mode wta`, this is the path to STAR\ + \ index as a tar.gz file. For `--mode targeted`, this is the path to mRNA\ + \ reference file for pre-designed, supplemental, or custom panel, in FASTA\ + \ format" + info: null + example: + - "reference_genome.tar.gz|reference.fasta" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--transcriptome_annotation" + alternatives: + - "-t" + description: "Path to GTF annotation file (only for `--mode wta`)." + info: null + example: + - "transcriptome.gtf" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--abseq_reference" + alternatives: + - "-a" + description: "Path to the AbSeq reference file in FASTA format. Only needed\ + \ if BD AbSeq Ab-Oligos are used." + info: null + example: + - "abseq_reference.fasta" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--supplemental_reference" + alternatives: + - "-s" + description: "Path to the supplemental reference file in FASTA format. Only\ + \ needed if there are additional transgene sequences used in the experiment\ + \ (only for `--mode wta`)." + info: null + example: + - "supplemental_reference.fasta" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sample_prefix" + description: "Specify a run name to use as the output file base name. 
Use only\ + \ letters, numbers, or hyphens. Do not use special characters or spaces." + info: null + default: + - "sample" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output folder. Output still needs to be processed further." + info: null + example: + - "output_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Putative cell calling settings" + arguments: + - type: "string" + name: "--putative_cell_call" + description: "Specify the dataset to be used for putative cell calling. For\ + \ putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference\ + \ fasta file above." + info: null + example: + - "mRNA" + required: false + choices: + - "mRNA" + - "AbSeq_Experimental" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--exact_cell_count" + description: "Exact cell count - Set a specific number (>=1) of cells as putative,\ + \ based on those with the highest error-corrected read count" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--disable_putative_calling" + description: "Disable Refined Putative Cell Calling - Determine putative cells\ + \ using only the basic algorithm (minimum second derivative along the cumulative\ + \ reads curve). The refined algorithm attempts to remove false positives and\ + \ recover false negatives, but may not be ideal for certain complex mixtures\ + \ of cell types. Does not apply if Exact Cell Count is set." 
+ info: null + direction: "input" + dest: "par" + - name: "Subsample arguments" + arguments: + - type: "double" + name: "--subsample" + description: "A number >1 or fraction (0 < n < 1) to indicate the number or\ + \ percentage of reads to subsample." + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--subsample_seed" + description: "A seed for replicating a previous subsampled run." + info: null + example: + - 3445 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Multiplex arguments" + arguments: + - type: "string" + name: "--sample_tags_version" + description: "Specify if multiplexed run." + info: null + example: + - "human" + required: false + choices: + - "human" + - "hs" + - "mouse" + - "mm" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--tag_names" + description: "Tag_Names (optional) - Specify the tag number followed by '-'\ + \ and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not\ + \ use the special characters: &, (), [], {}, <>, ?, |\n" + info: null + example: + - "4-mySample" + - "9-myOtherSample" + - "6-alsoThisSample" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - name: "VDJ arguments" + arguments: + - type: "string" + name: "--vdj_version" + description: "Specify if VDJ run." + info: null + example: + - "human" + required: false + choices: + - "human" + - "mouse" + - "humanBCR" + - "humanBCR" + - "humanTCR" + - "mouseBCR" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "CWL-runner arguments" + arguments: + - type: "boolean" + name: "--parallel" + description: "Run jobs in parallel." 
+ info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--timestamps" + description: "Add timestamps to the errors, warnings, and notifications." + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--dryrun" + description: "If true, the output directory will only contain the CWL input\ + \ files, but the pipeline itself will not be executed." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "rhapsody_wta_1.10.1_nodocker.cwl" + - type: "file" + path: "rhapsody_targeted_1.10.1_nodocker.cwl" + - type: "file" + path: "src/utils/setup_logger.py" + description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe\ + \ CWL pipeline file is obtained by cloning 'https://bitbucket.org/CRSwDev/cwl/src/master/'\ + \ and removing all objects with class 'DockerRequirement' from the YML.\n\nThis\ + \ pipeline can be used for a targeted analysis (with `--mode targeted`) or for\ + \ a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\"targeted\"\ + `, then either the `--reference` or `--abseq_reference` parameters must be defined.\n\ + * If mode is `\"wta\"`, then `--reference` and `--transcriptome_annotation` must\ + \ be defined, `--abseq_reference` and `--supplemental_reference` is optional.\n\ + \nThe reference_genome and transcriptome_annotation files can be generated with\ + \ the make_reference pipeline.\nAlternatively, BD also provides standard references\ + \ which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n\ + \ - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n" + test_resources: + - type: "bash_script" + path: "test_memory.sh" + 
is_executable: true + - type: "bash_script" + path: "test_wta.sh" + is_executable: true + - type: "bash_script" + path: "test_targeted.sh" + is_executable: true + - type: "file" + path: "resources_test/bdrhap_vdj" + - type: "file" + path: "resources_test/bdrhap_5kjrt" + - type: "file" + path: "resources_test/reference_gencodev41_chr1/" + info: + name: "BD Rhapsody" + short_description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline" + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/bd_rhapsody:1.10.1" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "pandas<2" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" 
+ cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/bd_rhapsody" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/bd_rhapsody/bd_rhapsody" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/bd_rhapsody/bd_rhapsody b/target/docker/mapping/bd_rhapsody/bd_rhapsody new file mode 100755 index 00000000000..60def456329 --- /dev/null +++ b/target/docker/mapping/bd_rhapsody/bd_rhapsody @@ -0,0 +1,1975 @@ +#!/usr/bin/env bash + +# bd_rhapsody 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+#  * Robrecht Cannoodt (maintainer)
+
+set -e
+
+if [ -z "$VIASH_TEMP" ]; then  # the first non-empty candidate below wins; /tmp is the last resort
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put single quotes around non-flag values (flag names are left untouched)
+# $1     : unquoted string
+# return : possibly quoted string, printed on stdout
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove a leading '--flag=' prefix, keeping only the value
+# $1     : string with a possible leading flag
+# return : string without possible leading flag, printed on stdout
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory that contains a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory holding the (symlink-resolved) bash file
+function ViashSourceDir {
+  SOURCE="$1"  # not declared local: SOURCE and DIR stay visible to the caller unless this runs in a subshell
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE  # default: messages up to and including 'notice' are shown
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level; the message is shown when $VIASH_VERBOSITY is at least this value
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="bd_rhapsody"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Print the usage text of this executable (description and all arguments) to stdout
+function ViashHelp {
+  echo "bd_rhapsody 0.12.4"
+  echo ""
+  echo "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline."
+  echo ""
+  echo "The CWL pipeline file is obtained by cloning"
+  echo "'https://bitbucket.org/CRSwDev/cwl/src/master/' and removing all objects with"
+  echo "class 'DockerRequirement' from the YML."
+  echo ""
+  echo "This pipeline can be used for a targeted analysis (with \`--mode targeted\`) or"
+  echo "for a whole transcriptome analysis (with \`--mode wta\`)."
+  echo ""
+  echo "* If mode is \`\"targeted\"\`, then either the \`--reference\` or \`--abseq_reference\`"
+  echo "parameters must be defined."
+  echo "* If mode is \`\"wta\"\`, then \`--reference\` and \`--transcriptome_annotation\` must"
+  echo "be defined, \`--abseq_reference\` and \`--supplemental_reference\` is optional."
+  echo ""
+  echo "The reference_genome and transcriptome_annotation files can be generated with"
+  echo "the make_reference pipeline."
+  echo "Alternatively, BD also provides standard references which can be downloaded from"
+  echo "these locations:"
+  echo ""
+  echo " - Human:"
+  echo "http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/"
+  echo " - Mouse:"
+  echo "http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/"
+  echo ""
+  echo "Inputs:"
+  echo " --mode"
+  echo " type: string, required parameter"
+  echo " example: wta"
+  echo " choices: [ wta, targeted ]"
+  echo " Whether to run a whole transcriptome analysis (WTA) or a targeted"
+  echo " analysis."
+  echo ""
+  echo " -i, --input"
+  echo " type: file, required parameter, multiple values allowed, file must exist"
+  echo " example: input.fastq.gz"
+  echo " Path to your read files in the FASTQ.GZ format. You may specify as many"
+  echo " R1/R2 read pairs as you want."
+  echo ""
+  echo " -r, --reference_genome, --reference"
+  echo " type: file, required parameter, multiple values allowed, file must exist"
+  echo " example: reference_genome.tar.gz|reference.fasta"
+  echo " Reference to map to. For \`--mode wta\`, this is the path to STAR index as a"
+  echo " tar.gz file. For \`--mode targeted\`, this is the path to mRNA reference"
+  echo " file for pre-designed, supplemental, or custom panel, in FASTA format"
+  echo ""
+  echo " -t, --transcriptome_annotation"
+  echo " type: file, file must exist"
+  echo " example: transcriptome.gtf"
+  echo " Path to GTF annotation file (only for \`--mode wta\`)."
+  echo ""
+  echo " -a, --abseq_reference"
+  echo " type: file, multiple values allowed, file must exist"
+  echo " example: abseq_reference.fasta"
+  echo " Path to the AbSeq reference file in FASTA format. Only needed if BD"
+  echo " AbSeq Ab-Oligos are used."
+  echo ""
+  echo " -s, --supplemental_reference"
+  echo " type: file, multiple values allowed, file must exist"
+  echo " example: supplemental_reference.fasta"
+  echo " Path to the supplemental reference file in FASTA format. Only needed if"
+  echo " there are additional transgene sequences used in the experiment (only"
+  echo " for \`--mode wta\`)."
+  echo ""
+  echo " --sample_prefix"
+  echo " type: string"
+  echo " default: sample"
+  echo " Specify a run name to use as the output file base name. Use only"
+  echo " letters, numbers, or hyphens. Do not use special characters or spaces."
+  echo ""
+  echo "Outputs:"
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " example: output_dir"
+  echo " Output folder. Output still needs to be processed further."
+  echo ""
+  echo "Putative cell calling settings:"
+  echo " --putative_cell_call"
+  echo " type: string"
+  echo " example: mRNA"
+  echo " choices: [ mRNA, AbSeq_Experimental ]"
+  echo " Specify the dataset to be used for putative cell calling. For putative"
+  echo " cell calling using an AbSeq dataset, please provide an AbSeq_Reference"
+  echo " fasta file above."
+  echo ""
+  echo " --exact_cell_count"
+  echo " type: integer"
+  echo " example: 10000"
+  echo " Exact cell count - Set a specific number (>=1) of cells as putative,"
+  echo " based on those with the highest error-corrected read count"
+  echo ""
+  echo " --disable_putative_calling"
+  echo " type: boolean_true"
+  echo " Disable Refined Putative Cell Calling - Determine putative cells using"
+  echo " only the basic algorithm (minimum second derivative along the cumulative"
+  echo " reads curve). The refined algorithm attempts to remove false positives"
+  echo " and recover false negatives, but may not be ideal for certain complex"
+  echo " mixtures of cell types. Does not apply if Exact Cell Count is set."
+  echo ""
+  echo "Subsample arguments:"
+  echo " --subsample"
+  echo " type: double"
+  echo " example: 0.01"
+  echo " A number >1 or fraction (0 < n < 1) to indicate the number or percentage"
+  echo " of reads to subsample."
+  echo ""
+  echo " --subsample_seed"
+  echo " type: integer"
+  echo " example: 3445"
+  echo " A seed for replicating a previous subsampled run."
+  echo ""
+  echo "Multiplex arguments:"
+  echo " --sample_tags_version"
+  echo " type: string"
+  echo " example: human"
+  echo " choices: [ human, hs, mouse, mm ]"
+  echo " Specify if multiplexed run."
+  echo ""
+  echo " --tag_names"
+  echo " type: string, multiple values allowed"
+  echo " example: 4-mySample:9-myOtherSample:6-alsoThisSample"
+  echo " Tag_Names (optional) - Specify the tag number followed by '-' and the"
+  echo " desired sample name to appear in Sample_Tag_Metrics.csv."
+  echo " Do not use the special characters: &, (), [], {}, <>, ?, |"
+  echo ""
+  echo "VDJ arguments:"
+  echo " --vdj_version"
+  echo " type: string"
+  echo " example: human"
+  echo " choices: [ human, mouse, humanBCR, humanBCR, humanTCR, mouseBCR ]"
+  echo " Specify if VDJ run."
+  echo ""
+  echo "CWL-runner arguments:"
+  echo " --parallel"
+  echo " type: boolean"
+  echo " default: true"
+  echo " Run jobs in parallel."
+  echo ""
+  echo " --timestamps"
+  echo " type: boolean_true"
+  echo " Add timestamps to the errors, warnings, and notifications."
+  echo ""
+  echo " --dryrun"
+  echo " type: boolean_true"
+  echo " If true, the output directory will only contain the CWL input files, but"
+  echo " the pipeline itself will not be executed."
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: exit with status 1 unless the docker client exists and 'docker version' succeeds
+#
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then  # plain command test; wrapping it in '[ ! ... ]' is a malformed test that never detects a missing docker
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e  # remember whether errexit was on, then disable it so the status of the next command can be inspected
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e  # restore errexit only if it was enabled before
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect "$1" > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q "$1")" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image could be pulled
+# examples:
+#   ViashDockerPull python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull "$1" && return 0 || return 1
+  else
+    save=$-; set +e
+    docker pull "$1" 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the push succeeded
+# examples:
+#   ViashDockerPush python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push "$1"
+    out=$?
+  else
+    docker push "$1" 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`; all arguments are forwarded to ViashDockerBuild
+# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull "$1"
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild "$@"
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $2                  : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild $VSHD_ID
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then  # ifneedbe*: only act when the image is not already available locally
+    save=$-; set +e
+    ViashDockerLocalTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then  # push only when the remote does not have the tag yet
+    save=$-; set +e
+    ViashDockerRemoteTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $@                  : commands to verify being present; the script exits with status 1 at the first missing one
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the Dockerfile of this component to stdout
+# return : dockerfile required to run this component
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM ghcr.io/data-intuitive/bd_rhapsody:1.10.1
+
+ENTRYPOINT []
+
+
+RUN pip install --upgrade pip && \
+  pip install --upgrade --no-cache-dir "pandas<2"
+
+LABEL org.opencontainers.image.authors="Robrecht Cannoodt"
+LABEL org.opencontainers.image.description="Companion container for running component mapping bd_rhapsody"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container, then verify it provides 'ps' and 'bash'
+# $1                  : image identifier with format `[registry/]image[:tag]`; further arguments are passed to 'docker build'
+# exit code $?        : a failed build does not return: the script exits with status 1 after printing the build log
+function ViashDockerBuild {
+  # create temporary directory to store dockerfile & optional resources in
+  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bd_rhapsody-XXXXXX")
+  dockerfile="$tmpdir/Dockerfile"
+  function clean_up {
+    rm -rf "$tmpdir"
+  }
+  trap clean_up EXIT
+
+  # store dockerfile and resources
+  ViashDockerfile > $dockerfile
+
+  # Build the container
+  ViashNotice "Building container '$1' with Dockerfile"
+  ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile
+  else
+    docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log
+  fi
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashError "Error occurred while building container '$1'"
+    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
+      ViashError "Transcript: --------------------------------"
+      cat "$tmpdir/docker_build.log"
+      ViashError "End of transcript --------------------------"
+    fi
+    exit 1
+  fi
+  ViashDockerCheckCommands "$1" 'ps' 'bash'
+}
+
+# ViashAbsolutePath: generate absolute path from relative path
+# borrowed from https://stackoverflow.com/a/21951256
+# $1     : relative filename (resolved against $PWD unless it already starts with '/')
+# return : absolute path with '.', '..' and empty components collapsed, printed on stdout
+# examples:
+#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
+#   ViashAbsolutePath /foo/bar/..     # returns /foo
+function ViashAbsolutePath {
+  local thePath
+  if [[ ! "$1" =~ ^/ ]]; then
+    thePath="$PWD/$1"
+  else
+    thePath="$1"
+  fi
+  echo "$thePath" | (
+    IFS=/
+    read -a parr
+    declare -a outp
+    for i in "${parr[@]}"; do
+      case "$i" in
+      ''|.) continue ;;
+      ..)
+        len=${#outp[@]}
+        if ((len==0)); then
+          continue
+        else
+          unset outp[$((len-1))]
+        fi
+        ;;
+      *)
+        len=${#outp[@]}
+        outp[$len]="$i"
+        ;;
+      esac
+    done
+    echo /"${outp[*]}"
+  )
+}
+# ViashAutodetectMount: translate a host path into its location under /viash_automount
+# $1                    : The parameter value
+# returns               : New parameter
+# $VIASH_EXTRA_MOUNTS   : NOTE(review): not modified by the functions below; presumably the caller appends the ViashAutodetectMountArg output to it - confirm
+# examples:
+#   ViashAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
+#   ViashAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
+function ViashAutodetectMount {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  echo "$mount_target/$base_name"
+}
+function ViashAutodetectMountArg {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
+  echo "--volume=\"$mount_source:$mount_target\""
+}
+function ViashStripAutomount {
+  abs_path=$(ViashAbsolutePath "$1")
+  echo "${abs_path#/viash_automount}"
+}
+# ViashExtractFlags: Retain leading flag (everything before the first '=')
+# $1     : string with a possible leading flag
+# return : leading flag
+# examples:
+#   ViashExtractFlags --foo=bar  # returns --foo
+function ViashExtractFlags {
+  echo $1 | sed 's/=.*//'
+}
+# initialise variables
+VIASH_EXTRA_MOUNTS=()
+
+# initialise the positional-argument accumulator (a string, not an array) and the default run mode
+VIASH_POSITIONAL_ARGS=''
+VIASH_MODE='run'
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help)
+            ViashHelp
+            exit
+            ;;
+        ---v|---verbose)
+            let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
+            shift 1
+            ;;
+        ---verbosity)
+            VIASH_VERBOSITY="$2"
+            shift 2
+            ;;
+        ---verbosity=*)
+
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "bd_rhapsody 0.12.4" + exit + ;; + --mode) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --mode=*) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference) + if [ -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE="$2" + else + VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --reference=*) + if [ -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -r) + if [ -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE="$2" + else + VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_genome) + if [ -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE="$2" + else + VIASH_PAR_REFERENCE="$VIASH_PAR_REFERENCE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_genome. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_annotation) + [ -n "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && ViashError Bad arguments for option \'--transcriptome_annotation\': \'$VIASH_PAR_TRANSCRIPTOME_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_ANNOTATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_annotation. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_annotation=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && ViashError Bad arguments for option \'--transcriptome_annotation=*\': \'$VIASH_PAR_TRANSCRIPTOME_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_ANNOTATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + -t) + [ -n "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TRANSCRIPTOME_ANNOTATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_ANNOTATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --abseq_reference) + if [ -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then + VIASH_PAR_ABSEQ_REFERENCE="$2" + else + VIASH_PAR_ABSEQ_REFERENCE="$VIASH_PAR_ABSEQ_REFERENCE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --abseq_reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --abseq_reference=*) + if [ -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then + VIASH_PAR_ABSEQ_REFERENCE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ABSEQ_REFERENCE="$VIASH_PAR_ABSEQ_REFERENCE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -a) + if [ -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then + VIASH_PAR_ABSEQ_REFERENCE="$2" + else + VIASH_PAR_ABSEQ_REFERENCE="$VIASH_PAR_ABSEQ_REFERENCE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -a. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --supplemental_reference) + if [ -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then + VIASH_PAR_SUPPLEMENTAL_REFERENCE="$2" + else + VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_PAR_SUPPLEMENTAL_REFERENCE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --supplemental_reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --supplemental_reference=*) + if [ -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then + VIASH_PAR_SUPPLEMENTAL_REFERENCE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_PAR_SUPPLEMENTAL_REFERENCE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -s) + if [ -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then + VIASH_PAR_SUPPLEMENTAL_REFERENCE="$2" + else + VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_PAR_SUPPLEMENTAL_REFERENCE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --sample_prefix) + [ -n "$VIASH_PAR_SAMPLE_PREFIX" ] && ViashError Bad arguments for option \'--sample_prefix\': \'$VIASH_PAR_SAMPLE_PREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_PREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_prefix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_prefix=*) + [ -n "$VIASH_PAR_SAMPLE_PREFIX" ] && ViashError Bad arguments for option \'--sample_prefix=*\': \'$VIASH_PAR_SAMPLE_PREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_PREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --putative_cell_call) + [ -n "$VIASH_PAR_PUTATIVE_CELL_CALL" ] && ViashError Bad arguments for option \'--putative_cell_call\': \'$VIASH_PAR_PUTATIVE_CELL_CALL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_PUTATIVE_CELL_CALL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --putative_cell_call. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --putative_cell_call=*) + [ -n "$VIASH_PAR_PUTATIVE_CELL_CALL" ] && ViashError Bad arguments for option \'--putative_cell_call=*\': \'$VIASH_PAR_PUTATIVE_CELL_CALL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PUTATIVE_CELL_CALL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --exact_cell_count) + [ -n "$VIASH_PAR_EXACT_CELL_COUNT" ] && ViashError Bad arguments for option \'--exact_cell_count\': \'$VIASH_PAR_EXACT_CELL_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXACT_CELL_COUNT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exact_cell_count. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --exact_cell_count=*) + [ -n "$VIASH_PAR_EXACT_CELL_COUNT" ] && ViashError Bad arguments for option \'--exact_cell_count=*\': \'$VIASH_PAR_EXACT_CELL_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXACT_CELL_COUNT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --disable_putative_calling) + [ -n "$VIASH_PAR_DISABLE_PUTATIVE_CALLING" ] && ViashError Bad arguments for option \'--disable_putative_calling\': \'$VIASH_PAR_DISABLE_PUTATIVE_CALLING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DISABLE_PUTATIVE_CALLING=true + shift 1 + ;; + --subsample) + [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --subsample=*) + [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample=*\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --subsample_seed) + [ -n "$VIASH_PAR_SUBSAMPLE_SEED" ] && ViashError Bad arguments for option \'--subsample_seed\': \'$VIASH_PAR_SUBSAMPLE_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE_SEED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample_seed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --subsample_seed=*) + [ -n "$VIASH_PAR_SUBSAMPLE_SEED" ] && ViashError Bad arguments for option \'--subsample_seed=*\': \'$VIASH_PAR_SUBSAMPLE_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE_SEED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sample_tags_version) + [ -n "$VIASH_PAR_SAMPLE_TAGS_VERSION" ] && ViashError Bad arguments for option \'--sample_tags_version\': \'$VIASH_PAR_SAMPLE_TAGS_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLE_TAGS_VERSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_tags_version. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sample_tags_version=*) + [ -n "$VIASH_PAR_SAMPLE_TAGS_VERSION" ] && ViashError Bad arguments for option \'--sample_tags_version=*\': \'$VIASH_PAR_SAMPLE_TAGS_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SAMPLE_TAGS_VERSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --tag_names) + if [ -z "$VIASH_PAR_TAG_NAMES" ]; then + VIASH_PAR_TAG_NAMES="$2" + else + VIASH_PAR_TAG_NAMES="$VIASH_PAR_TAG_NAMES:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --tag_names. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --tag_names=*) + if [ -z "$VIASH_PAR_TAG_NAMES" ]; then + VIASH_PAR_TAG_NAMES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_TAG_NAMES="$VIASH_PAR_TAG_NAMES:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --vdj_version) + [ -n "$VIASH_PAR_VDJ_VERSION" ] && ViashError Bad arguments for option \'--vdj_version\': \'$VIASH_PAR_VDJ_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VDJ_VERSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --vdj_version. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --vdj_version=*) + [ -n "$VIASH_PAR_VDJ_VERSION" ] && ViashError Bad arguments for option \'--vdj_version=*\': \'$VIASH_PAR_VDJ_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VDJ_VERSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --parallel) + [ -n "$VIASH_PAR_PARALLEL" ] && ViashError Bad arguments for option \'--parallel\': \'$VIASH_PAR_PARALLEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PARALLEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --parallel. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --parallel=*) + [ -n "$VIASH_PAR_PARALLEL" ] && ViashError Bad arguments for option \'--parallel=*\': \'$VIASH_PAR_PARALLEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_PARALLEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --timestamps) + [ -n "$VIASH_PAR_TIMESTAMPS" ] && ViashError Bad arguments for option \'--timestamps\': \'$VIASH_PAR_TIMESTAMPS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TIMESTAMPS=true + shift 1 + ;; + --dryrun) + [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DRYRUN=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
# helper function for parsing memory strings
#
# Usage: ViashMemoryAsBytes <memory>
#   Prints <memory> converted to a number of bytes, using binary multiples
#   (1k = 1024 b). Upper/lower case and embedded whitespace are ignored, and
#   the unit may be written with or without the trailing "b" (k, kb, m, mb,
#   g, gb, t, tb, p, pb) or as plain "b".
#   Prints nothing when <memory> is not of the form <digits><unit>.
function ViashMemoryAsBytes {
  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
  if [[ $memory =~ $memory_regex ]]; then
    # Force base 10: inside $(( )) a leading zero would otherwise make the
    # value octal ("010" -> 8) or an outright error ("08").
    local number=$(( 10#${BASH_REMATCH[1]} ))
    local symbol=${BASH_REMATCH[2]}
    # keep the result local so it does not leak into the caller's scope
    local memory_b

    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( number * 1024 )) ;;
      mb|m)   memory_b=$(( number * 1024 * 1024 )) ;;
      gb|g)   memory_b=$(( number * 1024 * 1024 * 1024 )) ;;
      tb|t)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 )) ;;
      pb|p)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}
# compute memory in different units
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  # quoted so that a value such as "10 GB" reaches the parser in one piece
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # (unset takes the variable NAME; with a leading "$" it expanded to a
    # bare "unset" and left the variable set-but-empty)
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_SAMPLE_PREFIX+x} ]; then + VIASH_PAR_SAMPLE_PREFIX="sample" +fi +if [ -z ${VIASH_PAR_DISABLE_PUTATIVE_CALLING+x} ]; then + VIASH_PAR_DISABLE_PUTATIVE_CALLING="false" +fi +if [ -z ${VIASH_PAR_PARALLEL+x} ]; then + VIASH_PAR_PARALLEL="true" +fi +if [ -z ${VIASH_PAR_TIMESTAMPS+x} ]; then + VIASH_PAR_TIMESTAMPS="false" +fi +if [ -z ${VIASH_PAR_DRYRUN+x} ]; then + VIASH_PAR_DRYRUN="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_REFERENCE; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_ANNOTATION' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_ABSEQ_REFERENCE; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_SUPPLEMENTAL_REFERENCE; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." 
+ exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_EXACT_CELL_COUNT" ]]; then + if ! [[ "$VIASH_PAR_EXACT_CELL_COUNT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--exact_cell_count' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DISABLE_PUTATIVE_CALLING" ]]; then + if ! [[ "$VIASH_PAR_DISABLE_PUTATIVE_CALLING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--disable_putative_calling' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SUBSAMPLE" ]]; then + if ! [[ "$VIASH_PAR_SUBSAMPLE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--subsample' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SUBSAMPLE_SEED" ]]; then + if ! [[ "$VIASH_PAR_SUBSAMPLE_SEED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--subsample_seed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PARALLEL" ]]; then + if ! [[ "$VIASH_PAR_PARALLEL" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--parallel' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TIMESTAMPS" ]]; then + if ! [[ "$VIASH_PAR_TIMESTAMPS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--timestamps' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DRYRUN" ]]; then + if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_MODE" ]; then + VIASH_PAR_MODE_CHOICES=("wta:targeted") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then + ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! 
-z "$VIASH_PAR_PUTATIVE_CELL_CALL" ]; then + VIASH_PAR_PUTATIVE_CELL_CALL_CHOICES=("mRNA:AbSeq_Experimental") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_PUTATIVE_CELL_CALL_CHOICES[*]}:" =~ ":$VIASH_PAR_PUTATIVE_CELL_CALL:" ]]; then + ViashError '--putative_cell_call' specified value of \'$VIASH_PAR_PUTATIVE_CELL_CALL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_SAMPLE_TAGS_VERSION" ]; then + VIASH_PAR_SAMPLE_TAGS_VERSION_CHOICES=("human:hs:mouse:mm") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_SAMPLE_TAGS_VERSION_CHOICES[*]}:" =~ ":$VIASH_PAR_SAMPLE_TAGS_VERSION:" ]]; then + ViashError '--sample_tags_version' specified value of \'$VIASH_PAR_SAMPLE_TAGS_VERSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_VDJ_VERSION" ]; then + VIASH_PAR_VDJ_VERSION_CHOICES=("human:mouse:humanBCR:humanBCR:humanTCR:mouseBCR") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_VDJ_VERSION_CHOICES[*]}:" =~ ":$VIASH_PAR_VDJ_VERSION:" ]]; then + ViashError '--vdj_version' specified value of \'$VIASH_PAR_VDJ_VERSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! 
-z "$VIASH_PAR_REFERENCE" ]; then + VIASH_TEST_REFERENCE=() + IFS=';' + for var in $VIASH_PAR_REFERENCE; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_REFERENCE+=( "$var" ) + done + VIASH_PAR_REFERENCE=$(IFS=';' ; echo "${VIASH_TEST_REFERENCE[*]}") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION")" ) + VIASH_PAR_TRANSCRIPTOME_ANNOTATION=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION") +fi +if [ ! -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then + VIASH_TEST_ABSEQ_REFERENCE=() + IFS=';' + for var in $VIASH_PAR_ABSEQ_REFERENCE; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_ABSEQ_REFERENCE+=( "$var" ) + done + VIASH_PAR_ABSEQ_REFERENCE=$(IFS=';' ; echo "${VIASH_TEST_ABSEQ_REFERENCE[*]}") +fi +if [ ! -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then + VIASH_TEST_SUPPLEMENTAL_REFERENCE=() + IFS=';' + for var in $VIASH_PAR_SUPPLEMENTAL_REFERENCE; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_SUPPLEMENTAL_REFERENCE+=( "$var" ) + done + VIASH_PAR_SUPPLEMENTAL_REFERENCE=$(IFS=';' ; echo "${VIASH_TEST_SUPPLEMENTAL_REFERENCE[*]}") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
# change file ownership
#
# Runs chown inside the component's container so that every path collected in
# VIASH_CHOWN_VARS ends up owned by the invoking user and group (id -u / id -g).
# Does nothing when no path was collected.
function ViashPerformChown {
  # nothing registered for chown: leave quietly
  if [ "${#VIASH_CHOWN_VARS[@]}" -eq 0 ]; then
    return 0
  fi

  # errexit is switched off around the call so a failing chown does not
  # abort the script, then switched back on
  set +e
  eval docker run \
    --entrypoint=chown -i --rm \
    ${VIASH_UNIQUE_MOUNTS[@]} \
    ghcr.io/openpipelines-bio/mapping_bd_rhapsody:0.12.0 \
    "$(id -u):$(id -g)" --silent --recursive \
    ${VIASH_CHOWN_VARS[@]}
  set -e
}
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import os +import re +import subprocess +import tempfile +import sys +from typing import Any +import pandas as pd +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'transcriptome_annotation': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_ANNOTATION+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_ANNOTATION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'abseq_reference': $( if [ ! -z ${VIASH_PAR_ABSEQ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_ABSEQ_REFERENCE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'supplemental_reference': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTAL_REFERENCE+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTAL_REFERENCE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sample_prefix': $( if [ ! -z ${VIASH_PAR_SAMPLE_PREFIX+x} ]; then echo "r'${VIASH_PAR_SAMPLE_PREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'putative_cell_call': $( if [ ! -z ${VIASH_PAR_PUTATIVE_CELL_CALL+x} ]; then echo "r'${VIASH_PAR_PUTATIVE_CELL_CALL//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'exact_cell_count': $( if [ ! -z ${VIASH_PAR_EXACT_CELL_COUNT+x} ]; then echo "int(r'${VIASH_PAR_EXACT_CELL_COUNT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'disable_putative_calling': $( if [ ! 
-z ${VIASH_PAR_DISABLE_PUTATIVE_CALLING+x} ]; then echo "r'${VIASH_PAR_DISABLE_PUTATIVE_CALLING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'subsample_seed': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE_SEED+x} ]; then echo "int(r'${VIASH_PAR_SUBSAMPLE_SEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sample_tags_version': $( if [ ! -z ${VIASH_PAR_SAMPLE_TAGS_VERSION+x} ]; then echo "r'${VIASH_PAR_SAMPLE_TAGS_VERSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'tag_names': $( if [ ! -z ${VIASH_PAR_TAG_NAMES+x} ]; then echo "r'${VIASH_PAR_TAG_NAMES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'vdj_version': $( if [ ! -z ${VIASH_PAR_VDJ_VERSION+x} ]; then echo "r'${VIASH_PAR_VDJ_VERSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'parallel': $( if [ ! -z ${VIASH_PAR_PARALLEL+x} ]; then echo "r'${VIASH_PAR_PARALLEL//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'timestamps': $( if [ ! -z ${VIASH_PAR_TIMESTAMPS+x} ]; then echo "r'${VIASH_PAR_TIMESTAMPS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    """Attach a stdout handler to the root logger and return that logger.

    The root logger level is set to INFO and records are formatted as
    "<time> <level, padded to 8> <message>".
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler(stdout)
    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    console_handler.setFormatter(logFormatter)
    logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()


def is_gz_file(filepath):
    """Return True when the file starts with the two gzip magic bytes."""
    with open(filepath, 'rb') as test_f:
        return test_f.read(2) == b'\x1f\x8b'


def strip_margin(text: str) -> str:
    """Remove every run of spaces/tabs that ends in a '|' margin marker.

    A newline directly in front of the run is kept, so a block written as
    indented "|"-prefixed lines comes out flush left.
    """
    return re.sub(r'(\n?)[ \t]*\|', r'\1', text)


def process_params(par: dict[str, Any]) -> dict[str, Any]:
    """Validate the component parameters and normalise them in place.

    Steps:
      * check the mode-specific requirements on the reference arguments;
      * replace every character of "sample_prefix" that is not a letter,
        digit or hyphen with "-" (a warning is logged when this happens);
      * when "input" is a single directory, replace it with the ".fastq.gz"
        files found anywhere below that directory;
      * turn all file arguments and "output" into absolute paths.

    Args:
        par: the parameter dictionary; it is modified in place.

    Returns:
        The same dictionary that was passed in.

    Raises:
        AssertionError: when "input" is missing or the reference arguments
            do not fit the selected mode.
    """
    # check input parameters
    assert par["input"] is not None, "Pass at least one set of inputs to --input."
    if par["mode"] == "wta":
        assert len(par["reference"]) == 1, 'When mode is "wta", --reference should be length 1'
        assert par["transcriptome_annotation"] is not None, 'When mode is "wta", --transcriptome_annotation should be defined'
    elif par["mode"] == "targeted":
        assert par["transcriptome_annotation"] is None, 'When mode is "targeted", --transcriptome_annotation should be undefined'
        assert par["supplemental_reference"] is None, 'When mode is "targeted", --supplemental_reference should be undefined'

    # checking sample prefix
    # search (not match) so an offending character anywhere in the prefix is
    # detected; the same character class is used for detection and replacement
    if re.search("[^A-Za-z0-9-]", par["sample_prefix"]):
        logger.warning("--sample_prefix should only consist of letters, numbers or hyphens. Replacing all '[^A-Za-z0-9]' with '-'.")
        par["sample_prefix"] = re.sub("[^A-Za-z0-9-]", "-", par["sample_prefix"])

    # if par_input is a directory, look for fastq files
    if len(par["input"]) == 1 and os.path.isdir(par["input"][0]):
        # walk the directory itself (par["input"] is a list); sorted so the
        # resulting order does not depend on the file system
        par["input"] = sorted(
            os.path.join(dirpath, filename)
            for dirpath, _, filenames in os.walk(par["input"][0])
            for filename in filenames
            if filename.endswith(".fastq.gz")
        )

    # use absolute paths
    par["input"] = [os.path.abspath(f) for f in par["input"]]
    for key in ("reference", "abseq_reference", "supplemental_reference"):
        if par[key]:
            par[key] = [os.path.abspath(f) for f in par[key]]
    if par["transcriptome_annotation"]:
        par["transcriptome_annotation"] = os.path.abspath(par["transcriptome_annotation"])
    par["output"] = os.path.abspath(par["output"])

    return par
else "Targeted" } Rhapsody Analysis pipeline run. See the + |# BD Genomics Analysis Setup User Guide (Doc ID: 47383) for more details. + | + |## Reads (required) - Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want. + |Reads: + |""")] + + for file in par["input"]: + content_list.append(strip_margin(f"""\\ + | - class: File + | location: "{file}" + |""")) + + if par["reference"] and par["mode"] == "wta": + content_list.append(strip_margin(f"""\\ + | + |## Reference_Genome (required) - Path to STAR index for tar.gz format. See Doc ID: 47383 for instructions to obtain pre-built STAR index file. + |Reference_Genome: + | class: File + | location: "{par["reference"][0]}" + |""")) + + if par["reference"] and par["mode"] == "targeted": + content_list.append(strip_margin(f"""\\ + | + |## Reference (optional) - Path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format. + |Reference: + |""")) + for file in par["reference"]: + content_list.append(strip_margin(f"""\\ + | - class: File + | location: {file} + |""")) + + if par["transcriptome_annotation"]: + content_list.append(strip_margin(f"""\\ + | + |## Transcriptome_Annotation (required) - Path to GTF annotation file + |Transcriptome_Annotation: + | class: File + | location: "{par["transcriptome_annotation"]}" + |""")) + + if par["abseq_reference"]: + content_list.append(strip_margin(f"""\\ + | + |## AbSeq_Reference (optional) - Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used. + |AbSeq_Reference: + |""")) + for file in par["abseq_reference"]: + content_list.append(strip_margin(f"""\\ + | - class: File + | location: {file} + |""")) + + if par["supplemental_reference"]: + content_list.append(strip_margin(f"""\\ + | + |## Supplemental_Reference (optional) - Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment. 
+ |Supplemental_Reference: + |""")) + for file in par["supplemental_reference"]: + content_list.append(strip_margin(f"""\\ + | - class: File + | location: {file} + |""")) + + ## Putative Cell Calling Settings + content_list.append(strip_margin(f"""\\ + | + |#################################### + |## Putative Cell Calling Settings ## + |#################################### + |""")) + + if par["putative_cell_call"]: + content_list.append(strip_margin(f"""\\ + |## Putative cell calling dataset (optional) - Specify the dataset to be used for putative cell calling: mRNA or AbSeq_Experimental. + |## For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above. + |## By default, the mRNA data will be used for putative cell calling. + |Putative_Cell_Call: {par["putative_cell_call"]} + |""")) + + if par["exact_cell_count"]: + content_list.append(strip_margin(f"""\\ + |## Exact cell count (optional) - Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count + |Exact_Cell_Count: {par["exact_cell_count"]} + |""")) + + if par["disable_putative_calling"]: + content_list.append(strip_margin(f"""\\ + |## Disable Refined Putative Cell Calling (optional) - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set. + |## The values can be true or false. By default, the refined algorithm is used. 
+ |Basic_Algo_Only: {str(par["disable_putative_calling"]).lower()} + |""")) + + ## Subsample Settings + content_list.append(strip_margin(f"""\\ + | + |######################## + |## Subsample Settings ## + |######################## + |""" + )) + + if par["subsample"]: + content_list.append(strip_margin(f"""\\ + |## Subsample (optional) - A number >1 or fraction (0 < n < 1) to indicate the number or percentage of reads to subsample. + |Subsample: {par["subsample"]} + |""")) + + if par["subsample_seed"]: + content_list.append(strip_margin(f"""\\ + |## Subsample seed (optional) - A seed for replicating a previous subsampled run. + |Subsample_seed: {par["subsample_seed"]} + |""")) + + + ## Multiplex options + content_list.append(strip_margin(f"""\\ + | + |####################### + |## Multiplex options ## + |####################### + |""" + )) + + if par["sample_tags_version"]: + content_list.append(strip_margin(f"""\\ + |## Sample Tags Version (optional) - Specify if multiplexed run: human, hs, mouse or mm + |Sample_Tags_Version: {par["sample_tags_version"]} + |""")) + + if par["tag_names"]: + content_list.append(strip_margin(f"""\\ + |## Tag_Names (optional) - Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv + |# Do not use the special characters: &, (), [], {{}}, <>, ?, | + |Tag_Names: [{', '.join(par["tag_names"])}] + |""")) + + ## VDJ options + content_list.append(strip_margin(f"""\\ + | + |################# + |## VDJ options ## + |################# + |""" + )) + + if par["vdj_version"]: + content_list.append(strip_margin(f"""\\ + |## VDJ Version (optional) - Specify if VDJ run: human, mouse, humanBCR, humanTCR, mouseBCR, mouseTCR + |VDJ_Version: {par["vdj_version"]} + |""")) + + ## VDJ options + content_list.append(strip_margin(f"""\\ + | + |######################## + |## Additional Options ## + |######################## + |""" + )) + + if par["sample_prefix"]: + content_list.append(strip_margin(f"""\\ + |## 
Run Name (optional) - Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces. + |Run_Name: {par["sample_prefix"]} + |""")) + + ## Write config to file + return ''.join(content_list) + +def generate_cwl_file(par: dict[str, Any], meta: dict[str, Any]) -> str: + # create cwl file (if need be) + if par["mode"] == "wta": + orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_wta_1.10.1_nodocker.cwl") + elif par["mode"] == "targeted": + orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_targeted_1.10.1_nodocker.cwl") + + # Inject computational requirements into pipeline + if meta["memory_mb"] or meta["cpus"]: + cwl_file = os.path.join(par["output"], "pipeline.cwl") + + # Read in the file + with open(orig_cwl_file, 'r') as file : + cwl_data = file.read() + + # Inject computational requirements into pipeline + if meta["memory_mb"]: + memory = int(meta["memory_mb"]) - 2000 # keep 2gb for OS + cwl_data = re.sub('"ramMin": [^\\n]*,\\n', f'"ramMin": {memory},\\n', cwl_data) + if meta["cpus"]: + cwl_data = re.sub('"coresMin": [^\\n]*,\\n', f'"coresMin": {meta["cpus"]},\\n', cwl_data) + + # Write the file out again + with open(cwl_file, 'w') as file: + file.write(cwl_data) + else: + cwl_file = orig_cwl_file + + return cwl_file + +def process_fasta(feature_type: str, path: str) -> pd.DataFrame: + with open(path) as f: + df = pd.DataFrame(data={ + 'feature_type': feature_type, + 'feature_id': [line[1:].strip() for line in f if line[0] == ">"], + 'reference_file': os.path.basename(path), + }) + return df + +def process_gtf(feature_type: str, path: str) -> pd.DataFrame: + with open(path) as f: + data = [] + for line in f: + if not line.startswith("#"): + attr = dict(item.strip().split(' ') for item in line.split('\\t')[8].strip('\\n').split(';') if item) + row = { + 'feature_types': feature_type, + 'feature_ids': attr["gene_name"].strip("\\""), + 'reference_file': os.path.basename(path), 
+ } + data.append(row) + df = pd.DataFrame(data) + df = df.drop_duplicates() + return df + +def extract_feature_types(par: dict[str, Any]): + feature_types = [] + + if par["mode"] == "targeted": + for file in par["reference"]: + logger.info(f"Processing reference fasta {file}") + feature_types.append(process_fasta("Gene Expression", file)) + + if par["mode"] == "wta": + file = par["transcriptome_annotation"] + logger.info(f"Processing reference gtf {file}") + feature_types.append(process_gtf("Gene Expression", file)) + + if par["abseq_reference"]: + for file in par["abseq_reference"]: + logger.info(f"Processing abseq fasta {file}") + feature_types.append(process_fasta("Antibody Capture", file)) + + if par["supplemental_reference"]: + for file in par["supplemental_reference"]: + logger.info(f"Processing supp fasta {file}") + feature_types.append(process_fasta("Other", file)) + + return pd.concat(feature_types) + +def main(par: dict[str, Any], meta: dict[str, Any]): + # Preprocess params + par = process_params(par) + + # Create output dir if not exists + if not os.path.exists(par["output"]): + os.makedirs(par["output"]) + + ## Process parameters + proc_pars = ["--no-container", "--outdir", par["output"]] + + if par["parallel"]: + proc_pars.append("--parallel") + + if par["timestamps"]: + proc_pars.append("--timestamps") + + with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_wta-", dir=meta["temp_dir"]) as temp_dir: + # extract transcriptome gtf if need be + if par["transcriptome_annotation"] and is_gz_file(par["transcriptome_annotation"]): + with open(os.path.join(temp_dir, "transcriptome.gtf"), 'wb') as genes_uncompressed: + with gzip.open(par["transcriptome_annotation"], 'rb') as genes_compressed: + shutil.copyfileobj(genes_compressed, genes_uncompressed) + par["transcriptome_annotation"] = genes_uncompressed.name + + # Create params file + config_file = os.path.join(par["output"], "config.yml") + config_content = generate_config(par) + with open(config_file, 
"w") as f: + f.write(config_content) + + # Create cwl file (if need be) + cwl_file = generate_cwl_file(par, meta) + + ## Run pipeline + if not par["dryrun"]: + cmd = ["cwl-runner"] + proc_pars + [cwl_file, os.path.basename(config_file)] + + env = dict(os.environ) + env["TMPDIR"] = temp_dir + + logger.info("> " + ' '.join(cmd)) + _ = subprocess.check_call( + cmd, + cwd=os.path.dirname(config_file), + env=env + ) + + # extracting feature ids from references + # extract info from reference files (while they still exist) + feature_df = extract_feature_types(par) + feature_types_file = os.path.join(par["output"], "feature_types.tsv") + feature_df.to_csv(feature_types_file, sep="\\t", index=False) + + + if not par["dryrun"]: + # look for counts file + if not par["sample_prefix"]: + par["sample_prefix"] = "sample" + counts_filename = par["sample_prefix"] + "_RSEC_MolsPerCell.csv" + + if par["sample_tags_version"]: + counts_filename = "Combined_" + counts_filename + counts_file = os.path.join(par["output"], counts_filename) + + if not os.path.exists(counts_file): + raise ValueError(f"Could not find output counts file '{counts_filename}'") + + # look for metrics file + metrics_filename = par["sample_prefix"] + "_Metrics_Summary.csv" + metrics_file = os.path.join(par["output"], metrics_filename) + if not os.path.exists(metrics_file): + raise ValueError(f"Could not find output metrics file '{metrics_filename}'") + +if __name__ == "__main__": + main(par, meta) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" +fi +if [ ! 
-z "$VIASH_PAR_REFERENCE" ]; then + unset VIASH_TEST_REFERENCE + IFS=';' + for var in $VIASH_PAR_REFERENCE; do + unset IFS + if [ -z "$VIASH_TEST_REFERENCE" ]; then + VIASH_TEST_REFERENCE="$(ViashStripAutomount "$var")" + else + VIASH_TEST_REFERENCE="$VIASH_TEST_REFERENCE;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_REFERENCE="$VIASH_TEST_REFERENCE" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION" ]; then + VIASH_PAR_TRANSCRIPTOME_ANNOTATION=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_ANNOTATION") +fi +if [ ! -z "$VIASH_PAR_ABSEQ_REFERENCE" ]; then + unset VIASH_TEST_ABSEQ_REFERENCE + IFS=';' + for var in $VIASH_PAR_ABSEQ_REFERENCE; do + unset IFS + if [ -z "$VIASH_TEST_ABSEQ_REFERENCE" ]; then + VIASH_TEST_ABSEQ_REFERENCE="$(ViashStripAutomount "$var")" + else + VIASH_TEST_ABSEQ_REFERENCE="$VIASH_TEST_ABSEQ_REFERENCE;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_ABSEQ_REFERENCE="$VIASH_TEST_ABSEQ_REFERENCE" +fi +if [ ! -z "$VIASH_PAR_SUPPLEMENTAL_REFERENCE" ]; then + unset VIASH_TEST_SUPPLEMENTAL_REFERENCE + IFS=';' + for var in $VIASH_PAR_SUPPLEMENTAL_REFERENCE; do + unset IFS + if [ -z "$VIASH_TEST_SUPPLEMENTAL_REFERENCE" ]; then + VIASH_TEST_SUPPLEMENTAL_REFERENCE="$(ViashStripAutomount "$var")" + else + VIASH_TEST_SUPPLEMENTAL_REFERENCE="$VIASH_TEST_SUPPLEMENTAL_REFERENCE;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_SUPPLEMENTAL_REFERENCE="$VIASH_TEST_SUPPLEMENTAL_REFERENCE" +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl b/target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl new file mode 100755 index 00000000000..56a6310bc07 --- /dev/null +++ b/target/docker/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl @@ -0,0 +1,5159 @@ +#!/usr/bin/env cwl-runner +{ + "cwlVersion": "v1.0", + "$graph": [ + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-r1", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AddtoBam.cwl/Annotation_R1" + }, + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#AddtoBam.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--annot-mol-file" + }, + "type": "File", + "id": "#AddtoBam.cwl/Molecular_Annotation" + }, + { + "inputBinding": { + "prefix": "--r2-bam" + }, + "type": "File", + "id": "#AddtoBam.cwl/R2_Bam" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AddtoBam.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--tag-calls" + }, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Tag_Calls" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "Annotated_mapping_R2.BAM" + }, + "type": "File", + "id": "#AddtoBam.cwl/Annotated_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AddtoBam.cwl/output" + } + ], + "baseCommand": [ + "mist_add_to_bam.py" + ], + "class": "CommandLineTool", + "id": 
"#AddtoBam.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + ], + "id": "#AlignR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--index" + }, + "type": "File", + "id": "#AlignR2.cwl/Index" + }, + { + "inputBinding": { + "prefix": "--r2-fastqs", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/R2" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AlignR2.cwl/Run_Metadata" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "$(String(runtime.cores))" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*zip" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/Alignments" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AlignR2.cwl/output" + } + ], + "baseCommand": [ + "mist_align_R2.py" + ], + "class": "CommandLineTool", + "id": "#AlignR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateMolecules.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--use-dbec" + }, + "type": [ + "null", + "boolean" + ], + "id": "#AnnotateMolecules.cwl/Use_DBEC" + }, + { + "inputBinding": { + "prefix": "--valid-annot" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Valids" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_GeneStatus.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Gene_Status_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": 
"$(JSON.parse(self[0].contents).max_count)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Max_Count" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Mol_Annot_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Total_Molecules" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_molecules.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateMolecules.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--filter-metrics", + "itemSeparator": "," + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#AnnotateR1.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--R1" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateR1.cwl/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 2000, + "class": "ResourceRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_R1.csv.gz" + }, + "type": "File", + "id": "#AnnotateR1.cwl/Annotation_R1" + }, + { + "outputBinding": { + "glob": "*_R1_error_count_table.npy" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_error_count_table" + }, + { + "outputBinding": { + "glob": "*_R1_read_count_breakdown.json" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_read_count_breakdown" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR1.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R1.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR1.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + 
], + "id": "#AnnotateR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF_Annotation" + }, + { + "inputBinding": { + "prefix": "--R2-zip" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_zip" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--transcript-length" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/Transcript_Length" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*Annotation_R2.csv.gz" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Annot_R2" + }, + { + "outputBinding": { + "glob": "*-annot.gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*mapping_R2.BAM" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_Bam" + }, + { + "outputBinding": { + "glob": "*_picard_quality_metrics.csv.gz" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_Quality_Metrics" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR2.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R2.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--extra-seqs", + "itemSeparator": "," + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Extra_Seqs" + }, + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#AnnotateReads.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/Putative_Cell_Call" + }, + { + "type": { + "items": "File", + 
"type": "array" + }, + "id": "#AnnotateReads.cwl/R1_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_error_count_table" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_read_count_breakdown" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Quality_Metrics" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + { + "class": "InitialWorkDirRequirement", + "listing": [ + { + "writable": false, + "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", + "entryname": "manifest.json" + } + ] + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "4" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_Read.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": 
"#AnnotateReads.cwl/Annotation_Read" + }, + { + "outputBinding": { + "glob": "*read1_error_rate_archive*" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Read1_error_rate" + }, + { + "outputBinding": { + "glob": "*_SeqMetrics.csv.gz" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Seq_Metrics" + }, + { + "outputBinding": { + "glob": "*Sorted_Valid_Reads.csv.*" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/Valid_Reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_ig_reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_tcr_reads" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateReads.cwl/output" + }, + { + "outputBinding": { + "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validIgReads" + }, + { + "outputBinding": { + "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validTcrReads" + } + ], + "baseCommand": [ + "mist_annotate_reads.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateReads.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#BundleLogs.cwl/log_files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + } + ], + "outputs": [ + { + "type": "Directory", + "id": "#BundleLogs.cwl/logs_dir" + } + ], + "class": "ExpressionTool", + "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function 
uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? (random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", + "id": "#BundleLogs.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 0 + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/molsPerCellMatrix" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*cell_type_experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/cellTypePredictions" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Cell_Classifier.cwl/log" + } + ], + "baseCommand": [ + "mist_cell_classifier.py" + ], + "class": "CommandLineTool", + "id": "#Cell_Classifier.cwl" + }, + { + "inputs": [ + { + "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", + "inputBinding": { + "prefix": "--min-split-size" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/MinChunkSize" + }, + { + "inputBinding": { + "prefix": "--reads", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#CheckFastqs.cwl/Reads" + }, + { + "inputBinding": { + "prefix": "--subsample" + }, + "type": [ + "null", + "float" + ], + "id": "#CheckFastqs.cwl/Subsample" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/Subsample_Seed" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": 
[ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "CheckFastqs does several quality control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", + "baseCommand": [ + "mist_check_fastqs.py" + ], + "id": "#CheckFastqs.cwl", + "outputs": [ + { + "outputBinding": { + "glob": "bead_version.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/Bead_Version" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/FastqReadPairs" + }, + { + "outputBinding": { + "glob": "files_to_skip_split_and_subsample.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" + }, + "type": { + "items": "string", + "type": "array" + }, + "id": 
"#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" + }, + "type": [ + "null", + "string" + ], + "id": "#CheckFastqs.cwl/Libraries" + }, + { + "outputBinding": { + "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#CheckFastqs.cwl/ReadsList" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" + }, + "type": "int", + "id": "#CheckFastqs.cwl/SubsampleSeed" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" + }, + "type": "float", + "id": "#CheckFastqs.cwl/SubsamplingRatio" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#CheckFastqs.cwl/log" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--abseq-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/AbSeq_Reference" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckReference.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + 
} + ], + "id": "#CheckReference.cwl/Reference" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#CheckReference.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--supplemental-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/Supplemental_Reference" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "combined_extra_seq.fasta" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Extra_Seqs" + }, + { + "outputBinding": { + "glob": "full-gene-list.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Full_Genes" + }, + { + "outputBinding": { + "glob": "*gtf", + "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. 
AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*-annot.*", + "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" + }, + "type": "File", + "id": "#CheckReference.cwl/Index" + }, + { + "outputBinding": { + "glob": "target-gene.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Target_Gene_Mapping" + }, + { + "outputBinding": { + "glob": "transcript_length.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Transcript_Length" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#CheckReference.cwl/output" + } + ], + "baseCommand": [ + "mist_check_references.py" + ], + "class": "CommandLineTool", + "id": "#CheckReference.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--dense-data-table" + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparse.cwl/Dense_Data_Table" + }, + { + "inputBinding": { + "prefix": "--gene-list" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Gene_List" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Run_Metadata" + } + ], + "requirements": [ + ], + 
"outputs": [ + { + "outputBinding": { + "glob": "*.csv.gz" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Data_Tables" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#DensetoSparse.cwl/output" + } + ], + "baseCommand": [ + "mist_dense_to_sparse.py" + ], + "class": "CommandLineTool", + "id": "#DensetoSparse.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparseFile.cwl/GDT_cell_order" + } + ], + "requirements": [ + ], + "stdout": "cell_order.json", + "outputs": [ + { + "type": "stdout", + "id": "#DensetoSparseFile.cwl/Cell_Order" + } + ], + "baseCommand": "cat", + "id": "#DensetoSparseFile.cwl", + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--full-gene-list" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Full_Genes" + }, + { + "inputBinding": { + "prefix": "--gene-status", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Gene_Status_List" + }, + { + "inputBinding": { + "prefix": "--max-count", + "itemSeparator": "," + }, + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Max_Count" + }, + { + "inputBinding": { + "prefix": "--mol-annot", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Molecule_Annotation_List" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#GetDataTable.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#GetDataTable.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#GetDataTable.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "prefix": "--tag-names", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + 
} + ], + "id": "#GetDataTable.cwl/Tag_Names" + }, + { + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Total_Molecules" + } + ], + "requirements": [ + { + "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", + "class": "ResourceRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "metrics-files.tar.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Annot_Files" + }, + { + "outputBinding": { + "glob": "Annotations/*_Bioproduct_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Bioproduct_Stats" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*.png" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Cell_Label_Filter" + }, + { + "outputBinding": { + "glob": "cell_order.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Cell_Order" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule_corrected.csv.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Corrected_Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "*PerCell_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables" + }, + { + "outputBinding": { + "glob": "*PerCell_Unfiltered_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" + }, + { + "outputBinding": { + "glob": "*_Expression_Data.st.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Expression_Data" + }, + { + "outputBinding": { + "glob": "*_Expression_Data_Unfiltered.st.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" + }, + { + "outputBinding": { + "glob": "gene_list.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Gene_List" + }, + { + 
"outputBinding": { + "glob": "Annotations/*_Annotation_Molecule.csv.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Putative_Cells_Origin" + }, + { + "outputBinding": { + "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Annotation" + }, + { + "outputBinding": { + "glob": "Trueno/*_Calls.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Calls" + }, + { + "outputBinding": { + "glob": "Trueno/*csv" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Trueno_out" + }, + { + "outputBinding": { + "glob": "Trueno/*zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Trueno_zip" + }, + { + "outputBinding": { + "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#GetDataTable.cwl/output" + } + ], + "baseCommand": [ + "mist_get_datatables.py" + ], + "class": "CommandLineTool", + "id": "#GetDataTable.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#IndexBAM.cwl/BamFile" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_index.log", + "outputs": [ + { + "outputBinding": { + "glob": "*.bai" + }, + "type": "File", + "id": "#IndexBAM.cwl/Index" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": 
"File", + "id": "#IndexBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "index" + ], + "id": "#IndexBAM.cwl", + "arguments": [ + { + "position": 2, + "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/AbSeq_UMI" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Barcode_Num" + }, + { + "type": [ + "null", + "File" + ], + "id": "#InternalSettings.cwl/Extra_Seqs" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Label_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/MinChunkSize" + }, + { + "type": [ + "null", + "long" + ], + "id": "#InternalSettings.cwl/NumRecordsPerSplit" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + "string" + ], + "id": "#InternalSettings.cwl/Seq_Run" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Target_analysis" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Use_DBEC" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" + } + ], + "class": "ExpressionTool", + "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput 
= internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", + "id": "#InternalSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/AbSeq_Reference", + "label": "AbSeq Reference" + }, + { + "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set.", + "type": [ + "null", + "boolean" + ], + "id": "#main/Basic_Algo_Only", + "label": "Disable Refined Putative Cell Calling" + }, + { + "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", + "type": [ + "null", + "int" + ], + "id": "#main/Exact_Cell_Count", + "label": "Exact Cell Count" + }, + { + "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. 
AbSeq (Experimental) is for troubleshooting only.", + "type": [ + "null", + { + "symbols": [ + "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", + "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" + ], + "type": "enum", + "name": "#main/Putative_Cell_Call/Putative_Cell_Call" + } + ], + "id": "#main/Putative_Cell_Call", + "label": "Putative Cell Calling" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/Reads", + "label": "Reads" + }, + { + "doc": "A fasta file containing the mRNA panel amplicon targets used in the experiment", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Reference", + "label": "Reference" + }, + { + "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", + "type": [ + "null", + "string" + ], + "id": "#main/Run_Name", + "label": "Run Name" + }, + { + "doc": "The sample multiplexing kit version. This option should only be set for a multiplexed experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/Sample_Tags_Version/Sample_Tags_Version/human", + "#main/Sample_Tags_Version/Sample_Tags_Version/hs", + "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", + "#main/Sample_Tags_Version/Sample_Tags_Version/mm", + "#main/Sample_Tags_Version/Sample_Tags_Version/custom" + ], + "type": "enum", + "name": "#main/Sample_Tags_Version/Sample_Tags_Version" + } + ], + "id": "#main/Sample_Tags_Version", + "label": "Sample Tags Version" + }, + { + "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", + "type": [ + "null", + "float" + ], + "id": "#main/Subsample", + "label": "Subsample Reads" + }, + { + "doc": "For use when replicating a previous subsampling run only. 
Obtain the seed generated from the log file for the SplitFastQ node.\n", + "type": [ + "null", + "int" + ], + "id": "#main/Subsample_seed", + "label": "Subsample Seed" + }, + { + "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alpha numeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#main/Tag_Names", + "label": "Tag Names" + }, + { + "doc": "The VDJ species and chain types. This option should only be set for VDJ experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/VDJ_Version/VDJ_Version/human", + "#main/VDJ_Version/VDJ_Version/hs", + "#main/VDJ_Version/VDJ_Version/mouse", + "#main/VDJ_Version/VDJ_Version/mm", + "#main/VDJ_Version/VDJ_Version/humanBCR", + "#main/VDJ_Version/VDJ_Version/humanTCR", + "#main/VDJ_Version/VDJ_Version/mouseBCR", + "#main/VDJ_Version/VDJ_Version/mouseTCR" + ], + "type": "enum", + "name": "#main/VDJ_Version/VDJ_Version" + } + ], + "id": "#main/VDJ_Version", + "label": "VDJ Species Version" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "The BD Rhapsody\u2122 assays are used to create sequencing libraries from single cell transcriptomes.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files and a reference file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", + "label": "BD Rhapsody\u2122 Targeted Analysis Pipeline", + "steps": [ + { + "run": "#AddtoBam.cwl", + "scatter": [ + "#main/AddtoBam/R2_Bam" + ], + "in": [ + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AddtoBam/Annotation_R1" + }, + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/AddtoBam/Cell_Order" + }, + { + "source": "#main/GetDataTable/Corrected_Molecular_Annotation", + "id": "#main/AddtoBam/Molecular_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Bam", + "id": "#main/AddtoBam/R2_Bam" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AddtoBam/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Tag_Calls", + "id": "#main/AddtoBam/Tag_Calls" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AddtoBam/Target_Gene_Mapping" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AddtoBam", + "out": [ + "#main/AddtoBam/Annotated_Bam", + "#main/AddtoBam/output" + ] + }, + { + "run": "#AlignR2.cwl", + "out": [ + "#main/AlignR2/Alignments", + "#main/AlignR2/output" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AlignR2", + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AlignR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/Index", + "id": "#main/AlignR2/Index" + }, + { + "source": "#main/QualityFilterOuter/R2", + "id": "#main/AlignR2/R2" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AlignR2/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateMolecules.cwl", + "scatter": [ + "#main/AnnotateMolecules/Valids" + ], + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateMolecules/AbSeq_UMI" + }, + { + "source": 
"#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateMolecules/Run_Metadata" + }, + { + "source": "#main/Internal_Settings/Use_DBEC", + "id": "#main/AnnotateMolecules/Use_DBEC" + }, + { + "source": "#main/AnnotateReads/Valid_Reads", + "id": "#main/AnnotateMolecules/Valids" + } + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateMolecules", + "out": [ + "#main/AnnotateMolecules/Mol_Annot_List", + "#main/AnnotateMolecules/Gene_Status_List", + "#main/AnnotateMolecules/Max_Count", + "#main/AnnotateMolecules/Total_Molecules", + "#main/AnnotateMolecules/output" + ] + }, + { + "id": "#main/AnnotateR1", + "out": [ + "#main/AnnotateR1/Annotation_R1", + "#main/AnnotateR1/R1_error_count_table", + "#main/AnnotateR1/R1_read_count_breakdown", + "#main/AnnotateR1/output" + ], + "run": "#AnnotateR1.cwl", + "scatter": [ + "#main/AnnotateR1/R1" + ], + "in": [ + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateR1/Filter_Metrics" + }, + { + "source": "#main/QualityFilterOuter/R1", + "id": "#main/AnnotateR1/R1" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR1/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateR2.cwl", + "scatter": [ + "#main/AnnotateR2/R2_zip" + ], + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/GTF", + "id": "#main/AnnotateR2/GTF_Annotation" + }, + { + "source": "#main/AlignR2/Alignments", + "id": "#main/AnnotateR2/R2_zip" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR2/Run_Metadata" + }, + { + "source": "#main/CheckReference/Transcript_Length", + "id": "#main/AnnotateR2/Transcript_Length" + } + ], + "requirements": [ + { + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateR2", + "out": [ + "#main/AnnotateR2/Annot_R2", + "#main/AnnotateR2/R2_Bam", + 
"#main/AnnotateR2/GTF", + "#main/AnnotateR2/output", + "#main/AnnotateR2/R2_Quality_Metrics" + ] + }, + { + "run": "#AnnotateReads.cwl", + "out": [ + "#main/AnnotateReads/Seq_Metrics", + "#main/AnnotateReads/Valid_Reads", + "#main/AnnotateReads/Read1_error_rate", + "#main/AnnotateReads/Annotation_Read", + "#main/AnnotateReads/output", + "#main/AnnotateReads/validTcrReads", + "#main/AnnotateReads/validIgReads", + "#main/AnnotateReads/num_valid_tcr_reads", + "#main/AnnotateReads/num_valid_ig_reads" + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateReads", + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateReads/AbSeq_UMI" + }, + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateReads/Extra_Seqs" + }, + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateReads/Filter_Metrics" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/AnnotateReads/Putative_Cell_Call" + }, + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AnnotateReads/R1_Annotation" + }, + { + "source": "#main/AnnotateR1/R1_error_count_table", + "id": "#main/AnnotateReads/R1_error_count_table" + }, + { + "source": "#main/AnnotateR1/R1_read_count_breakdown", + "id": "#main/AnnotateReads/R1_read_count_breakdown" + }, + { + "source": "#main/AnnotateR2/Annot_R2", + "id": "#main/AnnotateReads/R2_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Quality_Metrics", + "id": "#main/AnnotateReads/R2_Quality_Metrics" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateReads/Run_Metadata" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AnnotateReads/Target_Gene_Mapping" + } + ] + }, + { + "out": [ + "#main/BundleLogs/logs_dir" + ], + "run": "#BundleLogs.cwl", + "id": "#main/BundleLogs", + "in": [ + { + "source": [ + "#main/AnnotateReads/output", + 
"#main/AnnotateR1/output", + "#main/AnnotateR2/output", + "#main/CheckReference/output", + "#main/GetDataTable/output", + "#main/Metrics/output", + "#main/AddtoBam/output", + "#main/AnnotateMolecules/output", + "#main/QualityFilterOuter/output", + "#main/CheckFastqs/log", + "#main/SplitAndSubsample/log", + "#main/MergeBAM/log", + "#main/Dense_to_Sparse_Datatable/output", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output", + "#main/IndexBAM/log", + "#main/CellClassifier/log" + ], + "linkMerge": "merge_flattened", + "id": "#main/BundleLogs/log_files" + } + ] + }, + { + "run": "#Cell_Classifier.cwl", + "out": [ + "#main/CellClassifier/cellTypePredictions", + "#main/CellClassifier/log" + ], + "requirements": [ + { + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CellClassifier", + "in": [ + { + "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", + "id": "#main/CellClassifier/molsPerCellMatrix" + } + ] + }, + { + "out": [ + "#main/CheckFastqs/SubsampleSeed", + "#main/CheckFastqs/SubsamplingRatio", + "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "#main/CheckFastqs/FastqReadPairs", + "#main/CheckFastqs/Bead_Version", + "#main/CheckFastqs/Libraries", + "#main/CheckFastqs/ReadsList", + "#main/CheckFastqs/log" + ], + "run": "#CheckFastqs.cwl", + "id": "#main/CheckFastqs", + "in": [ + { + "source": "#main/Internal_Settings/MinChunkSize", + "id": "#main/CheckFastqs/MinChunkSize" + }, + { + "source": "#main/Reads", + "id": "#main/CheckFastqs/Reads" + }, + { + "source": "#main/Subsample_Settings/Subsample_Reads", + "id": "#main/CheckFastqs/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": "#main/CheckFastqs/Subsample_Seed" + } + ] + }, + { + "run": "#CheckReference.cwl", + "out": [ + "#main/CheckReference/Index", + "#main/CheckReference/Extra_Seqs", + "#main/CheckReference/Full_Genes", + "#main/CheckReference/output", + "#main/CheckReference/Transcript_Length", + 
"#main/CheckReference/GTF", + "#main/CheckReference/Target_Gene_Mapping" + ], + "requirements": [ + { + "ramMin": 1000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CheckReference", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/CheckReference/AbSeq_Reference" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/CheckReference/Putative_Cell_Call" + }, + { + "source": "#main/Reference", + "id": "#main/CheckReference/Reference" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/CheckReference/Run_Metadata" + } + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables", + "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": "#main/Dense_to_Sparse_Datatable/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable", + "out": [ + "#main/Dense_to_Sparse_Datatable/Data_Tables", + "#main/Dense_to_Sparse_Datatable/output" + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", 
+ "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", + "out": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output" + ] + }, + { + "out": [ + "#main/Dense_to_Sparse_File/Cell_Order" + ], + "run": "#DensetoSparseFile.cwl", + "id": "#main/Dense_to_Sparse_File", + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_File/GDT_cell_order" + } + ] + }, + { + "out": [ + "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/dataTables" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "File", + "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/molsPerCellMatrixForCellClassifier" + } + ], + "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7", + "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", + "class": "ExpressionTool" + }, + "id": "#main/FindDataTableForCellClassifier", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/FindDataTableForCellClassifier/dataTables" + } + ] + }, + { + "out": [ + "#main/GetDataTable/Tag_Calls", + "#main/GetDataTable/Molecular_Annotation", + "#main/GetDataTable/Corrected_Molecular_Annotation", + "#main/GetDataTable/Tag_Annotation", + "#main/GetDataTable/Annot_Files", + 
"#main/GetDataTable/Cell_Label_Filter", + "#main/GetDataTable/Dense_Data_Tables", + "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "#main/GetDataTable/Expression_Data", + "#main/GetDataTable/Expression_Data_Unfiltered", + "#main/GetDataTable/Bioproduct_Stats", + "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "#main/GetDataTable/Putative_Cells_Origin", + "#main/GetDataTable/Protein_Aggregates_Experimental", + "#main/GetDataTable/Trueno_out", + "#main/GetDataTable/Trueno_zip", + "#main/GetDataTable/output", + "#main/GetDataTable/Cell_Order", + "#main/GetDataTable/Gene_List" + ], + "run": "#GetDataTable.cwl", + "id": "#main/GetDataTable", + "in": [ + { + "source": "#main/CheckReference/Full_Genes", + "id": "#main/GetDataTable/Full_Genes" + }, + { + "source": "#main/AnnotateMolecules/Gene_Status_List", + "id": "#main/GetDataTable/Gene_Status_List" + }, + { + "source": "#main/AnnotateMolecules/Max_Count", + "id": "#main/GetDataTable/Max_Count" + }, + { + "source": "#main/AnnotateMolecules/Mol_Annot_List", + "id": "#main/GetDataTable/Molecule_Annotation_List" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/GetDataTable/Putative_Cell_Call" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/GetDataTable/Run_Metadata" + }, + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/GetDataTable/Seq_Metrics" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/GetDataTable/Tag_Names" + }, + { + "source": "#main/AnnotateMolecules/Total_Molecules", + "id": "#main/GetDataTable/Total_Molecules" + } + ] + }, + { + "out": [ + "#main/IndexBAM/Index", + "#main/IndexBAM/log" + ], + "run": "#IndexBAM.cwl", + "id": "#main/IndexBAM", + "in": [ + { + "source": "#main/MergeBAM/Final_Bam", + "id": "#main/IndexBAM/BamFile" + } + ] + }, + { + "out": [ + "#main/Internal_Settings/Read_Filter_Off", + "#main/Internal_Settings/Barcode_Num", + "#main/Internal_Settings/Seq_Run", 
+ "#main/Internal_Settings/AbSeq_UMI", + "#main/Internal_Settings/Use_DBEC", + "#main/Internal_Settings/Extra_Seqs", + "#main/Internal_Settings/MinChunkSize", + "#main/Internal_Settings/NumRecordsPerSplit", + "#main/Internal_Settings/Target_analysis", + "#main/Internal_Settings/Subsample_Tags", + "#main/Internal_Settings/VDJ_VGene_Evalue", + "#main/Internal_Settings/VDJ_JGene_Evalue" + ], + "in": [], + "run": "#InternalSettings.cwl", + "id": "#main/Internal_Settings", + "label": "Internal Settings" + }, + { + "out": [ + "#main/MergeBAM/Final_Bam", + "#main/MergeBAM/log" + ], + "run": "#MergeBAM.cwl", + "id": "#main/MergeBAM", + "in": [ + { + "source": "#main/AddtoBam/Annotated_Bam", + "id": "#main/MergeBAM/BamFiles" + }, + { + "source": "#main/Metadata_Settings/Run_Base_Name", + "id": "#main/MergeBAM/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/MergeBAM/Sample_Tags_Version" + } + ] + }, + { + "out": [ + "#main/MergeMultiplex/Multiplex_out" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/SampleTag_Files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/Multiplex_out" + } + ], + "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679", + "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", + "class": "ExpressionTool" + }, + "id": "#main/MergeMultiplex", + "in": [ + { + "source": [ + "#main/GetDataTable/Trueno_out", + "#main/Metrics/Sample_Tag_Out" + ], + "linkMerge": "merge_flattened", + "id": 
"#main/MergeMultiplex/SampleTag_Files" + } + ] + }, + { + "out": [ + "#main/Metadata_Settings/Run_Metadata", + "#main/Metadata_Settings/Run_Base_Name" + ], + "run": "#Metadata.cwl", + "id": "#main/Metadata_Settings", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/Metadata_Settings/AbSeq_Reference" + }, + { + "valueFrom": "Targeted", + "id": "#main/Metadata_Settings/Assay" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", + "id": "#main/Metadata_Settings/Basic_Algo_Only" + }, + { + "source": "#main/CheckFastqs/Bead_Version", + "id": "#main/Metadata_Settings/Bead_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "id": "#main/Metadata_Settings/Exact_Cell_Count" + }, + { + "source": "#main/CheckFastqs/Libraries", + "id": "#main/Metadata_Settings/Libraries" + }, + { + "valueFrom": "BD Rhapsody Targeted Analysis Pipeline", + "id": "#main/Metadata_Settings/Pipeline_Name" + }, + { + "source": "#main/Version/version", + "id": "#main/Metadata_Settings/Pipeline_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/Metadata_Settings/Putative_Cell_Call" + }, + { + "source": "#main/CheckFastqs/ReadsList", + "id": "#main/Metadata_Settings/Reads" + }, + { + "source": "#main/Reference", + "id": "#main/Metadata_Settings/Reference" + }, + { + "source": "#main/Name_Settings/Run_Name", + "id": "#main/Metadata_Settings/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/Metadata_Settings/Sample_Tag_Names" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/Metadata_Settings/Sample_Tags_Version" + }, + { + "source": "#main/Start_Time/Start_Time", + "id": "#main/Metadata_Settings/Start_Time" + }, + { + "source": "#main/Subsample_Settings/Subsample_Reads", + "id": "#main/Metadata_Settings/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": 
"#main/Metadata_Settings/Subsample_Seed" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/Metadata_Settings/VDJ_Version" + } + ] + }, + { + "out": [ + "#main/Metrics/Metrics_Summary", + "#main/Metrics/Metrics_Archive", + "#main/Metrics/output", + "#main/Metrics/Sample_Tag_Out" + ], + "run": "#Metrics.cwl", + "id": "#main/Metrics", + "in": [ + { + "source": "#main/GetDataTable/Annot_Files", + "id": "#main/Metrics/Annot_Files" + }, + { + "source": "#main/AnnotateReads/Read1_error_rate", + "id": "#main/Metrics/Read1_error_rate" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Metrics/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Trueno_zip", + "id": "#main/Metrics/Sample_Tag_Archives" + }, + { + "source": "#main/Internal_Settings/Seq_Run", + "id": "#main/Metrics/Seq_Run" + }, + { + "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "id": "#main/Metrics/UMI_Adjusted_Stats" + }, + { + "source": "#main/VDJ_Compile_Results/vdjMetricsJson", + "id": "#main/Metrics/vdjMetricsJson" + } + ] + }, + { + "out": [ + "#main/Multiplexing_Settings/Tag_Sample_Names", + "#main/Multiplexing_Settings/Sample_Tags_Version" + ], + "in": [ + { + "source": "#main/Sample_Tags_Version", + "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" + }, + { + "source": "#main/Tag_Names", + "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" + } + ], + "run": "#MultiplexingSettings.cwl", + "id": "#main/Multiplexing_Settings", + "label": "Multiplexing Settings" + }, + { + "out": [ + "#main/Name_Settings/Run_Name" + ], + "in": [ + { + "source": "#main/Run_Name", + "id": "#main/Name_Settings/_Run_Name" + } + ], + "run": "#NameSettings.cwl", + "id": "#main/Name_Settings", + "label": "Name Settings" + }, + { + "out": [ + "#main/PairReadFiles/ReadPairs" + ], + "run": "#PairReadFiles.cwl", + "id": "#main/PairReadFiles", + "in": [ + { + "source": "#main/CheckFastqs/FastqReadPairs", + "id": "#main/PairReadFiles/FastqReadPairs" + }, + { + 
"source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#main/PairReadFiles/Reads" + } + ] + }, + { + "out": [ + "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" + ], + "in": [ + { + "source": "#main/Basic_Algo_Only", + "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" + }, + { + "source": "#main/Exact_Cell_Count", + "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" + }, + { + "source": "#main/Putative_Cell_Call", + "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" + } + ], + "run": "#PutativeCellSettings.cwl", + "id": "#main/Putative_Cell_Calling_Settings", + "label": "Putative Cell Calling Settings" + }, + { + "out": [ + "#main/QualityFilterOuter/Filter_Metrics", + "#main/QualityFilterOuter/R1", + "#main/QualityFilterOuter/R2", + "#main/QualityFilterOuter/output" + ], + "run": "#QualityFilterOuter.cwl", + "id": "#main/QualityFilterOuter", + "in": [ + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/QualityFilterOuter/Run_Metadata" + }, + { + "source": "#main/PairReadFiles/ReadPairs", + "id": "#main/QualityFilterOuter/Split_Read_Pairs" + } + ] + }, + { + "out": [ + "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "#main/SplitAndSubsample/log" + ], + "run": "#SplitAndSubsample.cwl", + "id": "#main/SplitAndSubsample", + "in": [ + { + "source": "#main/Reads", + "id": "#main/SplitAndSubsample/Fastqs" + }, + { + "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": "#main/Internal_Settings/NumRecordsPerSplit", + "id": "#main/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#main/CheckFastqs/SubsamplingRatio", + "id": "#main/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#main/CheckFastqs/SubsampleSeed", + "id": "#main/SplitAndSubsample/SubsampleSeed" + } + ] 
+ }, + { + "out": [ + "#main/Start_Time/Start_Time" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "string", + "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad/Start_Time" + } + ], + "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad", + "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", + "class": "ExpressionTool" + }, + "id": "#main/Start_Time", + "in": [] + }, + { + "out": [ + "#main/Subsample_Settings/Subsample_Reads", + "#main/Subsample_Settings/Subsample_Seed" + ], + "in": [ + { + "source": "#main/Subsample", + "id": "#main/Subsample_Settings/_Subsample_Reads" + }, + { + "source": "#main/Subsample_seed", + "id": "#main/Subsample_Settings/_Subsample_Seed" + } + ], + "run": "#SubsampleSettings.cwl", + "id": "#main/Subsample_Settings", + "label": "Subsample Settings" + }, + { + "out": [ + "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" + ], + "run": "#UncompressDatatables.cwl", + "id": "#main/Uncompress_Datatables", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/Uncompress_Datatables/Compressed_Data_Table" + }, + { + "source": "#main/GetDataTable/Expression_Data", + "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", + "id": 
"#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Compile_Results/vdjCellsDatatable", + "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "#main/VDJ_Compile_Results/vdjDominantContigs", + "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "#main/VDJ_Compile_Results/vdjMetricsJson", + "#main/VDJ_Compile_Results/vdjMetricsCsv", + "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" + ], + "run": "#VDJ_Compile_Results.cwl", + "id": "#main/VDJ_Compile_Results", + "in": [ + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/VDJ_Compile_Results/Seq_Metrics" + }, + { + "source": "#main/CellClassifier/cellTypePredictions", + "id": "#main/VDJ_Compile_Results/cellTypeMapping" + }, + { + "valueFrom": "$([])", + "id": "#main/VDJ_Compile_Results/chainsToIgnore" + }, + { + "source": "#main/Internal_Settings/VDJ_JGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueJgene" + }, + { + "source": "#main/Internal_Settings/VDJ_VGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueVgene" + }, + { + "source": "#main/VDJ_GatherIGCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/igCalls" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/VDJ_Compile_Results/metadata" + }, + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/VDJ_Compile_Results/putativeCells" + }, + { + "source": 
"#main/VDJ_GatherTCRCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/tcrCalls" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Compile_Results/vdjVersion" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherIGCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherIGCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", + "id": "#main/VDJ_GatherIGCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherTCRCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherTCRCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", + "id": "#main/VDJ_GatherTCRCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_IG/num_splits", + "#main/VDJ_Preprocess_Reads_IG/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_IG", + "in": [ + { + "source": "#main/AnnotateReads/validIgReads", + "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" + }, + { + "source": "#main/AnnotateReads/num_valid_ig_reads", + "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" + }, + { + "valueFrom": "BCR", + "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_TCR/num_splits", + "#main/VDJ_Preprocess_Reads_TCR/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_TCR", + "in": [ + { + "source": "#main/AnnotateReads/validTcrReads", + "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" + }, + { + "source": "#main/AnnotateReads/num_valid_tcr_reads", + "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" + }, + { + "valueFrom": "TCR", + "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Settings/VDJ_Version" + ], + "in": [ + { + "source": "#main/VDJ_Version", + 
"id": "#main/VDJ_Settings/_VDJ_Version" + } + ], + "run": "#VDJ_Settings.cwl", + "id": "#main/VDJ_Settings", + "label": "VDJ Settings" + }, + { + "out": [ + "#main/Version/version" + ], + "run": "#Version.cwl", + "id": "#main/Version", + "in": [] + } + ], + "outputs": [ + { + "outputSource": "#main/GetDataTable/Bioproduct_Stats", + "type": [ + "null", + "File" + ], + "id": "#main/Bioproduct_Stats", + "label": "Bioproduct Statistics" + }, + { + "outputSource": "#main/GetDataTable/Cell_Label_Filter", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Cell_Label_Filter", + "label": "Cell Label Filter" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables", + "label": "Data Tables" + }, + { + "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables_Unfiltered", + "label": "Unfiltered Data Tables" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data", + "label": "Expression Matrix" + }, + { + "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data_Unfiltered", + "label": "Unfiltered Expression Matrix" + }, + { + "outputSource": "#main/MergeBAM/Final_Bam", + "type": "File", + "id": "#main/Final_Bam", + "label": "Final BAM File" + }, + { + "outputSource": "#main/IndexBAM/Index", + "type": "File", + "id": "#main/Final_Bam_Index", + "label": "Final BAM Index" + }, + { + "outputSource": "#main/CellClassifier/cellTypePredictions", + "type": [ + "null", + "File" + ], + "id": "#main/ImmuneCellClassification(Experimental)", + "label": "Immune Cell Classification (Experimental)" + }, + { + "outputSource": 
"#main/BundleLogs/logs_dir", + "type": "Directory", + "id": "#main/Logs", + "label": "Pipeline Logs" + }, + { + "outputSource": "#main/Metrics/Metrics_Summary", + "type": "File", + "id": "#main/Metrics_Summary", + "label": "Metrics Summary" + }, + { + "outputSource": "#main/MergeMultiplex/Multiplex_out", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Multiplex" + }, + { + "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", + "type": [ + "null", + "File" + ], + "id": "#main/Protein_Aggregates_Experimental", + "label": "Protein Aggregates (Experimental)" + }, + { + "outputSource": "#main/GetDataTable/Putative_Cells_Origin", + "type": [ + "null", + "File" + ], + "id": "#main/Putative_Cells_Origin", + "label": "Putative Cells Origin" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatable", + "label": "vdjCellsDatatable" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatableUncorrected", + "label": "vdjCellsDatatableUncorrected" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjDominantContigs", + "label": "vdjDominantContigs" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", + "type": [ + "null", + "File" + ], + "id": "#main/vdjMetricsCsv", + "label": "vdjMetricsCsv" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjUnfilteredContigs", + "label": "vdjUnfilteredContigs" + } + ], + "id": "#main", + "class": "Workflow" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#MergeBAM.cwl/BamFiles" + }, + { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Run_Name" + }, 
+ { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Sample_Tags_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_merge.log", + "outputs": [ + { + "outputBinding": { + "glob": "*_final.BAM" + }, + "type": "File", + "id": "#MergeBAM.cwl/Final_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#MergeBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "merge" + ], + "id": "#MergeBAM.cwl", + "arguments": [ + { + "prefix": "-@", + "valueFrom": "$(runtime.cores)" + }, + { + "position": 0, + "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" + } + ], + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 4, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/AbSeq_Reference" + }, + { + "type": "string", + "id": "#Metadata.cwl/Assay" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#Metadata.cwl/Basic_Algo_Only" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#Metadata.cwl/Bead_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Label_Version" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Libraries" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Name" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Putative_Cell_Call" + }, + { + "type": [ + "null", + "boolean" + ], + "id": 
"#Metadata.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reads" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Name" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Sample_Tag_Names" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Start_Time" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Subsample_Seed" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Supplemental_Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "run_metadata.json", + "outputs": [ + { + "outputBinding": { + "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" + }, + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Base_Name" + }, + { + "type": "stdout", + "id": "#Metadata.cwl/Run_Metadata" + } + ], + "baseCommand": "echo", + "id": "#Metadata.cwl", + "arguments": [ + { + "prefix": "" + }, + { + "shellQuote": true, + "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = 
bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = \"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell 
Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-files" + }, + "type": "File", + "id": "#Metrics.cwl/Annot_Files" + }, + { + "inputBinding": { + "prefix": "--read1-error-rate" + }, + "type": "File", + "id": "#Metrics.cwl/Read1_error_rate" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#Metrics.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--sample-tag-archives", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Archives" + }, + { + "inputBinding": { + "prefix": "--seq-run" + }, + "type": [ + "null", + "string" + ], + "id": "#Metrics.cwl/Seq_Run" + }, + { + "inputBinding": { + "prefix": "--umi-adjusted-stats" + }, + "type": [ + "null", + "File" + ], + "id": "#Metrics.cwl/UMI_Adjusted_Stats" + }, + { + "inputBinding": { + "prefix": "--vdj-metrics-fp" + }, + "type": [ 
+ "null", + "File" + ], + "id": "#Metrics.cwl/vdjMetricsJson" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "internal-metrics-archive.tar.gz" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Archive" + }, + { + "outputBinding": { + "glob": "*_Metrics_Summary.csv" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Summary" + }, + { + "outputBinding": { + "glob": "*.zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Out" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Metrics.cwl/output" + } + ], + "baseCommand": [ + "mist_metrics.py" + ], + "class": "CommandLineTool", + "id": "#Metrics.cwl" + }, + { + "inputs": [ + { + "default": "Targeted", + "type": "string", + "id": "#MultiplexingSettings.cwl/Assay" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" + } + ], + "class": "ExpressionTool", + "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || _Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n 
{\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", + "id": "#MultiplexingSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/_Run_Name" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/Run_Name" + } + ], + "class": "ExpressionTool", + "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", + "id": "#NameSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/FastqReadPairs" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#PairReadFiles.cwl/Reads" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "PairReadFiles takes an array of split files and pairs them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. 
The pairing information is taken from CheckFastqs.\n", + "id": "#PairReadFiles.cwl", + "outputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R1" + }, + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R2" + }, + { + "type": "int", + "name": "#PairReadFiles.cwl/ReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/ReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/ReadPairs" + } + ], + "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, \"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if 
(fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the 
fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", + "class": "ExpressionTool" + 
}, + { + "inputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" + } + ], + "class": "ExpressionTool", + "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", + "id": "#PutativeCellSettings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--run-metadata" + }, + 
"type": "File", + "id": "#QualityFilter.cwl/Run_Metadata" + }, + { + "type": { + "fields": [ + { + "inputBinding": { + "prefix": "--r1" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" + }, + { + "inputBinding": { + "prefix": "--r2" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" + }, + { + "inputBinding": { + "prefix": "--read-pair-id" + }, + "type": "int", + "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" + }, + { + "inputBinding": { + "prefix": "--library" + }, + "type": "string", + "name": "#QualityFilter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "id": "#QualityFilter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*read_quality.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#QualityFilter.cwl/Filter_Metrics" + }, + { + "outputBinding": { + "glob": "*_R1*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R1" + }, + { + "outputBinding": { + "glob": "*_R2*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R2" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#QualityFilter.cwl/output" + } + ], + "baseCommand": [ + "mist_quality_filter.py" + ], + "class": "CommandLineTool", + "id": "#QualityFilter.cwl" + }, + { + "inputs": [ + { + "type": "File", + "id": "#QualityFilterOuter.cwl/Run_Metadata" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" + }, + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" + }, + { + "type": "int", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" + }, + { + "type": "string", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + 
"class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/Filter_Metrics" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R1" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R2" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/output" + } + ], + "class": "Workflow", + "steps": [ + { + "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", + "out": [ + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" + ], + "run": "#QualityFilter.cwl", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", + "in": [ + { + "source": "#QualityFilterOuter.cwl/Run_Metadata", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" + }, + { + "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" + } + ] + } + ], + "id": "#QualityFilterOuter.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/Fastqs" + }, + { + "type": { + "items": "string", + "type": "array" + }, + "id": 
"#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" + }, + { + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" + }, + { + "type": "float", + "id": "#SplitAndSubsample.cwl/SubsampleRatio" + }, + { + "type": "int", + "id": "#SplitAndSubsample.cwl/SubsampleSeed" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", + "id": "#SplitAndSubsample.cwl", + "steps": [ + { + "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", + "out": [ + "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": { + "items": "File", + "type": "array" + }, + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" + } + ], + "outputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" + } + ], + "class": "ExpressionTool", + "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput", + "in": [ + { + "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" + } + ] + }, + { + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "prefix": "--fastq-file-path" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" + }, + { + "inputBinding": { + "prefix": "--files-to-skip-split-and-subsample", 
+ "itemSeparator": "," + }, + "type": { + "items": "string", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" + }, + { + "inputBinding": { + "prefix": "--num-records" + }, + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" + }, + { + "inputBinding": { + "prefix": "--subsample-ratio" + }, + "type": "float", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": "int", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*.fastq.gz", + "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" + } + ], + "baseCommand": [ + "mist_split_fastq.py" + ], + "class": "CommandLineTool", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" + }, + "doc": "Allocate one docker/python process per file to do the actual file splitting.", + "scatter": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + ], + "in": [ + { + "source": "#SplitAndSubsample.cwl/Fastqs", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + }, + { + "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": "#SplitAndSubsample.cwl/NumRecordsPerSplit", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleRatio", + "id": 
"#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleSeed", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" + } + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample", + "out": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "#SplitAndSubsample.cwl/SplitAndSubsample/log" + ] + } + ], + "outputs": [ + { + "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" + }, + { + "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/log" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Seed" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/Subsample_Seed" + } + ], + "class": "ExpressionTool", + "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", + "id": "#SubsampleSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#UncompressDatatables.cwl/Compressed_Data_Table" + }, + { + "type": "File", + "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", + "type": { + "items": "File", + "type": "array" + }, + "id": 
"#UncompressDatatables.cwl/Uncompressed_Data_Tables" + }, + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" + } + ], + "class": "Workflow", + "steps": [ + { + "id": "#UncompressDatatables.cwl/Uncompress_Datatable", + "out": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": "$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + ] + }, + "scatter": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + ], + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Data_Table", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + } + ] + }, + { + "out": [ + "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": 
"$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + ] + }, + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" + } + ] + } + ], + "id": "#UncompressDatatables.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" + }, + { + "inputBinding": { + "position": 2 + }, + "type": "string", + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" + }, + { + "inputBinding": { + "position": 3 + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_pruned.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" + } + ], + "baseCommand": [ + "AssembleAndAnnotate.sh" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 1, + "ramMin": 3200, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" + }, + { 
+ "type": [ + "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" + }, + { + "type": [ 
+ "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 10, + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "position": 0, + "prefix": "--cell-type-mapping-fp" + }, + "type": [ + "null", + "File" + ], + "id": 
"#VDJ_Compile_Results.cwl/cellTypeMapping" + }, + { + "inputBinding": { + "position": 4, + "prefix": "--ignore", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" + }, + { + "inputBinding": { + "position": 8, + "prefix": "--e-value-for-j" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueJgene" + }, + { + "inputBinding": { + "position": 7, + "prefix": "--e-value-for-v" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueVgene" + }, + { + "inputBinding": { + "position": 5 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/igCalls" + }, + { + "inputBinding": { + "position": 9, + "prefix": "--metadata-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/metadata" + }, + { + "inputBinding": { + "position": 3, + "prefix": "--putative-cells-json-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/putativeCells" + }, + { + "inputBinding": { + "position": 6 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/tcrCalls" + }, + { + "inputBinding": { + "position": 2, + "prefix": "--vdj-version" + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Compile_Results.cwl/vdjVersion" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "doc": "VDJ data per cell, with distribution based error correction", + "outputBinding": { + "glob": "*_VDJ_perCell.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" + }, + { + "doc": "VDJ data per cell, including non-putative cells, no error correction applied", + "outputBinding": { + "glob": "*_VDJ_perCell_uncorrected.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" + }, + { + "outputBinding": { + "glob": "*_VDJ_Dominant_Contigs.csv.gz" + }, + "type": [ + 
"null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.json" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" + }, + { + "outputBinding": { + "glob": "*_DBEC_cutoff.png" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" + }, + { + "outputBinding": { + "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" + } + ], + "baseCommand": [ + "mist_vdj_compile_results.py" + ], + "id": "#VDJ_Compile_Results.cwl", + "class": "CommandLineTool", + "hints": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", + "id": "#VDJ_GatherCalls.cwl", + "steps": [ + { + "out": [ + "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_constant_region_called_pruned.csv.gz", + "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" + }, + "type": [ + "null", + "File" + ], 
+ "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" + } + ], + "class": "CommandLineTool", + "arguments": [ + { + "shellQuote": false, + "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" + }, + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", + "in": [ + { + "source": "#VDJ_GatherCalls.cwl/theCalls", + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" + } + ] + } + ], + "outputs": [ + { + "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", + "type": [ + "null", + "File" + ], + "id": "#VDJ_GatherCalls.cwl/gatheredCalls" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "File" + ], + "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": 
"CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "8" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", + "id": "#VDJ_Preprocess_Reads.cwl/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/num_splits" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_RSEC_Reads.cwl", + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", + "class": "ResourceRequirement" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" + ], + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" + } + ], + "run": "#VDJ_Trim_Reads.cwl", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", + "hints": [ + { + 
"coresMin": 8, + "class": "ResourceRequirement" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" + } + ], + "class": "ExpressionTool", + "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" + }, + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/vdj_type", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" + } + ] + } + ], + "id": "#VDJ_Preprocess_Reads.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--vdj-valid-reads", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": 
"#VDJ_RSEC_Reads.cwl/Valid_Reads" + }, + { + "inputBinding": { + "prefix": "--num-splits" + }, + "type": [ + "null", + "int" + ], + "id": "#VDJ_RSEC_Reads.cwl/num_splits" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*RSEC_Reads_Fastq_*.tar.gz" + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" + } + ], + "baseCommand": "mist_vdj_rsec_reads.py", + "class": "CommandLineTool", + "id": "#VDJ_RSEC_Reads.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "Any" + ], + "id": "#VDJ_Settings.cwl/_VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Settings.cwl/VDJ_Version" + } + ], + "class": "ExpressionTool", + "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === \"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = 
inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", + "id": "#VDJ_Settings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "cutadapt.log" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Trim_Report" + }, + { + "outputBinding": { + "glob": "*vdjtxt.gz" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" + } + ], + "baseCommand": "VDJ_Trim_Reads.sh", + "class": "CommandLineTool", + "id": "#VDJ_Trim_Reads.cwl" + }, + { + "inputs": [], + "requirements": [ + ], + "stdout": "output.txt", + "outputs": [ + { + "outputBinding": { + "glob": "output.txt", + "loadContents": true, + "outputEval": "$(self[0].contents)" + }, + "type": "string", + "id": "#Version.cwl/version" + } + ], + "baseCommand": [ + "mist_version.py" + ], + "id": "#Version.cwl", + "class": "CommandLineTool" + } + ], + "$namespaces": { + "sbg": "https://sevenbridges.com#", + "arv": "http://arvados.org/cwl#" + } +} \ No newline at end of file diff --git a/target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl b/target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl new file mode 100755 index 00000000000..5fa9ea85e48 --- /dev/null +++ b/target/docker/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl @@ -0,0 +1,5204 @@ +#!/usr/bin/env cwl-runner +{ + "cwlVersion": "v1.0", + "$graph": [ + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-r1", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AddtoBam.cwl/Annotation_R1" + }, + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#AddtoBam.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--annot-mol-file" + }, + "type": "File", + "id": 
"#AddtoBam.cwl/Molecular_Annotation" + }, + { + "inputBinding": { + "prefix": "--r2-bam" + }, + "type": "File", + "id": "#AddtoBam.cwl/R2_Bam" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AddtoBam.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--tag-calls" + }, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Tag_Calls" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "Annotated_mapping_R2.BAM" + }, + "type": "File", + "id": "#AddtoBam.cwl/Annotated_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AddtoBam.cwl/output" + } + ], + "baseCommand": [ + "mist_add_to_bam.py" + ], + "class": "CommandLineTool", + "id": "#AddtoBam.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + ], + "id": "#AlignR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--index" + }, + "type": "File", + "id": "#AlignR2.cwl/Index" + }, + { + "inputBinding": { + "prefix": "--r2-fastqs", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/R2" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AlignR2.cwl/Run_Metadata" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "$(String(runtime.cores))" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*zip" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/Alignments" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AlignR2.cwl/output" + } + ], + "baseCommand": [ + 
"mist_align_R2.py" + ], + "class": "CommandLineTool", + "id": "#AlignR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateMolecules.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--use-dbec" + }, + "type": [ + "null", + "boolean" + ], + "id": "#AnnotateMolecules.cwl/Use_DBEC" + }, + { + "inputBinding": { + "prefix": "--valid-annot" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Valids" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_GeneStatus.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Gene_Status_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).max_count)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Max_Count" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Mol_Annot_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Total_Molecules" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_molecules.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateMolecules.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--filter-metrics", + "itemSeparator": "," + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#AnnotateR1.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--R1" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + 
"type": "File", + "id": "#AnnotateR1.cwl/Run_Metadata" + } + ], + "requirements": [ + + { + "ramMin": 2000, + "class": "ResourceRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_R1.csv.gz" + }, + "type": "File", + "id": "#AnnotateR1.cwl/Annotation_R1" + }, + { + "outputBinding": { + "glob": "*_R1_error_count_table.npy" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_error_count_table" + }, + { + "outputBinding": { + "glob": "*_R1_read_count_breakdown.json" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_read_count_breakdown" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR1.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R1.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR1.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF_Annotation" + }, + { + "inputBinding": { + "prefix": "--R2-zip" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_zip" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--transcript-length" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/Transcript_Length" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*Annotation_R2.csv.gz" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Annot_R2" + }, + { + "outputBinding": { + "glob": "*-annot.gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*mapping_R2.BAM" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_Bam" + }, + { + "outputBinding": { + "glob": "*_picard_quality_metrics.csv.gz" + }, + "type": 
"File", + "id": "#AnnotateR2.cwl/R2_Quality_Metrics" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR2.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R2.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--extra-seqs", + "itemSeparator": "," + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Extra_Seqs" + }, + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#AnnotateReads.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/Putative_Cell_Call" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_error_count_table" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_read_count_breakdown" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Quality_Metrics" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + + { + "class": "InitialWorkDirRequirement", + "listing": [ + { + "writable": false, + "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return 
fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", + "entryname": "manifest.json" + } + ] + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "4" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_Read.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Annotation_Read" + }, + { + "outputBinding": { + "glob": "*read1_error_rate_archive*" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Read1_error_rate" + }, + { + "outputBinding": { + "glob": "*_SeqMetrics.csv.gz" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Seq_Metrics" + }, + { + "outputBinding": { + "glob": "*Sorted_Valid_Reads.csv.*" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/Valid_Reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_ig_reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_tcr_reads" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": 
"#AnnotateReads.cwl/output" + }, + { + "outputBinding": { + "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validIgReads" + }, + { + "outputBinding": { + "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validTcrReads" + } + ], + "baseCommand": [ + "mist_annotate_reads.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateReads.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#BundleLogs.cwl/log_files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + } + ], + "outputs": [ + { + "type": "Directory", + "id": "#BundleLogs.cwl/logs_dir" + } + ], + "class": "ExpressionTool", + "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? 
(random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", + "id": "#BundleLogs.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 0 + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/molsPerCellMatrix" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*cell_type_experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/cellTypePredictions" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Cell_Classifier.cwl/log" + } + ], + "baseCommand": [ + "mist_cell_classifier.py" + ], + "class": "CommandLineTool", + "id": "#Cell_Classifier.cwl" + }, + { + "inputs": [ + { + "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", + "inputBinding": { + "prefix": "--min-split-size" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/MinChunkSize" + }, + { + "inputBinding": { + "prefix": "--reads", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#CheckFastqs.cwl/Reads" + }, + { + "inputBinding": { + "prefix": "--subsample" + }, + "type": [ + "null", + "float" + ], + "id": "#CheckFastqs.cwl/Subsample" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/Subsample_Seed" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "CheckFastqs does several quality 
control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", + "baseCommand": [ + "mist_check_fastqs.py" + ], + "id": "#CheckFastqs.cwl", + "outputs": [ + { + "outputBinding": { + "glob": "bead_version.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/Bead_Version" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/FastqReadPairs" + }, + { + "outputBinding": { + "glob": "files_to_skip_split_and_subsample.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" + }, + "type": { + "items": "string", + "type": "array" + }, + "id": "#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var 
pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" + }, + "type": [ + "null", + "string" + ], + "id": "#CheckFastqs.cwl/Libraries" + }, + { + "outputBinding": { + "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#CheckFastqs.cwl/ReadsList" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" + }, + "type": "int", + "id": "#CheckFastqs.cwl/SubsampleSeed" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" + }, + "type": "float", + "id": "#CheckFastqs.cwl/SubsamplingRatio" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#CheckFastqs.cwl/log" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--abseq-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/AbSeq_Reference" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckReference.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/Reference" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#CheckReference.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": 
"--supplemental-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/Supplemental_Reference" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "combined_extra_seq.fasta" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Extra_Seqs" + }, + { + "outputBinding": { + "glob": "full-gene-list.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Full_Genes" + }, + { + "outputBinding": { + "glob": "*gtf", + "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*-annot.*", + "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" + }, + "type": "File", + "id": "#CheckReference.cwl/Index" + }, + { + "outputBinding": { + "glob": "target-gene.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Target_Gene_Mapping" + }, + { + "outputBinding": { + "glob": "transcript_length.json" + }, + "type": [ + "null", + "File" + ], + "id": 
"#CheckReference.cwl/Transcript_Length" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#CheckReference.cwl/output" + } + ], + "baseCommand": [ + "mist_check_references.py" + ], + "class": "CommandLineTool", + "id": "#CheckReference.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--dense-data-table" + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparse.cwl/Dense_Data_Table" + }, + { + "inputBinding": { + "prefix": "--gene-list" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Gene_List" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Run_Metadata" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*.csv.gz" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Data_Tables" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#DensetoSparse.cwl/output" + } + ], + "baseCommand": [ + "mist_dense_to_sparse.py" + ], + "class": "CommandLineTool", + "id": "#DensetoSparse.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparseFile.cwl/GDT_cell_order" + } + ], + "requirements": [ + + ], + "stdout": "cell_order.json", + "outputs": [ + { + "type": "stdout", + "id": "#DensetoSparseFile.cwl/Cell_Order" + } + ], + "baseCommand": "cat", + "id": "#DensetoSparseFile.cwl", + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--full-gene-list" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Full_Genes" + }, + { + "inputBinding": { + "prefix": "--gene-status", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Gene_Status_List" + }, + { + "inputBinding": { + "prefix": "--max-count", + "itemSeparator": 
"," + }, + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Max_Count" + }, + { + "inputBinding": { + "prefix": "--mol-annot", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Molecule_Annotation_List" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#GetDataTable.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#GetDataTable.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#GetDataTable.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "prefix": "--tag-names", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Tag_Names" + }, + { + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Total_Molecules" + } + ], + "requirements": [ + { + "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", + "class": "ResourceRequirement" + }, + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "metrics-files.tar.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Annot_Files" + }, + { + "outputBinding": { + "glob": "Annotations/*_Bioproduct_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Bioproduct_Stats" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*.png" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Cell_Label_Filter" + }, + { + "outputBinding": { + "glob": "cell_order.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Cell_Order" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule_corrected.csv.gz" + }, + "type": "File", + "id": 
"#GetDataTable.cwl/Corrected_Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "*PerCell_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables" + }, + { + "outputBinding": { + "glob": "*PerCell_Unfiltered_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" + }, + { + "outputBinding": { + "glob": "*_Expression_Data.st.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Expression_Data" + }, + { + "outputBinding": { + "glob": "*_Expression_Data_Unfiltered.st.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" + }, + { + "outputBinding": { + "glob": "gene_list.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Gene_List" + }, + { + "outputBinding": { + "glob": "Annotations/*_Annotation_Molecule.csv.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Putative_Cells_Origin" + }, + { + "outputBinding": { + "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Annotation" + }, + { + "outputBinding": { + "glob": "Trueno/*_Calls.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Calls" + }, + { + "outputBinding": { + "glob": "Trueno/*csv" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Trueno_out" + }, + { + "outputBinding": { + "glob": "Trueno/*zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": 
"#GetDataTable.cwl/Trueno_zip" + }, + { + "outputBinding": { + "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#GetDataTable.cwl/output" + } + ], + "baseCommand": [ + "mist_get_datatables.py" + ], + "class": "CommandLineTool", + "id": "#GetDataTable.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#IndexBAM.cwl/BamFile" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_index.log", + "outputs": [ + { + "outputBinding": { + "glob": "*.bai" + }, + "type": "File", + "id": "#IndexBAM.cwl/Index" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#IndexBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "index" + ], + "id": "#IndexBAM.cwl", + "arguments": [ + { + "position": 2, + "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/AbSeq_UMI" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Barcode_Num" + }, + { + "type": [ + "null", + "File" + ], + "id": "#InternalSettings.cwl/Extra_Seqs" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Label_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/MinChunkSize" + }, + { + "type": [ + "null", + "long" + ], + "id": "#InternalSettings.cwl/NumRecordsPerSplit" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + "string" + ], + "id": "#InternalSettings.cwl/Seq_Run" + }, + { + "type": [ + "null", + "float" + ], + "id": 
"#InternalSettings.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Target_analysis" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Use_DBEC" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" + } + ], + "class": "ExpressionTool", + "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput = internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", + "id": "#InternalSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/AbSeq_Reference", + "label": "AbSeq Reference" + }, + { + "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set.", + "type": [ + "null", + "boolean" + ], + "id": "#main/Basic_Algo_Only", + "label": "Disable Refined Putative Cell Calling" + }, + { + "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", + "type": [ + "null", + "int" + ], + "id": "#main/Exact_Cell_Count", + "label": "Exact Cell Count" + }, + { + "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. AbSeq (Experimental) is for troubleshooting only.", + "type": [ + "null", + { + "symbols": [ + "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", + "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" + ], + "type": "enum", + "name": "#main/Putative_Cell_Call/Putative_Cell_Call" + } + ], + "id": "#main/Putative_Cell_Call", + "label": "Putative Cell Calling" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/Reads", + "label": "Reads" + }, + { + "type": "File", + "id": "#main/Reference_Genome", + "label": "Reference Genome" + }, + { + "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", + "type": [ + "null", + "string" + ], + "id": "#main/Run_Name", + "label": "Run Name" + }, + { + "doc": "The sample multiplexing kit version. 
This option should only be set for a multiplexed experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/Sample_Tags_Version/Sample_Tags_Version/human", + "#main/Sample_Tags_Version/Sample_Tags_Version/hs", + "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", + "#main/Sample_Tags_Version/Sample_Tags_Version/mm", + "#main/Sample_Tags_Version/Sample_Tags_Version/custom" + ], + "type": "enum", + "name": "#main/Sample_Tags_Version/Sample_Tags_Version" + } + ], + "id": "#main/Sample_Tags_Version", + "label": "Sample Tags Version" + }, + { + "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", + "type": [ + "null", + "float" + ], + "id": "#main/Subsample", + "label": "Subsample Reads" + }, + { + "doc": "For use when replicating a previous subsampling run only. Obtain the seed generated from the log file for the SplitFastQ node.\n", + "type": [ + "null", + "int" + ], + "id": "#main/Subsample_seed", + "label": "Subsample Seed" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Supplemental_Reference", + "label": "Supplemental Reference" + }, + { + "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alpha numeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#main/Tag_Names", + "label": "Tag Names" + }, + { + "type": "File", + "id": "#main/Transcriptome_Annotation", + "label": "Transcriptome Annotation" + }, + { + "doc": "The VDJ species and chain types. 
This option should only be set for VDJ experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/VDJ_Version/VDJ_Version/human", + "#main/VDJ_Version/VDJ_Version/hs", + "#main/VDJ_Version/VDJ_Version/mouse", + "#main/VDJ_Version/VDJ_Version/mm", + "#main/VDJ_Version/VDJ_Version/humanBCR", + "#main/VDJ_Version/VDJ_Version/humanTCR", + "#main/VDJ_Version/VDJ_Version/mouseBCR", + "#main/VDJ_Version/VDJ_Version/mouseTCR" + ], + "type": "enum", + "name": "#main/VDJ_Version/VDJ_Version" + } + ], + "id": "#main/VDJ_Version", + "label": "VDJ Species Version" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "The BD Rhapsody\u2122 WTA Analysis Pipeline is used to create sequencing libraries from single cell transcriptomes without having to specify a targeted panel.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files, a reference genome file and a transcriptome annotation file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", + "label": "BD Rhapsody\u2122 WTA Analysis Pipeline", + "steps": [ + { + "run": "#AddtoBam.cwl", + "scatter": [ + "#main/AddtoBam/R2_Bam" + ], + "in": [ + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AddtoBam/Annotation_R1" + }, + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/AddtoBam/Cell_Order" + }, + { + "source": "#main/GetDataTable/Corrected_Molecular_Annotation", + "id": "#main/AddtoBam/Molecular_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Bam", + "id": "#main/AddtoBam/R2_Bam" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AddtoBam/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Tag_Calls", + "id": "#main/AddtoBam/Tag_Calls" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AddtoBam/Target_Gene_Mapping" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AddtoBam", + "out": [ + "#main/AddtoBam/Annotated_Bam", + "#main/AddtoBam/output" + ] + }, + { + "run": "#AlignR2.cwl", + "out": [ + "#main/AlignR2/Alignments", + "#main/AlignR2/output" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": 48000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AlignR2", + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AlignR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/Index", + "id": "#main/AlignR2/Index" + }, + { + "source": "#main/QualityFilterOuter/R2", + "id": "#main/AlignR2/R2" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AlignR2/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateMolecules.cwl", + "scatter": [ + "#main/AnnotateMolecules/Valids" + ], + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateMolecules/AbSeq_UMI" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", 
+ "id": "#main/AnnotateMolecules/Run_Metadata" + }, + { + "source": "#main/Internal_Settings/Use_DBEC", + "id": "#main/AnnotateMolecules/Use_DBEC" + }, + { + "source": "#main/AnnotateReads/Valid_Reads", + "id": "#main/AnnotateMolecules/Valids" + } + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateMolecules", + "out": [ + "#main/AnnotateMolecules/Mol_Annot_List", + "#main/AnnotateMolecules/Gene_Status_List", + "#main/AnnotateMolecules/Max_Count", + "#main/AnnotateMolecules/Total_Molecules", + "#main/AnnotateMolecules/output" + ] + }, + { + "id": "#main/AnnotateR1", + "out": [ + "#main/AnnotateR1/Annotation_R1", + "#main/AnnotateR1/R1_error_count_table", + "#main/AnnotateR1/R1_read_count_breakdown", + "#main/AnnotateR1/output" + ], + "run": "#AnnotateR1.cwl", + "scatter": [ + "#main/AnnotateR1/R1" + ], + "in": [ + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateR1/Filter_Metrics" + }, + { + "source": "#main/QualityFilterOuter/R1", + "id": "#main/AnnotateR1/R1" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR1/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateR2.cwl", + "scatter": [ + "#main/AnnotateR2/R2_zip" + ], + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/GTF", + "id": "#main/AnnotateR2/GTF_Annotation" + }, + { + "source": "#main/AlignR2/Alignments", + "id": "#main/AnnotateR2/R2_zip" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR2/Run_Metadata" + }, + { + "source": "#main/CheckReference/Transcript_Length", + "id": "#main/AnnotateR2/Transcript_Length" + } + ], + "requirements": [ + { + "ramMin": 10000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateR2", + "out": [ + "#main/AnnotateR2/Annot_R2", + "#main/AnnotateR2/R2_Bam", + "#main/AnnotateR2/GTF", + "#main/AnnotateR2/output", + 
"#main/AnnotateR2/R2_Quality_Metrics" + ] + }, + { + "run": "#AnnotateReads.cwl", + "out": [ + "#main/AnnotateReads/Seq_Metrics", + "#main/AnnotateReads/Valid_Reads", + "#main/AnnotateReads/Read1_error_rate", + "#main/AnnotateReads/Annotation_Read", + "#main/AnnotateReads/output", + "#main/AnnotateReads/validTcrReads", + "#main/AnnotateReads/validIgReads", + "#main/AnnotateReads/num_valid_tcr_reads", + "#main/AnnotateReads/num_valid_ig_reads" + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateReads", + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateReads/AbSeq_UMI" + }, + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateReads/Extra_Seqs" + }, + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateReads/Filter_Metrics" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/AnnotateReads/Putative_Cell_Call" + }, + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AnnotateReads/R1_Annotation" + }, + { + "source": "#main/AnnotateR1/R1_error_count_table", + "id": "#main/AnnotateReads/R1_error_count_table" + }, + { + "source": "#main/AnnotateR1/R1_read_count_breakdown", + "id": "#main/AnnotateReads/R1_read_count_breakdown" + }, + { + "source": "#main/AnnotateR2/Annot_R2", + "id": "#main/AnnotateReads/R2_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Quality_Metrics", + "id": "#main/AnnotateReads/R2_Quality_Metrics" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateReads/Run_Metadata" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AnnotateReads/Target_Gene_Mapping" + } + ] + }, + { + "out": [ + "#main/BundleLogs/logs_dir" + ], + "run": "#BundleLogs.cwl", + "id": "#main/BundleLogs", + "in": [ + { + "source": [ + "#main/AnnotateReads/output", + "#main/AnnotateR1/output", + "#main/AnnotateR2/output", + 
"#main/CheckReference/output", + "#main/GetDataTable/output", + "#main/Metrics/output", + "#main/AddtoBam/output", + "#main/AnnotateMolecules/output", + "#main/QualityFilterOuter/output", + "#main/CheckFastqs/log", + "#main/SplitAndSubsample/log", + "#main/MergeBAM/log", + "#main/Dense_to_Sparse_Datatable/output", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output", + "#main/IndexBAM/log", + "#main/CellClassifier/log" + ], + "linkMerge": "merge_flattened", + "id": "#main/BundleLogs/log_files" + } + ] + }, + { + "run": "#Cell_Classifier.cwl", + "out": [ + "#main/CellClassifier/cellTypePredictions", + "#main/CellClassifier/log" + ], + "requirements": [ + { + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CellClassifier", + "in": [ + { + "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", + "id": "#main/CellClassifier/molsPerCellMatrix" + } + ] + }, + { + "out": [ + "#main/CheckFastqs/SubsampleSeed", + "#main/CheckFastqs/SubsamplingRatio", + "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "#main/CheckFastqs/FastqReadPairs", + "#main/CheckFastqs/Bead_Version", + "#main/CheckFastqs/Libraries", + "#main/CheckFastqs/ReadsList", + "#main/CheckFastqs/log" + ], + "run": "#CheckFastqs.cwl", + "id": "#main/CheckFastqs", + "in": [ + { + "source": "#main/Internal_Settings/MinChunkSize", + "id": "#main/CheckFastqs/MinChunkSize" + }, + { + "source": "#main/Reads", + "id": "#main/CheckFastqs/Reads" + }, + { + "source": "#main/Subsample_Settings/Subsample_Reads", + "id": "#main/CheckFastqs/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": "#main/CheckFastqs/Subsample_Seed" + } + ] + }, + { + "run": "#CheckReference.cwl", + "out": [ + "#main/CheckReference/Index", + "#main/CheckReference/Extra_Seqs", + "#main/CheckReference/Full_Genes", + "#main/CheckReference/output", + "#main/CheckReference/Transcript_Length", + "#main/CheckReference/GTF", + "#main/CheckReference/Target_Gene_Mapping" + 
], + "requirements": [ + { + "ramMin": 10000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CheckReference", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/CheckReference/AbSeq_Reference" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/CheckReference/Putative_Cell_Call" + }, + { + "source": [ + "#main/Transcriptome_Annotation", + "#main/Reference_Genome" + ], + "id": "#main/CheckReference/Reference" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/CheckReference/Run_Metadata" + }, + { + "source": "#main/Supplemental_Reference", + "id": "#main/CheckReference/Supplemental_Reference" + } + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables", + "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": "#main/Dense_to_Sparse_Datatable/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable", + "out": [ + "#main/Dense_to_Sparse_Datatable/Data_Tables", + "#main/Dense_to_Sparse_Datatable/output" + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": 
"#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", + "out": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output" + ] + }, + { + "out": [ + "#main/Dense_to_Sparse_File/Cell_Order" + ], + "run": "#DensetoSparseFile.cwl", + "id": "#main/Dense_to_Sparse_File", + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_File/GDT_cell_order" + } + ] + }, + { + "out": [ + "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/dataTables" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "File", + "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/molsPerCellMatrixForCellClassifier" + } + ], + "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0", + "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", + "class": "ExpressionTool" + }, + "id": "#main/FindDataTableForCellClassifier", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/FindDataTableForCellClassifier/dataTables" + } + ] + }, + { + "out": [ + "#main/GetDataTable/Tag_Calls", + "#main/GetDataTable/Molecular_Annotation", + 
"#main/GetDataTable/Corrected_Molecular_Annotation", + "#main/GetDataTable/Tag_Annotation", + "#main/GetDataTable/Annot_Files", + "#main/GetDataTable/Cell_Label_Filter", + "#main/GetDataTable/Dense_Data_Tables", + "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "#main/GetDataTable/Expression_Data", + "#main/GetDataTable/Expression_Data_Unfiltered", + "#main/GetDataTable/Bioproduct_Stats", + "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "#main/GetDataTable/Putative_Cells_Origin", + "#main/GetDataTable/Protein_Aggregates_Experimental", + "#main/GetDataTable/Trueno_out", + "#main/GetDataTable/Trueno_zip", + "#main/GetDataTable/output", + "#main/GetDataTable/Cell_Order", + "#main/GetDataTable/Gene_List" + ], + "run": "#GetDataTable.cwl", + "id": "#main/GetDataTable", + "in": [ + { + "source": "#main/CheckReference/Full_Genes", + "id": "#main/GetDataTable/Full_Genes" + }, + { + "source": "#main/AnnotateMolecules/Gene_Status_List", + "id": "#main/GetDataTable/Gene_Status_List" + }, + { + "source": "#main/AnnotateMolecules/Max_Count", + "id": "#main/GetDataTable/Max_Count" + }, + { + "source": "#main/AnnotateMolecules/Mol_Annot_List", + "id": "#main/GetDataTable/Molecule_Annotation_List" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/GetDataTable/Putative_Cell_Call" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/GetDataTable/Run_Metadata" + }, + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/GetDataTable/Seq_Metrics" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/GetDataTable/Tag_Names" + }, + { + "source": "#main/AnnotateMolecules/Total_Molecules", + "id": "#main/GetDataTable/Total_Molecules" + } + ] + }, + { + "out": [ + "#main/IndexBAM/Index", + "#main/IndexBAM/log" + ], + "run": "#IndexBAM.cwl", + "id": "#main/IndexBAM", + "in": [ + { + "source": "#main/MergeBAM/Final_Bam", + "id": "#main/IndexBAM/BamFile" + } + ] + }, + { + 
"out": [ + "#main/Internal_Settings/Read_Filter_Off", + "#main/Internal_Settings/Barcode_Num", + "#main/Internal_Settings/Seq_Run", + "#main/Internal_Settings/AbSeq_UMI", + "#main/Internal_Settings/Use_DBEC", + "#main/Internal_Settings/Extra_Seqs", + "#main/Internal_Settings/MinChunkSize", + "#main/Internal_Settings/NumRecordsPerSplit", + "#main/Internal_Settings/Target_analysis", + "#main/Internal_Settings/Subsample_Tags", + "#main/Internal_Settings/VDJ_VGene_Evalue", + "#main/Internal_Settings/VDJ_JGene_Evalue" + ], + "in": [], + "run": "#InternalSettings.cwl", + "id": "#main/Internal_Settings", + "label": "Internal Settings" + }, + { + "out": [ + "#main/MergeBAM/Final_Bam", + "#main/MergeBAM/log" + ], + "run": "#MergeBAM.cwl", + "id": "#main/MergeBAM", + "in": [ + { + "source": "#main/AddtoBam/Annotated_Bam", + "id": "#main/MergeBAM/BamFiles" + }, + { + "source": "#main/Metadata_Settings/Run_Base_Name", + "id": "#main/MergeBAM/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/MergeBAM/Sample_Tags_Version" + } + ] + }, + { + "out": [ + "#main/MergeMultiplex/Multiplex_out" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/SampleTag_Files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/Multiplex_out" + } + ], + "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7", + "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", + "class": "ExpressionTool" + }, + "id": "#main/MergeMultiplex", + "in": [ + { 
+ "source": [ + "#main/GetDataTable/Trueno_out", + "#main/Metrics/Sample_Tag_Out" + ], + "linkMerge": "merge_flattened", + "id": "#main/MergeMultiplex/SampleTag_Files" + } + ] + }, + { + "out": [ + "#main/Metadata_Settings/Run_Metadata", + "#main/Metadata_Settings/Run_Base_Name" + ], + "run": "#Metadata.cwl", + "id": "#main/Metadata_Settings", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/Metadata_Settings/AbSeq_Reference" + }, + { + "valueFrom": "WTA", + "id": "#main/Metadata_Settings/Assay" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", + "id": "#main/Metadata_Settings/Basic_Algo_Only" + }, + { + "source": "#main/CheckFastqs/Bead_Version", + "id": "#main/Metadata_Settings/Bead_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "id": "#main/Metadata_Settings/Exact_Cell_Count" + }, + { + "source": "#main/CheckFastqs/Libraries", + "id": "#main/Metadata_Settings/Libraries" + }, + { + "valueFrom": "BD Rhapsody WTA Analysis Pipeline", + "id": "#main/Metadata_Settings/Pipeline_Name" + }, + { + "source": "#main/Version/version", + "id": "#main/Metadata_Settings/Pipeline_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/Metadata_Settings/Putative_Cell_Call" + }, + { + "source": "#main/CheckFastqs/ReadsList", + "id": "#main/Metadata_Settings/Reads" + }, + { + "source": [ + "#main/Transcriptome_Annotation", + "#main/Reference_Genome" + ], + "id": "#main/Metadata_Settings/Reference" + }, + { + "source": "#main/Name_Settings/Run_Name", + "id": "#main/Metadata_Settings/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/Metadata_Settings/Sample_Tag_Names" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/Metadata_Settings/Sample_Tags_Version" + }, + { + "source": "#main/Start_Time/Start_Time", + "id": "#main/Metadata_Settings/Start_Time" + }, + { + "source": 
"#main/Subsample_Settings/Subsample_Reads", + "id": "#main/Metadata_Settings/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": "#main/Metadata_Settings/Subsample_Seed" + }, + { + "source": "#main/Supplemental_Reference", + "id": "#main/Metadata_Settings/Supplemental_Reference" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/Metadata_Settings/VDJ_Version" + } + ] + }, + { + "out": [ + "#main/Metrics/Metrics_Summary", + "#main/Metrics/Metrics_Archive", + "#main/Metrics/output", + "#main/Metrics/Sample_Tag_Out" + ], + "run": "#Metrics.cwl", + "id": "#main/Metrics", + "in": [ + { + "source": "#main/GetDataTable/Annot_Files", + "id": "#main/Metrics/Annot_Files" + }, + { + "source": "#main/AnnotateReads/Read1_error_rate", + "id": "#main/Metrics/Read1_error_rate" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Metrics/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Trueno_zip", + "id": "#main/Metrics/Sample_Tag_Archives" + }, + { + "source": "#main/Internal_Settings/Seq_Run", + "id": "#main/Metrics/Seq_Run" + }, + { + "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "id": "#main/Metrics/UMI_Adjusted_Stats" + }, + { + "source": "#main/VDJ_Compile_Results/vdjMetricsJson", + "id": "#main/Metrics/vdjMetricsJson" + } + ] + }, + { + "out": [ + "#main/Multiplexing_Settings/Tag_Sample_Names", + "#main/Multiplexing_Settings/Sample_Tags_Version" + ], + "in": [ + { + "source": "#main/Sample_Tags_Version", + "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" + }, + { + "source": "#main/Tag_Names", + "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" + } + ], + "run": "#MultiplexingSettings.cwl", + "id": "#main/Multiplexing_Settings", + "label": "Multiplexing Settings" + }, + { + "out": [ + "#main/Name_Settings/Run_Name" + ], + "in": [ + { + "source": "#main/Run_Name", + "id": "#main/Name_Settings/_Run_Name" + } + ], + "run": "#NameSettings.cwl", + "id": 
"#main/Name_Settings", + "label": "Name Settings" + }, + { + "out": [ + "#main/PairReadFiles/ReadPairs" + ], + "run": "#PairReadFiles.cwl", + "id": "#main/PairReadFiles", + "in": [ + { + "source": "#main/CheckFastqs/FastqReadPairs", + "id": "#main/PairReadFiles/FastqReadPairs" + }, + { + "source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#main/PairReadFiles/Reads" + } + ] + }, + { + "out": [ + "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" + ], + "in": [ + { + "source": "#main/Basic_Algo_Only", + "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" + }, + { + "source": "#main/Exact_Cell_Count", + "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" + }, + { + "source": "#main/Putative_Cell_Call", + "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" + } + ], + "run": "#PutativeCellSettings.cwl", + "id": "#main/Putative_Cell_Calling_Settings", + "label": "Putative Cell Calling Settings" + }, + { + "out": [ + "#main/QualityFilterOuter/Filter_Metrics", + "#main/QualityFilterOuter/R1", + "#main/QualityFilterOuter/R2", + "#main/QualityFilterOuter/output" + ], + "run": "#QualityFilterOuter.cwl", + "id": "#main/QualityFilterOuter", + "in": [ + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/QualityFilterOuter/Run_Metadata" + }, + { + "source": "#main/PairReadFiles/ReadPairs", + "id": "#main/QualityFilterOuter/Split_Read_Pairs" + } + ] + }, + { + "out": [ + "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "#main/SplitAndSubsample/log" + ], + "run": "#SplitAndSubsample.cwl", + "id": "#main/SplitAndSubsample", + "in": [ + { + "source": "#main/Reads", + "id": "#main/SplitAndSubsample/Fastqs" + }, + { + "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": 
"#main/Internal_Settings/NumRecordsPerSplit", + "id": "#main/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#main/CheckFastqs/SubsamplingRatio", + "id": "#main/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#main/CheckFastqs/SubsampleSeed", + "id": "#main/SplitAndSubsample/SubsampleSeed" + } + ] + }, + { + "out": [ + "#main/Start_Time/Start_Time" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "string", + "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a/Start_Time" + } + ], + "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a", + "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", + "class": "ExpressionTool" + }, + "id": "#main/Start_Time", + "in": [] + }, + { + "out": [ + "#main/Subsample_Settings/Subsample_Reads", + "#main/Subsample_Settings/Subsample_Seed" + ], + "in": [ + { + "source": "#main/Subsample", + "id": "#main/Subsample_Settings/_Subsample_Reads" + }, + { + "source": "#main/Subsample_seed", + "id": "#main/Subsample_Settings/_Subsample_Seed" + } + ], + "run": "#SubsampleSettings.cwl", + "id": "#main/Subsample_Settings", + "label": "Subsample Settings" + }, + { + "out": [ + "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" + ], + "run": "#UncompressDatatables.cwl", + "id": "#main/Uncompress_Datatables", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/Uncompress_Datatables/Compressed_Data_Table" + }, + { + "source": "#main/GetDataTable/Expression_Data", + "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": 
"#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Compile_Results/vdjCellsDatatable", + "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "#main/VDJ_Compile_Results/vdjDominantContigs", + "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "#main/VDJ_Compile_Results/vdjMetricsJson", + "#main/VDJ_Compile_Results/vdjMetricsCsv", + "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" + ], + "run": "#VDJ_Compile_Results.cwl", + "id": "#main/VDJ_Compile_Results", + "in": [ + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/VDJ_Compile_Results/Seq_Metrics" + }, + { + "source": "#main/CellClassifier/cellTypePredictions", + "id": "#main/VDJ_Compile_Results/cellTypeMapping" + }, + { + "valueFrom": "$([])", + "id": "#main/VDJ_Compile_Results/chainsToIgnore" + }, + { + "source": "#main/Internal_Settings/VDJ_JGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueJgene" + }, + { + "source": "#main/Internal_Settings/VDJ_VGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueVgene" + }, + { + "source": 
"#main/VDJ_GatherIGCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/igCalls" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/VDJ_Compile_Results/metadata" + }, + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/VDJ_Compile_Results/putativeCells" + }, + { + "source": "#main/VDJ_GatherTCRCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/tcrCalls" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Compile_Results/vdjVersion" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherIGCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherIGCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", + "id": "#main/VDJ_GatherIGCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherTCRCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherTCRCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", + "id": "#main/VDJ_GatherTCRCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_IG/num_splits", + "#main/VDJ_Preprocess_Reads_IG/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_IG", + "in": [ + { + "source": "#main/AnnotateReads/validIgReads", + "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" + }, + { + "source": "#main/AnnotateReads/num_valid_ig_reads", + "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" + }, + { + "valueFrom": "BCR", + "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_TCR/num_splits", + "#main/VDJ_Preprocess_Reads_TCR/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_TCR", + "in": [ + { + "source": "#main/AnnotateReads/validTcrReads", + "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" + }, 
+ { + "source": "#main/AnnotateReads/num_valid_tcr_reads", + "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" + }, + { + "valueFrom": "TCR", + "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Settings/VDJ_Version" + ], + "in": [ + { + "source": "#main/VDJ_Version", + "id": "#main/VDJ_Settings/_VDJ_Version" + } + ], + "run": "#VDJ_Settings.cwl", + "id": "#main/VDJ_Settings", + "label": "VDJ Settings" + }, + { + "out": [ + "#main/Version/version" + ], + "run": "#Version.cwl", + "id": "#main/Version", + "in": [] + } + ], + "outputs": [ + { + "outputSource": "#main/GetDataTable/Bioproduct_Stats", + "type": [ + "null", + "File" + ], + "id": "#main/Bioproduct_Stats", + "label": "Bioproduct Statistics" + }, + { + "outputSource": "#main/GetDataTable/Cell_Label_Filter", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Cell_Label_Filter", + "label": "Cell Label Filter" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables", + "label": "Data Tables" + }, + { + "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables_Unfiltered", + "label": "Unfiltered Data Tables" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data", + "label": "Expression Matrix" + }, + { + "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data_Unfiltered", + "label": "Unfiltered Expression Matrix" + }, + { + "outputSource": "#main/MergeBAM/Final_Bam", + "type": "File", + "id": "#main/Final_Bam", + "label": "Final BAM File" + }, + { + "outputSource": "#main/IndexBAM/Index", + "type": "File", + "id": 
"#main/Final_Bam_Index", + "label": "Final BAM Index" + }, + { + "outputSource": "#main/CellClassifier/cellTypePredictions", + "type": [ + "null", + "File" + ], + "id": "#main/ImmuneCellClassification(Experimental)", + "label": "Immune Cell Classification (Experimental)" + }, + { + "outputSource": "#main/BundleLogs/logs_dir", + "type": "Directory", + "id": "#main/Logs", + "label": "Pipeline Logs" + }, + { + "outputSource": "#main/Metrics/Metrics_Summary", + "type": "File", + "id": "#main/Metrics_Summary", + "label": "Metrics Summary" + }, + { + "outputSource": "#main/MergeMultiplex/Multiplex_out", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Multiplex" + }, + { + "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", + "type": [ + "null", + "File" + ], + "id": "#main/Protein_Aggregates_Experimental", + "label": "Protein Aggregates (Experimental)" + }, + { + "outputSource": "#main/GetDataTable/Putative_Cells_Origin", + "type": [ + "null", + "File" + ], + "id": "#main/Putative_Cells_Origin", + "label": "Putative Cells Origin" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatable", + "label": "vdjCellsDatatable" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatableUncorrected", + "label": "vdjCellsDatatableUncorrected" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjDominantContigs", + "label": "vdjDominantContigs" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", + "type": [ + "null", + "File" + ], + "id": "#main/vdjMetricsCsv", + "label": "vdjMetricsCsv" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjUnfilteredContigs", + "label": 
"vdjUnfilteredContigs" + } + ], + "id": "#main", + "class": "Workflow" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#MergeBAM.cwl/BamFiles" + }, + { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Run_Name" + }, + { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Sample_Tags_Version" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_merge.log", + "outputs": [ + { + "outputBinding": { + "glob": "*_final.BAM" + }, + "type": "File", + "id": "#MergeBAM.cwl/Final_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#MergeBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "merge" + ], + "id": "#MergeBAM.cwl", + "arguments": [ + { + "prefix": "-@", + "valueFrom": "$(runtime.cores)" + }, + { + "position": 0, + "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" + } + ], + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 4, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/AbSeq_Reference" + }, + { + "type": "string", + "id": "#Metadata.cwl/Assay" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#Metadata.cwl/Basic_Algo_Only" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#Metadata.cwl/Bead_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Label_Version" + }, + { + "type": [ + "null", + "string" + ], + 
"id": "#Metadata.cwl/Libraries" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Name" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Putative_Cell_Call" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#Metadata.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reads" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Name" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Sample_Tag_Names" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Start_Time" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Subsample_Seed" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Supplemental_Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "run_metadata.json", + "outputs": [ + { + "outputBinding": { + "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" + }, + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Base_Name" + }, + { + "type": "stdout", + "id": "#Metadata.cwl/Run_Metadata" + } + ], + "baseCommand": "echo", + "id": "#Metadata.cwl", + "arguments": [ + { + "prefix": "" + }, + { + "shellQuote": 
true, + "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = 
\"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-files" + }, + "type": "File", + "id": "#Metrics.cwl/Annot_Files" + }, + { + "inputBinding": { + "prefix": "--read1-error-rate" + }, + "type": "File", + "id": "#Metrics.cwl/Read1_error_rate" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#Metrics.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--sample-tag-archives", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Archives" + }, + { + "inputBinding": { + "prefix": 
"--seq-run" + }, + "type": [ + "null", + "string" + ], + "id": "#Metrics.cwl/Seq_Run" + }, + { + "inputBinding": { + "prefix": "--umi-adjusted-stats" + }, + "type": [ + "null", + "File" + ], + "id": "#Metrics.cwl/UMI_Adjusted_Stats" + }, + { + "inputBinding": { + "prefix": "--vdj-metrics-fp" + }, + "type": [ + "null", + "File" + ], + "id": "#Metrics.cwl/vdjMetricsJson" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "internal-metrics-archive.tar.gz" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Archive" + }, + { + "outputBinding": { + "glob": "*_Metrics_Summary.csv" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Summary" + }, + { + "outputBinding": { + "glob": "*.zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Out" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Metrics.cwl/output" + } + ], + "baseCommand": [ + "mist_metrics.py" + ], + "class": "CommandLineTool", + "id": "#Metrics.cwl" + }, + { + "inputs": [ + { + "default": "Targeted", + "type": "string", + "id": "#MultiplexingSettings.cwl/Assay" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" + } + ], + "class": "ExpressionTool", + "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || 
_Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n {\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", + "id": "#MultiplexingSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/_Run_Name" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/Run_Name" + } + ], + "class": "ExpressionTool", + "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", + "id": "#NameSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/FastqReadPairs" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#PairReadFiles.cwl/Reads" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "PairReadFiles takes an array of split files and pairs 
them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. The pairing information is taken from CheckFastqs.\n", + "id": "#PairReadFiles.cwl", + "outputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R1" + }, + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R2" + }, + { + "type": "int", + "name": "#PairReadFiles.cwl/ReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/ReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/ReadPairs" + } + ], + "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, 
\"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if (fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from 
CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n 
readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", + "class": "ExpressionTool" + }, + { + "inputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" + } + ], + "class": "ExpressionTool", + "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: 
inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", + "id": "#PutativeCellSettings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#QualityFilter.cwl/Run_Metadata" + }, + { + "type": { + "fields": [ + { + "inputBinding": { + "prefix": "--r1" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" + }, + { + "inputBinding": { + "prefix": "--r2" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" + }, + { + "inputBinding": { + "prefix": "--read-pair-id" + }, + "type": "int", + "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" + }, + { + "inputBinding": { + "prefix": "--library" + }, + "type": "string", + "name": "#QualityFilter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "id": "#QualityFilter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*read_quality.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#QualityFilter.cwl/Filter_Metrics" + }, + { + "outputBinding": { + "glob": "*_R1*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R1" + }, + { + "outputBinding": { + "glob": "*_R2*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R2" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#QualityFilter.cwl/output" + } + ], + "baseCommand": [ + "mist_quality_filter.py" + ], + "class": "CommandLineTool", + "id": "#QualityFilter.cwl" + }, + { + "inputs": [ + { + "type": "File", + "id": "#QualityFilterOuter.cwl/Run_Metadata" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" + }, + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" + }, + { + "type": "int", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" + }, + { + "type": "string", + "name": 
"#QualityFilterOuter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/Filter_Metrics" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R1" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R2" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/output" + } + ], + "class": "Workflow", + "steps": [ + { + "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", + "out": [ + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" + ], + "run": "#QualityFilter.cwl", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", + "in": [ + { + "source": "#QualityFilterOuter.cwl/Run_Metadata", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" + }, + { + "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" + } + ] + } + ], + "id": 
"#QualityFilterOuter.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/Fastqs" + }, + { + "type": { + "items": "string", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" + }, + { + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" + }, + { + "type": "float", + "id": "#SplitAndSubsample.cwl/SubsampleRatio" + }, + { + "type": "int", + "id": "#SplitAndSubsample.cwl/SubsampleSeed" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", + "id": "#SplitAndSubsample.cwl", + "steps": [ + { + "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", + "out": [ + "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": { + "items": "File", + "type": "array" + }, + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" + } + ], + "outputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" + } + ], + "class": "ExpressionTool", + "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput", + "in": [ + { + "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" + } + ] + }, + { + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + 
"inputBinding": { + "prefix": "--fastq-file-path" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" + }, + { + "inputBinding": { + "prefix": "--files-to-skip-split-and-subsample", + "itemSeparator": "," + }, + "type": { + "items": "string", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" + }, + { + "inputBinding": { + "prefix": "--num-records" + }, + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" + }, + { + "inputBinding": { + "prefix": "--subsample-ratio" + }, + "type": "float", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": "int", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*.fastq.gz", + "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" + } + ], + "baseCommand": [ + "mist_split_fastq.py" + ], + "class": "CommandLineTool", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" + }, + "doc": "Allocate one docker/python process per file to do the actual file splitting.", + "scatter": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + ], + "in": [ + { + "source": "#SplitAndSubsample.cwl/Fastqs", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + }, + { + "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": 
"#SplitAndSubsample.cwl/NumRecordsPerSplit", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleRatio", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleSeed", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" + } + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample", + "out": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "#SplitAndSubsample.cwl/SplitAndSubsample/log" + ] + } + ], + "outputs": [ + { + "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" + }, + { + "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/log" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Seed" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/Subsample_Seed" + } + ], + "class": "ExpressionTool", + "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", + "id": "#SubsampleSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#UncompressDatatables.cwl/Compressed_Data_Table" + }, + { + "type": "File", + "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" + } + ], + "requirements": [ + { + "class": 
"ScatterFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", + "type": { + "items": "File", + "type": "array" + }, + "id": "#UncompressDatatables.cwl/Uncompressed_Data_Tables" + }, + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" + } + ], + "class": "Workflow", + "steps": [ + { + "id": "#UncompressDatatables.cwl/Uncompress_Datatable", + "out": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": "$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + ] + }, + "scatter": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + ], + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Data_Table", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + } + ] + }, + { + "out": [ + "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": 
"#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": "$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + + ] + }, + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" + } + ] + } + ], + "id": "#UncompressDatatables.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" + }, + { + "inputBinding": { + "position": 2 + }, + "type": "string", + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" + }, + { + "inputBinding": { + "position": 3 + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_pruned.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" + } + ], + "baseCommand": [ + "AssembleAndAnnotate.sh" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 1, + "ramMin": 3200, + "class": "ResourceRequirement" + } + ] 
+ }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" + }, + { + 
"inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" + 
}, + { + "inputs": [ + { + "inputBinding": { + "position": 10, + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "position": 0, + "prefix": "--cell-type-mapping-fp" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/cellTypeMapping" + }, + { + "inputBinding": { + "position": 4, + "prefix": "--ignore", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" + }, + { + "inputBinding": { + "position": 8, + "prefix": "--e-value-for-j" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueJgene" + }, + { + "inputBinding": { + "position": 7, + "prefix": "--e-value-for-v" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueVgene" + }, + { + "inputBinding": { + "position": 5 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/igCalls" + }, + { + "inputBinding": { + "position": 9, + "prefix": "--metadata-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/metadata" + }, + { + "inputBinding": { + "position": 3, + "prefix": "--putative-cells-json-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/putativeCells" + }, + { + "inputBinding": { + "position": 6 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/tcrCalls" + }, + { + "inputBinding": { + "position": 2, + "prefix": "--vdj-version" + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Compile_Results.cwl/vdjVersion" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "doc": "VDJ data per cell, with distribution based error correction", + "outputBinding": { + "glob": "*_VDJ_perCell.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" + }, + { + "doc": "VDJ data per cell, including non-putative cells, no 
error correction applied", + "outputBinding": { + "glob": "*_VDJ_perCell_uncorrected.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" + }, + { + "outputBinding": { + "glob": "*_VDJ_Dominant_Contigs.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.json" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" + }, + { + "outputBinding": { + "glob": "*_DBEC_cutoff.png" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" + }, + { + "outputBinding": { + "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" + } + ], + "baseCommand": [ + "mist_vdj_compile_results.py" + ], + "id": "#VDJ_Compile_Results.cwl", + "class": "CommandLineTool", + "hints": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", + "id": "#VDJ_GatherCalls.cwl", + "steps": [ + { + "out": [ + "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": 
"ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_constant_region_called_pruned.csv.gz", + "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" + } + ], + "class": "CommandLineTool", + "arguments": [ + { + "shellQuote": false, + "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" + }, + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", + "in": [ + { + "source": "#VDJ_GatherCalls.cwl/theCalls", + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" + } + ] + } + ], + "outputs": [ + { + "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", + "type": [ + "null", + "File" + ], + "id": "#VDJ_GatherCalls.cwl/gatheredCalls" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "File" + ], + "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "id": 
"#VDJ_Preprocess_Reads.cwl/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "8" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", + "id": "#VDJ_Preprocess_Reads.cwl/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/num_splits" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_RSEC_Reads.cwl", + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", + "class": "ResourceRequirement" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + 
"#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" + ], + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" + } + ], + "run": "#VDJ_Trim_Reads.cwl", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", + "hints": [ + { + "coresMin": 8, + "class": "ResourceRequirement" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" + } + ], + "class": "ExpressionTool", + "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" + }, + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" + }, + { + "source": 
"#VDJ_Preprocess_Reads.cwl/vdj_type", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" + } + ] + } + ], + "id": "#VDJ_Preprocess_Reads.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--vdj-valid-reads", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#VDJ_RSEC_Reads.cwl/Valid_Reads" + }, + { + "inputBinding": { + "prefix": "--num-splits" + }, + "type": [ + "null", + "int" + ], + "id": "#VDJ_RSEC_Reads.cwl/num_splits" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*RSEC_Reads_Fastq_*.tar.gz" + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" + } + ], + "baseCommand": "mist_vdj_rsec_reads.py", + "class": "CommandLineTool", + "id": "#VDJ_RSEC_Reads.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "Any" + ], + "id": "#VDJ_Settings.cwl/_VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Settings.cwl/VDJ_Version" + } + ], + "class": "ExpressionTool", + "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === 
\"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", + "id": "#VDJ_Settings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "cutadapt.log" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Trim_Report" + }, + { + "outputBinding": { + "glob": "*vdjtxt.gz" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" + } + ], + "baseCommand": "VDJ_Trim_Reads.sh", + "class": "CommandLineTool", + "id": "#VDJ_Trim_Reads.cwl" + }, + { + "inputs": [], + "requirements": [ + + ], + "stdout": "output.txt", + "outputs": [ + { + "outputBinding": { + "glob": "output.txt", + "loadContents": true, + "outputEval": "$(self[0].contents)" + }, + "type": "string", + "id": "#Version.cwl/version" + } + ], + "baseCommand": [ + "mist_version.py" + ], + "id": "#Version.cwl", + "class": "CommandLineTool" + } + ], + "$namespaces": { + "sbg": "https://sevenbridges.com#", + "arv": "http://arvados.org/cwl#" + } +} \ No newline at end of file diff --git a/target/docker/mapping/bd_rhapsody/setup_logger.py b/target/docker/mapping/bd_rhapsody/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/mapping/bd_rhapsody/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = 
logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/mapping/cellranger_count/.config.vsh.yaml b/target/docker/mapping/cellranger_count/.config.vsh.yaml new file mode 100644 index 00000000000..389d033b2a2 --- /dev/null +++ b/target/docker/mapping/cellranger_count/.config.vsh.yaml @@ -0,0 +1,266 @@ +functionality: + name: "cellranger_count" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Samuel D'Souza" + roles: + - "author" + info: + role: "Contributor" + links: + github: "srdsam" + linkedin: "samuel-d-souza-887023150/" + organizations: + - name: "Chan Zuckerberg Biohub" + href: "https://www.czbiohub.org" + role: "Data Engineer" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "The fastq.gz files to align. Can also be a single directory containing\ + \ fastq.gz files." 
+ info: null + example: + - "sample_S1_L001_R1_001.fastq.gz" + - "sample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + description: "The path to Cell Ranger reference tar.gz file. Can also be a directory." + info: null + example: + - "reference.tar.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The folder to store the alignment results." + info: null + example: + - "/path/to/output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--expect_cells" + description: "Expected number of recovered cells, used as input to cell calling\ + \ algorithm." 
+ info: null + example: + - 3000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--chemistry" + description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ + \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ + \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ + \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ + - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ + \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ + \ for more information.\n" + info: null + default: + - "auto" + required: false + choices: + - "auto" + - "threeprime" + - "fiveprime" + - "SC3Pv1" + - "SC3Pv2" + - "SC3Pv3" + - "SC3Pv3LT" + - "SC3Pv3HT" + - "SC5P-PE" + - "SC5P-R2" + - "SC-FB" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--secondary_analysis" + description: "Whether or not to run the secondary analysis e.g. clustering." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--generate_bam" + description: "Whether to generate a BAM file." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--include_introns" + description: "Include intronic reads in count (default=true unless --target-panel\ + \ is specified in which case default=false)" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Align fastq files using Cell Ranger count." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + - type: "file" + path: "src/utils/setup_logger.py" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/cellranger:7.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/config.vsh.yaml" + platform: "docker" + output: 
"/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count/cellranger_count" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/cellranger_count/cellranger_count b/target/docker/mapping/cellranger_count/cellranger_count new file mode 100755 index 00000000000..8aa688967d3 --- /dev/null +++ b/target/docker/mapping/cellranger_count/cellranger_count @@ -0,0 +1,1206 @@ +#!/usr/bin/env bash + +# cellranger_count 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Samuel D'Souza (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# 
ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="cellranger_count" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cellranger_count 0.12.4" + echo "" + echo "Align fastq files using Cell Ranger count." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: sample_S1_L001_R1_001.fastq.gz;sample_S1_L001_R2_001.fastq.gz" + echo " The fastq.gz files to align. Can also be a single directory containing" + echo " fastq.gz files." + echo "" + echo " --reference" + echo " type: file, required parameter, file must exist" + echo " example: reference.tar.gz" + echo " The path to Cell Ranger reference tar.gz file. Can also be a directory." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: /path/to/output" + echo " The folder to store the alignment results." + echo "" + echo "Arguments:" + echo " --expect_cells" + echo " type: integer" + echo " example: 3000" + echo " Expected number of recovered cells, used as input to cell calling" + echo " algorithm." + echo "" + echo " --chemistry" + echo " type: string" + echo " default: auto" + echo " choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3," + echo "SC3Pv3LT, SC3Pv3HT, SC5P-PE, SC5P-R2, SC-FB ]" + echo " Assay configuration." 
+ echo " - auto: autodetect mode" + echo " - threeprime: Single Cell 3'" + echo " - fiveprime: Single Cell 5'" + echo " - SC3Pv1: Single Cell 3' v1" + echo " - SC3Pv2: Single Cell 3' v2" + echo " - SC3Pv3: Single Cell 3' v3" + echo " - SC3Pv3LT: Single Cell 3' v3 LT" + echo " - SC3Pv3HT: Single Cell 3' v3 HT" + echo " - SC5P-PE: Single Cell 5' paired-end" + echo " - SC5P-R2: Single Cell 5' R2-only" + echo " - SC-FB: Single Cell Antibody-only 3' v2 or 5'" + echo " See" + echo " " + echo "https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-" + echo " for more information." + echo "" + echo " --secondary_analysis" + echo " type: boolean" + echo " default: false" + echo " Whether or not to run the secondary analysis e.g. clustering." + echo "" + echo " --generate_bam" + echo " type: boolean" + echo " default: true" + echo " Whether to generate a BAM file." + echo "" + echo " --include_introns" + echo " type: boolean" + echo " default: true" + echo " Include intronic reads in count (default=true unless --target-panel is" + echo " specified in which case default=false)" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  # unconditional strategies: build and/or pull regardless of local state
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild $VSHD_ID
+  # "ifneedbe*" strategies: only act when the image is not available locally
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
+    # suspend errexit (if it was on) so a missing image does not abort the script
+    save=$-; set +e
+    ViashDockerLocalTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  # push strategies
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
+    # only push when the tag cannot be found on the remote
+    save=$-; set +e
+    ViashDockerRemoteTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $@                  : commands to verify being present
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  # the in-container loop prints the first command that `command -v` cannot
+  # resolve and exits 1; the printed name is captured in $missing
+  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM ghcr.io/data-intuitive/cellranger:7.0
+
+ENTRYPOINT []
+
+
+RUN apt update && apt upgrade -y
+LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Samuel D'Souza, Robrecht Cannoodt"
+LABEL org.opencontainers.image.description="Companion container for running component mapping cellranger_count"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_count-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+        len=${#outp[@]}
+        if ((len==0)); then
+          continue
+        else
+          unset outp[$((len-1))]
+        fi
+        ;;
+      *)
+        len=${#outp[@]}
+        outp[$len]="$i"
+        ;;
+      esac
+    done
+    echo /"${outp[*]}"
+  )
+}
+# ViashAutodetectMount: auto configuring docker mounts from parameters
+# $1                  : The parameter value
+# returns             : New parameter
+# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker
+# examples:
+#   ViashAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
+#   ViashAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
+function ViashAutodetectMount {
+  abs_path=$(ViashAbsolutePath "$1")
+  # a directory is mounted as-is; for anything else its parent is mounted
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  echo "$mount_target/$base_name"
+}
+# ViashAutodetectMountArg: print the docker `--volume` argument that makes the
+# path printed by ViashAutodetectMount available inside the container
+function ViashAutodetectMountArg {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
+  echo "--volume=\"$mount_source:$mount_target\""
+}
+# ViashStripAutomount: remove a leading /viash_automount prefix from a path
+function ViashStripAutomount {
+  abs_path=$(ViashAbsolutePath "$1")
+  echo "${abs_path#/viash_automount}"
+}
+# ViashExtractFlags: Retain leading flag
+# $1     : string with a possible leading flag
+# return : leading flag
+# examples:
+#   ViashExtractFlags --foo=bar  # returns --foo
+function ViashExtractFlags {
+  echo $1 | sed 's/=.*//'
+}
+# initialise variables
+VIASH_EXTRA_MOUNTS=()
+
+# initialise array
+VIASH_POSITIONAL_ARGS=''
+VIASH_MODE='run'
+
+# parse the command line; options prefixed with three dashes are handled by
+# the wrapper itself, everything unrecognised becomes a positional argument
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help)
+            ViashHelp
+            exit
+            ;;
+        ---v|---verbose)
+            let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
+            shift 1
+            ;;
+        ---verbosity)
+            VIASH_VERBOSITY="$2"
+            shift 2
+            ;;
+        ---verbosity=*)
+
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        --version)
+            echo "cellranger_count 0.12.4"
+            exit
+            ;;
+        # --input may be repeated; values are accumulated separated by ';'
+        --input)
+            if [ -z "$VIASH_PAR_INPUT" ]; then
+              VIASH_PAR_INPUT="$2"
+            else
+              VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2"
+            fi
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --input=*)
+            if [ -z "$VIASH_PAR_INPUT" ]; then
+              VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
+            else
+              VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1")
+            fi
+            shift 1
+            ;;
+        # every option below accepts exactly one value, either as a separate
+        # word (`--opt value`) or attached (`--opt=value`); a second
+        # occurrence of the same option is rejected
+        --reference)
+            [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_REFERENCE="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --reference=*)
+            [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --output)
+            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --output=*)
+            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --expect_cells)
+            [ -n "$VIASH_PAR_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--expect_cells\': \'$VIASH_PAR_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_EXPECT_CELLS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --expect_cells. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --expect_cells=*)
+            [ -n "$VIASH_PAR_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--expect_cells=*\': \'$VIASH_PAR_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_EXPECT_CELLS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --chemistry)
+            [ -n "$VIASH_PAR_CHEMISTRY" ] && ViashError Bad arguments for option \'--chemistry\': \'$VIASH_PAR_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CHEMISTRY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --chemistry. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --chemistry=*)
+            [ -n "$VIASH_PAR_CHEMISTRY" ] && ViashError Bad arguments for option \'--chemistry=*\': \'$VIASH_PAR_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CHEMISTRY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --secondary_analysis)
+            [ -n "$VIASH_PAR_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--secondary_analysis\': \'$VIASH_PAR_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_SECONDARY_ANALYSIS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --secondary_analysis. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --secondary_analysis=*)
+            [ -n "$VIASH_PAR_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--secondary_analysis=*\': \'$VIASH_PAR_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_SECONDARY_ANALYSIS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --generate_bam)
+            [ -n "$VIASH_PAR_GENERATE_BAM" ] && ViashError Bad arguments for option \'--generate_bam\': \'$VIASH_PAR_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GENERATE_BAM="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --generate_bam. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --generate_bam=*)
+            [ -n "$VIASH_PAR_GENERATE_BAM" ] && ViashError Bad arguments for option \'--generate_bam=*\': \'$VIASH_PAR_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GENERATE_BAM=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --include_introns)
+            [ -n "$VIASH_PAR_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--include_introns\': \'$VIASH_PAR_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_INCLUDE_INTRONS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --include_introns. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --include_introns=*)
+            [ -n "$VIASH_PAR_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--include_introns=*\': \'$VIASH_PAR_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1
+            VIASH_PAR_INCLUDE_INTRONS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        # `---setup <strategy>`: the strategy is a separate word, so both the
+        # flag and its value have to be consumed.
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2
+            ;;
+        # `---setup=<strategy>`: flag and value are a single word, so only
+        # one argument is consumed (shifting two would swallow the next
+        # option, or fail when this is the last argument).
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        # NOTE: the `---v` alias is shadowed by the earlier `---v|---verbose`
+        # case, so in practice only `---volume` reaches this branch.
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        # the value is attached to the flag itself, so strip the flag from $1
+        # (not from $2, which is the next, unrelated argument)
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        # resource hints; each may be given at most once
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1
+            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        *)  # positional arg or unknown option
+            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
+            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
+            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+            shift # past argument
+            ;;
+    esac
+done
+
+# parse positional parameters
+eval set -- $VIASH_POSITIONAL_ARGS
+
+
+ViashDockerInstallationCheck
+
+# setup mode: only prepare the image with the requested strategy, then stop
+if [ $VIASH_MODE == "docker_setup" ]; then
+  ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY"
+  exit 0
+fi
+# every other mode: make sure the image is available before continuing
+ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0' ifneedbepullelsecachedbuild
+
+# debug mode: open an interactive shell in the container with $PWD mounted at /pwd
+if [ $VIASH_MODE == "docker_debug" ]; then
+  ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0'"
+  docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0'
+  exit 0
+fi
+
+# setting computational defaults
+
+# helper function for parsing memory strings
+# $1     : amount of memory as <integer><unit>, unit one of b, k[b], m[b],
+#          g[b], t[b], p[b]; case and whitespace are ignored
+# return : the amount in bytes (powers of 1024); nothing is printed when the
+#          string does not match
+function ViashMemoryAsBytes {
+  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
+  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
+  if [[ $memory =~ $memory_regex ]]; then
+    # split into the leading digits and the trailing unit
+    local number=${memory/[^0-9]*/}
+    local symbol=${memory/*[0-9]/}
+
+    case $symbol in
+      b)      memory_b=$number ;;
+      kb|k)   memory_b=$(( $number * 1024 )) ;;
+      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
+      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
+      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ !
-z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each larger unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # (`unset` takes the variable NAME; with a leading `$` the empty value
+    # was expanded instead and nothing was unset)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # pass the name, not the (empty) value, so the variable really is unset
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_REFERENCE+x} ]; then
+  ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters.
+  exit 1
+fi
+
+# filling in defaults
+if [ -z ${VIASH_PAR_CHEMISTRY+x} ]; then
+  VIASH_PAR_CHEMISTRY="auto"
+fi
+if [ -z ${VIASH_PAR_SECONDARY_ANALYSIS+x} ]; then
+  VIASH_PAR_SECONDARY_ANALYSIS="false"
+fi
+if [ -z ${VIASH_PAR_GENERATE_BAM+x} ]; then
+  VIASH_PAR_GENERATE_BAM="true"
+fi
+if [ -z ${VIASH_PAR_INCLUDE_INTRONS+x} ]; then
+  VIASH_PAR_INCLUDE_INTRONS="true"
+fi
+
+# check whether required files exist
+if [ ! -z "$VIASH_PAR_INPUT" ]; then
+  # split the ';'-separated list; globbing is disabled while splitting
+  IFS=';'
+  set -f
+  for file in $VIASH_PAR_INPUT; do
+    unset IFS
+    if [ ! -e "$file" ]; then
+      ViashError "Input file '$file' does not exist."
+      exit 1
+    fi
+  done
+  set +f
+fi
+if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then
+  ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist."
+  exit 1
+fi
+
+# check whether parameters values are of the right type
+if [[ -n "$VIASH_PAR_EXPECT_CELLS" ]]; then
+  if ! [[ "$VIASH_PAR_EXPECT_CELLS" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError '--expect_cells' has to be an integer. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_PAR_SECONDARY_ANALYSIS" ]]; then
+  if ! [[ "$VIASH_PAR_SECONDARY_ANALYSIS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then
+    ViashError '--secondary_analysis' has to be a boolean. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_PAR_GENERATE_BAM" ]]; then
+  if ! [[ "$VIASH_PAR_GENERATE_BAM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then
+    ViashError '--generate_bam' has to be a boolean. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_PAR_INCLUDE_INTRONS" ]]; then
+  if ! [[ "$VIASH_PAR_INCLUDE_INTRONS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then
+    ViashError '--include_introns' has to be a boolean. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_CPUS" ]]; then
+  if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_MEMORY_B" ]]; then
+  if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_MEMORY_KB" ]]; then
+  if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_MEMORY_MB" ]]; then
+  if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_MEMORY_GB" ]]; then
+  if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_MEMORY_TB" ]]; then
+  if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+if [[ -n "$VIASH_META_MEMORY_PB" ]]; then
+  if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then
+    ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+fi
+
+# check whether the value belongs to the set of allowed choices
+if [ ! -z "$VIASH_PAR_CHEMISTRY" ]; then
+  VIASH_PAR_CHEMISTRY_CHOICES=("auto:threeprime:fiveprime:SC3Pv1:SC3Pv2:SC3Pv3:SC3Pv3LT:SC3Pv3HT:SC5P-PE:SC5P-R2:SC-FB")
+  IFS=':'
+  set -f
+  # the value is wrapped in ':' so only a complete choice can match
+  if ! [[ ":${VIASH_PAR_CHEMISTRY_CHOICES[*]}:" =~ ":$VIASH_PAR_CHEMISTRY:" ]]; then
+    ViashError '--chemistry' specified value of \'$VIASH_PAR_CHEMISTRY\' is not in the list of allowed values. Use "--help" to get more information on the parameters.
+    exit 1
+  fi
+  set +f
+  unset IFS
+fi
+
+# create parent directories of output files, if so desired
+if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then
+  mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")"
+fi
+
+# detect volumes from file arguments
+# for every file argument: record the --volume flag needed to mount it and
+# rewrite the variable to the path under /viash_automount
+VIASH_CHOWN_VARS=()
+if [ ! -z "$VIASH_PAR_INPUT" ]; then
+  VIASH_TEST_INPUT=()
+  IFS=';'
+  for var in $VIASH_PAR_INPUT; do
+    unset IFS
+    VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" )
+    var=$(ViashAutodetectMount "$var")
+    VIASH_TEST_INPUT+=( "$var" )
+  done
+  VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}")
+fi
+if [ ! -z "$VIASH_PAR_REFERENCE" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" )
+  VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE")
+fi
+if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" )
+  VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT")
+  # the output path is additionally remembered for ViashPerformChown
+  VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" )
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" )
+  VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" )
+  VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" )
+  VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG")
+fi
+if [ !
-z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" )
+  VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR")
+fi
+
+# get unique mounts
+VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u))
+
+# change file ownership
+# ViashPerformChown: chown the paths in VIASH_CHOWN_VARS to the invoking
+# user and group by running `chown` inside the container; registered below as
+# the EXIT trap. Does nothing when VIASH_CHOWN_VARS is empty.
+function ViashPerformChown {
+  if (( ${#VIASH_CHOWN_VARS[@]} )); then
+    # errexit is switched off around the call, so a failing chown does not
+    # abort the script
+    set +e
+    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
+    set -e
+  fi
+}
+trap ViashPerformChown EXIT
+
+# helper function for filling in extra docker args
+VIASH_EXTRA_DOCKER_ARGS=""
+if [ ! -z "$VIASH_META_MEMORY_MB" ]; then
+  VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m"
+fi
+if [ ! -z "$VIASH_META_CPUS" ]; then
+  VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}"
+fi
+
+# The heredoc below is piped into `bash` inside the container. It is
+# unquoted, so unescaped $VAR and $( ... ) are expanded here on the host,
+# while \$ and \` are passed through for the container to evaluate.
+ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0)"
+cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count:0.12.0
+set -e
+tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellranger_count-XXXXXX").sh
+function clean_up {
+  rm "\$tempscript"
+}
+function interrupt {
+  echo -e "\nCTRL-C Pressed..."
+  exit 1
+}
+trap clean_up EXIT
+trap interrupt INT SIGINT
+cat > "\$tempscript" << 'VIASHMAIN'
+#!/bin/bash
+
+set -eo pipefail
+
+## VIASH START
+# The following code has been auto-generated by Viash.
+$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
+$( if [ !
-z ${VIASH_PAR_REFERENCE+x} ]; then echo "${VIASH_PAR_REFERENCE}" | sed "s#'#'\"'\"'#g;s#.*#par_reference='&'#" ; else echo "# par_reference="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_EXPECT_CELLS+x} ]; then echo "${VIASH_PAR_EXPECT_CELLS}" | sed "s#'#'\"'\"'#g;s#.*#par_expect_cells='&'#" ; else echo "# par_expect_cells="; fi ) +$( if [ ! -z ${VIASH_PAR_CHEMISTRY+x} ]; then echo "${VIASH_PAR_CHEMISTRY}" | sed "s#'#'\"'\"'#g;s#.*#par_chemistry='&'#" ; else echo "# par_chemistry="; fi ) +$( if [ ! -z ${VIASH_PAR_SECONDARY_ANALYSIS+x} ]; then echo "${VIASH_PAR_SECONDARY_ANALYSIS}" | sed "s#'#'\"'\"'#g;s#.*#par_secondary_analysis='&'#" ; else echo "# par_secondary_analysis="; fi ) +$( if [ ! -z ${VIASH_PAR_GENERATE_BAM+x} ]; then echo "${VIASH_PAR_GENERATE_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_generate_bam='&'#" ; else echo "# par_generate_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_INCLUDE_INTRONS+x} ]; then echo "${VIASH_PAR_INCLUDE_INTRONS}" | sed "s#'#'\"'\"'#g;s#.*#par_include_introns='&'#" ; else echo "# par_include_introns="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi )
+
+## VIASH END
+
+# just to make sure paths are absolute
+# (quoted so paths containing whitespace survive word splitting)
+par_reference=\`realpath "\$par_reference"\`
+par_output=\`realpath "\$par_output"\`
+
+# create temporary directory
+tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX")
+function clean_up {
+  rm -rf "\$tmpdir"
+}
+trap clean_up EXIT
+
+# process inputs
+# for every fastq file found, make a symlink into the tempdir
+fastq_dir="\$tmpdir/fastqs"
+mkdir -p "\$fastq_dir"
+IFS=";"
+for var in \$par_input; do
+  unset IFS
+  abs_path=\`realpath "\$var"\`
+  if [ -d "\$abs_path" ]; then
+    # the pattern is quoted so that find receives it verbatim instead of the
+    # shell expanding it against the current working directory
+    find "\$abs_path" -name '*.fastq.gz' -exec ln -s {} "\$fastq_dir" \\;
+  else
+    ln -s "\$abs_path" "\$fastq_dir"
+  fi
+done
+
+# process reference
+if file "\$par_reference" | grep -q 'gzip compressed data'; then
+  echo "Untarring genome"
+  # extract into a directory of its own, so the reference files do not end
+  # up mixed with the fastq symlinks handed to --fastqs
+  reference_dir="\$tmpdir/reference"
+  mkdir -p "\$reference_dir"
+  tar -xvf "\$par_reference" -C "\$reference_dir" --strip-components=1
+  par_reference="\$reference_dir"
+fi
+
+# cd into tempdir
+cd "\$tmpdir"
+
+# add additional params
+extra_params=( )
+
+if [ ! -z "\$meta_cpus" ]; then
+  extra_params+=( "--localcores=\$meta_cpus" )
+fi
+if [ ! -z "\$meta_memory_gb" ]; then
+  # always keep 2gb for the OS itself
+  memory_gb=\`python -c "print(int('\$meta_memory_gb') - 2)"\`
+  extra_params+=( "--localmem=\$memory_gb" )
+fi
+if [ ! -z "\$par_expect_cells" ]; then
+  extra_params+=( "--expect-cells=\$par_expect_cells" )
+fi
+if [ !
-z "\$par_chemistry" ]; then
+  extra_params+=( "--chemistry=\$par_chemistry" )
+fi
+if [ "\$par_secondary_analysis" == "false" ]; then
+  extra_params+=( "--nosecondary" )
+fi
+if [ "\$par_generate_bam" == "false" ]; then
+  extra_params+=( "--no-bam" )
+fi
+echo "Running cellranger count"
+
+
+id=myoutput
+cellranger count \\
+  --id "\$id" \\
+  --fastqs "\$fastq_dir" \\
+  --transcriptome "\$par_reference" \\
+  --include-introns "\$par_include_introns" \\
+  "\${extra_params[@]}" \\
+  --disable-ui \\
+
+echo "Copying output"
+if [ -d "\$id/outs/" ]; then
+  if [ ! -d "\$par_output" ]; then
+    mkdir -p "\$par_output"
+  fi
+  mv "\$id/outs/"* "\$par_output"
+fi
+VIASHMAIN
+bash "\$tempscript" &
+wait "\$!"
+
+VIASHEOF
+
+
+# strip viash automount from file paths
+# (turns the /viash_automount/... container paths back into host paths so
+# the checks below look at the host file system)
+if [ ! -z "$VIASH_PAR_INPUT" ]; then
+  unset VIASH_TEST_INPUT
+  IFS=';'
+  for var in $VIASH_PAR_INPUT; do
+    unset IFS
+    # rebuild the ';'-separated list one stripped entry at a time
+    if [ -z "$VIASH_TEST_INPUT" ]; then
+      VIASH_TEST_INPUT="$(ViashStripAutomount "$var")"
+    else
+      VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")"
+    fi
+  done
+  VIASH_PAR_INPUT="$VIASH_TEST_INPUT"
+fi
+if [ ! -z "$VIASH_PAR_REFERENCE" ]; then
+  VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE")
+fi
+if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
+  VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT")
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG")
+fi
+if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR")
+fi
+
+
+# check whether required files exist
+# (the component is expected to have created its --output path)
+if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then
+  ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/cellranger_count_split/.config.vsh.yaml b/target/docker/mapping/cellranger_count_split/.config.vsh.yaml new file mode 100644 index 00000000000..2ad5c0f0b1d --- /dev/null +++ b/target/docker/mapping/cellranger_count_split/.config.vsh.yaml @@ -0,0 +1,218 @@ +functionality: + name: "cellranger_count_split" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Samuel D'Souza" + roles: + - "author" + info: + role: "Contributor" + links: + github: "srdsam" + linkedin: "samuel-d-souza-887023150/" + organizations: + - name: "Chan Zuckerberg Biohub" + href: "https://www.czbiohub.org" + role: "Data Engineer" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Output directory from a Cell Ranger count run." 
+ info: null + example: + - "input_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--filtered_h5" + info: null + example: + - "filtered_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--metrics_summary" + info: null + example: + - "metrics_summary.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--molecule_info" + info: null + example: + - "molecule_info.h5" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--bam" + info: null + example: + - "possorted_genome_bam.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--bai" + info: null + example: + - "possorted_genome_bam.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--raw_h5" + info: null + example: + - "raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Split 10x Cell Ranger output directory into separate output fields." 
+ info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:jammy" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count_split" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_count_split/cellranger_count_split" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + 
git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/cellranger_count_split/cellranger_count_split b/target/docker/mapping/cellranger_count_split/cellranger_count_split new file mode 100755 index 00000000000..413dbf8e9bc --- /dev/null +++ b/target/docker/mapping/cellranger_count_split/cellranger_count_split @@ -0,0 +1,1090 @@ +#!/usr/bin/env bash + +# cellranger_count_split 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Samuel D'Souza (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags 
--foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. 
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="cellranger_count_split" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cellranger_count_split 0.12.4" + echo "" + echo "Split 10x Cell Ranger output directory into separate output fields." + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input_dir" + echo " Output directory from a Cell Ranger count run." 
+ echo "" + echo " --filtered_h5" + echo " type: file, output, file must exist" + echo " example: filtered_feature_bc_matrix.h5" + echo "" + echo " --metrics_summary" + echo " type: file, output, file must exist" + echo " example: metrics_summary.csv" + echo "" + echo " --molecule_info" + echo " type: file, output, file must exist" + echo " example: molecule_info.h5" + echo "" + echo " --bam" + echo " type: file, output, file must exist" + echo " example: possorted_genome_bam.bam" + echo "" + echo " --bai" + echo " type: file, output, file must exist" + echo " example: possorted_genome_bam.bam.bai" + echo "" + echo " --raw_h5" + echo " type: file, output, file must exist" + echo " example: raw_feature_bc_matrix.h5" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? 
+ fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ubuntu:jammy + +ENTRYPOINT [] + + +RUN apt update && apt upgrade -y +LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Samuel D'Souza, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component mapping cellranger_count_split" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_count_split-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "cellranger_count_split 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --filtered_h5) + [ -n "$VIASH_PAR_FILTERED_H5" ] && ViashError Bad arguments for option \'--filtered_h5\': \'$VIASH_PAR_FILTERED_H5\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FILTERED_H5="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --filtered_h5. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --filtered_h5=*) + [ -n "$VIASH_PAR_FILTERED_H5" ] && ViashError Bad arguments for option \'--filtered_h5=*\': \'$VIASH_PAR_FILTERED_H5\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FILTERED_H5=$(ViashRemoveFlags "$1") + shift 1 + ;; + --metrics_summary) + [ -n "$VIASH_PAR_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--metrics_summary\': \'$VIASH_PAR_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_METRICS_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --metrics_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --metrics_summary=*) + [ -n "$VIASH_PAR_METRICS_SUMMARY" ] && ViashError Bad arguments for option \'--metrics_summary=*\': \'$VIASH_PAR_METRICS_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_METRICS_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --molecule_info) + [ -n "$VIASH_PAR_MOLECULE_INFO" ] && ViashError Bad arguments for option \'--molecule_info\': \'$VIASH_PAR_MOLECULE_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MOLECULE_INFO="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --molecule_info. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --molecule_info=*) + [ -n "$VIASH_PAR_MOLECULE_INFO" ] && ViashError Bad arguments for option \'--molecule_info=*\': \'$VIASH_PAR_MOLECULE_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MOLECULE_INFO=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bam) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bam=*) + [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam=*\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bai) + [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bai. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bai=*) + [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai=*\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --raw_h5) + [ -n "$VIASH_PAR_RAW_H5" ] && ViashError Bad arguments for option \'--raw_h5\': \'$VIASH_PAR_RAW_H5\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RAW_H5="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --raw_h5. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --raw_h5=*) + [ -n "$VIASH_PAR_RAW_H5" ] && ViashError Bad arguments for option \'--raw_h5=*\': \'$VIASH_PAR_RAW_H5\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_RAW_H5=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_FILTERED_H5" ] && [ ! -d "$(dirname "$VIASH_PAR_FILTERED_H5")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_FILTERED_H5")" +fi +if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_METRICS_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_METRICS_SUMMARY")" +fi +if [ ! 
-z "$VIASH_PAR_MOLECULE_INFO" ] && [ ! -d "$(dirname "$VIASH_PAR_MOLECULE_INFO")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MOLECULE_INFO")" +fi +if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAM")" +fi +if [ ! -z "$VIASH_PAR_BAI" ] && [ ! -d "$(dirname "$VIASH_PAR_BAI")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_BAI")" +fi +if [ ! -z "$VIASH_PAR_RAW_H5" ] && [ ! -d "$(dirname "$VIASH_PAR_RAW_H5")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_RAW_H5")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_FILTERED_H5" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_FILTERED_H5")" ) + VIASH_PAR_FILTERED_H5=$(ViashAutodetectMount "$VIASH_PAR_FILTERED_H5") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_FILTERED_H5" ) +fi +if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_METRICS_SUMMARY")" ) + VIASH_PAR_METRICS_SUMMARY=$(ViashAutodetectMount "$VIASH_PAR_METRICS_SUMMARY") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_METRICS_SUMMARY" ) +fi +if [ ! -z "$VIASH_PAR_MOLECULE_INFO" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_MOLECULE_INFO")" ) + VIASH_PAR_MOLECULE_INFO=$(ViashAutodetectMount "$VIASH_PAR_MOLECULE_INFO") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_MOLECULE_INFO" ) +fi +if [ ! -z "$VIASH_PAR_BAM" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BAM")" ) + VIASH_PAR_BAM=$(ViashAutodetectMount "$VIASH_PAR_BAM") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAM" ) +fi +if [ ! -z "$VIASH_PAR_BAI" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BAI")" ) + VIASH_PAR_BAI=$(ViashAutodetectMount "$VIASH_PAR_BAI") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAI" ) +fi +if [ ! 
-z "$VIASH_PAR_RAW_H5" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_RAW_H5")" ) + VIASH_PAR_RAW_H5=$(ViashAutodetectMount "$VIASH_PAR_RAW_H5") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_RAW_H5" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_cellranger_count_split:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellranger_count_split-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTERED_H5+x} ]; then echo "${VIASH_PAR_FILTERED_H5}" | sed "s#'#'\"'\"'#g;s#.*#par_filtered_h5='&'#" ; else echo "# par_filtered_h5="; fi ) +$( if [ ! -z ${VIASH_PAR_METRICS_SUMMARY+x} ]; then echo "${VIASH_PAR_METRICS_SUMMARY}" | sed "s#'#'\"'\"'#g;s#.*#par_metrics_summary='&'#" ; else echo "# par_metrics_summary="; fi ) +$( if [ ! -z ${VIASH_PAR_MOLECULE_INFO+x} ]; then echo "${VIASH_PAR_MOLECULE_INFO}" | sed "s#'#'\"'\"'#g;s#.*#par_molecule_info='&'#" ; else echo "# par_molecule_info="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\"'\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_RAW_H5+x} ]; then echo "${VIASH_PAR_RAW_H5}" | sed "s#'#'\"'\"'#g;s#.*#par_raw_h5='&'#" ; else echo "# par_raw_h5="; fi ) +$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +filtered_h5="\$par_input/filtered_feature_bc_matrix.h5" +if [ -f "\$filtered_h5" ] && [ ! -z "\$par_filtered_h5" ]; then + echo "+ cp \$filtered_h5 \$par_filtered_h5" + cp "\$filtered_h5" "\$par_filtered_h5" +fi + +metrics_summary="\$par_input/metrics_summary.csv" +if [ -f "\$metrics_summary" ] && [ ! -z "\$par_metrics_summary" ]; then + echo "+ cp \$metrics_summary \$par_metrics_summary" + cp "\$metrics_summary" "\$par_metrics_summary" +fi + +molecule_info="\$par_input/molecule_info.h5" +if [ -f "\$molecule_info" ] && [ ! -z "\$par_molecule_info" ]; then + echo "+ cp \$molecule_info \$par_molecule_info" + cp "\$molecule_info" "\$par_molecule_info" +fi + +bam="\$par_input/possorted_genome_bam.bam" +if [ -f "\$bam" ] && [ ! -z "\$par_bam" ]; then + echo "+ cp \$bam \$par_bam" # same trace prefix as every other copy branch + cp "\$bam" "\$par_bam" +fi + +raw_h5="\$par_input/raw_feature_bc_matrix.h5" +if [ -f "\$raw_h5" ] && [ ! -z "\$par_raw_h5" ]; then + echo "+ cp \$raw_h5 \$par_raw_h5" + cp "\$raw_h5" "\$par_raw_h5" +fi + +bai="\$par_input/possorted_genome_bam.bam.bai" +if [ -f "\$bai" ] && [ ! -z "\$par_bai" ]; then + echo "+ cp \$bai \$par_bai" + cp "\$bai" "\$par_bai" +fi +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_FILTERED_H5" ]; then + VIASH_PAR_FILTERED_H5=$(ViashStripAutomount "$VIASH_PAR_FILTERED_H5") +fi +if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ]; then + VIASH_PAR_METRICS_SUMMARY=$(ViashStripAutomount "$VIASH_PAR_METRICS_SUMMARY") +fi +if [ ! -z "$VIASH_PAR_MOLECULE_INFO" ]; then + VIASH_PAR_MOLECULE_INFO=$(ViashStripAutomount "$VIASH_PAR_MOLECULE_INFO") +fi +if [ ! 
-z "$VIASH_PAR_BAM" ]; then + VIASH_PAR_BAM=$(ViashStripAutomount "$VIASH_PAR_BAM") +fi +if [ ! -z "$VIASH_PAR_BAI" ]; then + VIASH_PAR_BAI=$(ViashStripAutomount "$VIASH_PAR_BAI") +fi +if [ ! -z "$VIASH_PAR_RAW_H5" ]; then + VIASH_PAR_RAW_H5=$(ViashStripAutomount "$VIASH_PAR_RAW_H5") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_FILTERED_H5" ] && [ ! -e "$VIASH_PAR_FILTERED_H5" ]; then + ViashError "Output file '$VIASH_PAR_FILTERED_H5' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_METRICS_SUMMARY" ] && [ ! -e "$VIASH_PAR_METRICS_SUMMARY" ]; then + ViashError "Output file '$VIASH_PAR_METRICS_SUMMARY' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MOLECULE_INFO" ] && [ ! -e "$VIASH_PAR_MOLECULE_INFO" ]; then + ViashError "Output file '$VIASH_PAR_MOLECULE_INFO' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -e "$VIASH_PAR_BAM" ]; then + ViashError "Output file '$VIASH_PAR_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BAI" ] && [ ! -e "$VIASH_PAR_BAI" ]; then + ViashError "Output file '$VIASH_PAR_BAI' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_RAW_H5" ] && [ ! -e "$VIASH_PAR_RAW_H5" ]; then + ViashError "Output file '$VIASH_PAR_RAW_H5' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/cellranger_multi/.config.vsh.yaml b/target/docker/mapping/cellranger_multi/.config.vsh.yaml new file mode 100644 index 00000000000..f593c404969 --- /dev/null +++ b/target/docker/mapping/cellranger_multi/.config.vsh.yaml @@ -0,0 +1,423 @@ +functionality: + name: "cellranger_multi" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Input files" + arguments: + - type: "file" + name: "--input" + description: "The FASTQ files to be analyzed. 
FASTQ files should conform to\ + \ the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample\ + \ Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n" + info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--gex_reference" + description: "Genome refence index built by Cell Ranger mkref." + info: null + example: + - "reference_genome.tar.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--vdj_reference" + description: "VDJ refence index built by Cell Ranger mkref." + info: null + example: + - "reference_vdj.tar.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--vdj_inner_enrichment_primers" + description: "V(D)J Immune Profiling libraries: if inner enrichment primers\ + \ other than those provided \nin the 10x Genomics kits are used, they need\ + \ to be specified here as a\ntext file with one primer per line.\n" + info: null + example: + - "enrichment_primers.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--feature_reference" + description: "Path to the Feature reference CSV file, declaring Feature Barcode\ + \ constructs and associated barcodes. Required only for Antibody Capture or\ + \ CRISPR Guide Capture libraries. See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref\ + \ for more information." 
+ info: null + example: + - "feature_reference.csv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Library arguments" + arguments: + - type: "string" + name: "--library_id" + description: "The Illumina sample name to analyze. This must exactly match the\ + \ 'Sample Name' part of the FASTQ files specified in the `--input` argument." + info: null + example: + - "mysample1" + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--library_type" + description: "The underlying feature type of the library.\nPossible values:\ + \ \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody Capture\"\ + , \"CRISPR Guide Capture\", \"Multiplexing Capture\"\n" + info: null + example: + - "Gene Expression" + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--library_subsample" + description: "Optional. The rate at which reads from the provided FASTQ files\ + \ are sampled. Must be strictly greater than 0 and less than or equal to 1." + info: null + example: + - "0.5" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--library_lanes" + description: "Lanes associated with this sample. Defaults to using all lanes." + info: null + example: + - "1-4" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Gene expression arguments" + description: "Arguments relevant to the analysis of gene expression data." + arguments: + - type: "integer" + name: "--gex_expect_cells" + description: "Expected number of recovered cells, used as input to cell calling\ + \ algorithm." 
+ info: null + example: + - 3000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--gex_chemistry" + description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ + \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ + \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ + \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ + - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ + \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ + \ for more information.\n" + info: null + default: + - "auto" + required: false + choices: + - "auto" + - "threeprime" + - "fiveprime" + - "SC3Pv1" + - "SC3Pv2" + - "SC3Pv3" + - "SC3Pv3LT" + - "SC3Pv3HT" + - "SC5P-PE" + - "SC5P-R2" + - "SC-FB" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--gex_secondary_analysis" + description: "Whether or not to run the secondary analysis e.g. clustering." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--gex_generate_bam" + description: "Whether to generate a BAM file." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--gex_include_introns" + description: "Include intronic reads in count (default=true unless --target-panel\ + \ is specified in which case default=false)" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Cell multiplexing parameters" + description: "Arguments related to cell multiplexing." 
+ arguments: + - type: "string" + name: "--cell_multiplex_sample_id" + description: "A name to identify a multiplexed sample. Must be alphanumeric\ + \ with hyphens and/or underscores, and less than 64 characters. Required for\ + \ Cell Multiplexing libraries." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cell_multiplex_oligo_ids" + description: "The Cell Multiplexing oligo IDs used to multiplex this sample.\ + \ If multiple CMOs were used for a sample, separate IDs with a pipe (e.g.,\ + \ CMO301|CMO302). Required for Cell Multiplexing libraries." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cell_multiplex_description" + description: "A description for the sample." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The folder to store the alignment results." + info: null + example: + - "/path/to/output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Executor arguments" + arguments: + - type: "boolean_true" + name: "--dryrun" + description: "If true, the output directory will only contain the CWL input\ + \ files, but the pipeline itself will not be executed." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Align fastq files using Cell Ranger multi." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/10x_5k_anticmv/raw/" + dest: "10x_5k_anticmv/raw/" + - type: "file" + path: "resources_test/10x_5k_lung_crispr/raw/" + dest: "10x_5k_lung_crispr/raw/" + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/cellranger:7.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/*" + - type: "python" + user: false + packages: + - "pandas" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "veryhighmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: 
"cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_multi" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/cellranger_multi/cellranger_multi" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/cellranger_multi/cellranger_multi b/target/docker/mapping/cellranger_multi/cellranger_multi new file mode 100755 index 00000000000..9822c2b2112 --- /dev/null +++ b/target/docker/mapping/cellranger_multi/cellranger_multi @@ -0,0 +1,1681 @@ +#!/usr/bin/env bash + +# cellranger_multi 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Robrecht Cannoodt (author, maintainer) +# * Dries De Maeyer (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# 
ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="cellranger_multi" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cellranger_multi 0.12.4" + echo "" + echo "Align fastq files using Cell Ranger multi." + echo "" + echo "Input files:" + echo " --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example:" + echo "mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz" + echo " The FASTQ files to be analyzed. FASTQ files should conform to the naming" + echo " conventions of bcl2fastq and mkfastq:" + echo " \`[Sample Name]_S[Sample Index]_L00[Lane Number]_[Read" + echo " Type]_001.fastq.gz\`" + echo "" + echo " --gex_reference" + echo " type: file, required parameter, file must exist" + echo " example: reference_genome.tar.gz" + echo " Genome refence index built by Cell Ranger mkref." + echo "" + echo " --vdj_reference" + echo " type: file, file must exist" + echo " example: reference_vdj.tar.gz" + echo " VDJ refence index built by Cell Ranger mkref." 
+ echo "" + echo " --vdj_inner_enrichment_primers" + echo " type: file, file must exist" + echo " example: enrichment_primers.txt" + echo " V(D)J Immune Profiling libraries: if inner enrichment primers other than" + echo " those provided" + echo " in the 10x Genomics kits are used, they need to be specified here as a" + echo " text file with one primer per line." + echo "" + echo " --feature_reference" + echo " type: file, file must exist" + echo " example: feature_reference.csv" + echo " Path to the Feature reference CSV file, declaring Feature Barcode" + echo " constructs and associated barcodes. Required only for Antibody Capture" + echo " or CRISPR Guide Capture libraries. See" + echo " " + echo "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref" + echo " for more information." + echo "" + echo "Library arguments:" + echo " --library_id" + echo " type: string, required parameter, multiple values allowed" + echo " example: mysample1" + echo " The Illumina sample name to analyze. This must exactly match the 'Sample" + echo " Name' part of the FASTQ files specified in the \`--input\` argument." + echo "" + echo " --library_type" + echo " type: string, required parameter, multiple values allowed" + echo " example: Gene Expression" + echo " The underlying feature type of the library." + echo " Possible values: \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody" + echo " Capture\", \"CRISPR Guide Capture\", \"Multiplexing Capture\"" + echo "" + echo " --library_subsample" + echo " type: string, multiple values allowed" + echo " example: 0.5" + echo " Optional. The rate at which reads from the provided FASTQ files are" + echo " sampled. Must be strictly greater than 0 and less than or equal to 1." + echo "" + echo " --library_lanes" + echo " type: string, multiple values allowed" + echo " example: 1-4" + echo " Lanes associated with this sample. Defaults to using all lanes." 
+ echo "" + echo "Gene expression arguments:" + echo " Arguments relevant to the analysis of gene expression data." + echo "" + echo " --gex_expect_cells" + echo " type: integer" + echo " example: 3000" + echo " Expected number of recovered cells, used as input to cell calling" + echo " algorithm." + echo "" + echo " --gex_chemistry" + echo " type: string" + echo " default: auto" + echo " choices: [ auto, threeprime, fiveprime, SC3Pv1, SC3Pv2, SC3Pv3," + echo "SC3Pv3LT, SC3Pv3HT, SC5P-PE, SC5P-R2, SC-FB ]" + echo " Assay configuration." + echo " - auto: autodetect mode" + echo " - threeprime: Single Cell 3'" + echo " - fiveprime: Single Cell 5'" + echo " - SC3Pv1: Single Cell 3' v1" + echo " - SC3Pv2: Single Cell 3' v2" + echo " - SC3Pv3: Single Cell 3' v3" + echo " - SC3Pv3LT: Single Cell 3' v3 LT" + echo " - SC3Pv3HT: Single Cell 3' v3 HT" + echo " - SC5P-PE: Single Cell 5' paired-end" + echo " - SC5P-R2: Single Cell 5' R2-only" + echo " - SC-FB: Single Cell Antibody-only 3' v2 or 5'" + echo " See" + echo " " + echo "https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-" + echo " for more information." + echo "" + echo " --gex_secondary_analysis" + echo " type: boolean" + echo " default: false" + echo " Whether or not to run the secondary analysis e.g. clustering." + echo "" + echo " --gex_generate_bam" + echo " type: boolean" + echo " default: false" + echo " Whether to generate a BAM file." + echo "" + echo " --gex_include_introns" + echo " type: boolean" + echo " default: true" + echo " Include intronic reads in count (default=true unless --target-panel is" + echo " specified in which case default=false)" + echo "" + echo "Cell multiplexing parameters:" + echo " Arguments related to cell multiplexing." + echo "" + echo " --cell_multiplex_sample_id" + echo " type: string" + echo " A name to identify a multiplexed sample. Must be alphanumeric with" + echo " hyphens and/or underscores, and less than 64 characters. 
Required for" + echo " Cell Multiplexing libraries." + echo "" + echo " --cell_multiplex_oligo_ids" + echo " type: string" + echo " The Cell Multiplexing oligo IDs used to multiplex this sample. If" + echo " multiple CMOs were used for a sample, separate IDs with a pipe (e.g.," + echo " CMO301|CMO302). Required for Cell Multiplexing libraries." + echo "" + echo " --cell_multiplex_description" + echo " type: string" + echo " A description for the sample." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: /path/to/output" + echo " The folder to store the alignment results." + echo "" + echo "Executor arguments:" + echo " --dryrun" + echo " type: boolean_true" + echo " If true, the output directory will only contain the CWL input files, but" + echo " the pipeline itself will not be executed." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. 
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect "$1" > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerLocalTagCheck {
+  # `docker images -q` prints the image id(s); empty output means not present
+  [ -n "$(docker images -q "$1")" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerPull python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    # verbose mode: show docker's own output
+    docker pull "$1" && return 0 || return 1
+  else
+    save=$-; set +e
+    docker pull "$1" 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the push succeeded
+# examples:
+#   ViashDockerPush python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push "$1"
+    out=$?
+  else
+    docker push "$1" 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  # report on the image that was actually pushed ($1) rather than on the
+  # VSHD_ID global, which is only set when called through ViashDockerSetup
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $2+                 : extra arguments, passed on to ViashDockerBuild
+# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull "$1"
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild "$@"
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild "$VSHD_ID" --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild "$VSHD_ID" --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild "$VSHD_ID"
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
+    # "ifneedbe*" strategies only act when the image is not available locally
+    save=$-; set +e
+    ViashDockerLocalTagCheck "$VSHD_ID"
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild "$VSHD_ID" --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild "$VSHD_ID"
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull "$VSHD_ID"
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild "$VSHD_ID" --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild "$VSHD_ID"
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
+    # only push when the tag is not yet present on the remote
+    save=$-; set +e
+    ViashDockerRemoteTagCheck "$VSHD_ID"
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $@                  : commands to verify being present
+# Exits with status 1 and names the first missing command when the check fails.
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  # the loop runs inside the container and echoes the first command it cannot find
+  missing=$(docker run --rm --entrypoint=sh "$tag" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM ghcr.io/data-intuitive/cellranger:7.0
+
+ENTRYPOINT []
+
+
+RUN DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/*
+RUN pip install --upgrade pip && \
+  pip install --upgrade --no-cache-dir "pandas"
+
+LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt, Dries De Maeyer"
+LABEL org.opencontainers.image.description="Companion container for running component mapping cellranger_multi"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $2+                 : extra arguments for `docker build` (e.g. --no-cache)
+# exit code $?        : whether or not the image was built
+function ViashDockerBuild {
+  # create temporary directory to store dockerfile & optional resources in
+  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellranger_multi-XXXXXX")
+  dockerfile="$tmpdir/Dockerfile"
+  function clean_up {
+    rm -rf "$tmpdir"
+  }
+  trap clean_up EXIT
+
+  # store dockerfile and resources
+  ViashDockerfile > "$dockerfile"
+
+  # Build the container
+  ViashNotice "Building container '$1' with Dockerfile"
+  ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'"
+  save=$-; set +e
+  # paths are quoted so temp/resource directories containing whitespace work
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile"
+  else
+    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log"
+  fi
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashError "Error occurred while building container '$1'"
+    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
+      # the build output was captured instead of shown; print it now
+      ViashError "Transcript: --------------------------------"
+      cat "$tmpdir/docker_build.log"
+      ViashError "End of transcript --------------------------"
+    fi
+    exit 1
+  fi
+  ViashDockerCheckCommands "$1" 'ps' 'bash'
+}
+
+# ViashAbsolutePath: generate absolute path from relative path
+# borrowed from https://stackoverflow.com/a/21951256
+# $1     : relative filename
+# return : absolute path
+# examples:
+#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
+#   ViashAbsolutePath /foo/bar/..     # returns /foo
+function ViashAbsolutePath {
+  local thePath
+  if [[ ! "$1" =~ ^/ ]]; then
+    thePath="$PWD/$1"
+  else
+    thePath="$1"
+  fi
+  echo "$thePath" | (
+    IFS=/
+    read -a parr
+    declare -a outp
+    for i in "${parr[@]}"; do
+      case "$i" in
+      ''|.) continue ;;
+      ..)
+        len=${#outp[@]}
+        if ((len==0)); then
+          continue
+        else
+          unset outp[$((len-1))]
+        fi
+        ;;
+      *)
+        len=${#outp[@]}
+        outp[$len]="$i"
+        ;;
+      esac
+    done
+    echo /"${outp[*]}"
+  )
+}
+# ViashAutodetectMount: auto configuring docker mounts from parameters
+# $1                             : The parameter value
+# returns                        : New parameter
+# $VIASH_EXTRA_MOUNTS            : Added another parameter to be passed to docker
+# examples:
+#   ViashAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
+#   ViashAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
+function ViashAutodetectMount {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  echo "$mount_target/$base_name"
+}
+function ViashAutodetectMountArg {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  mount_target="/viash_automount$mount_source"
+  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
+  echo "--volume=\"$mount_source:$mount_target\""
+}
+function ViashStripAutomount {
+  abs_path=$(ViashAbsolutePath "$1")
+  echo "${abs_path#/viash_automount}"
+}
+# ViashExtractFlags: Retain leading flag
+# $1     : string with a possible leading flag
+# return : leading flag
+# examples:
+#   ViashExtractFlags --foo=bar  # returns --foo
+function ViashExtractFlags {
+  echo $1 | sed 's/=.*//'
+}
+# initialise variables
+VIASH_EXTRA_MOUNTS=()
+
+# initialise array
+VIASH_POSITIONAL_ARGS=''
+VIASH_MODE='run'
+
+# Parse the command line. Component arguments use two dashes, arguments for
+# the wrapper itself use three dashes. Repeated values of "multiple" arguments
+# are joined with ';'. Anything unrecognised is kept as a positional argument.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -h|--help)
+            ViashHelp
+            exit
+            ;;
+        ---v|---verbose)
+            let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
+            shift 1
+            ;;
+        ---verbosity)
+            VIASH_VERBOSITY="$2"
+            shift 2
+            ;;
+        ---verbosity=*)
+            VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        --version)
+            echo "cellranger_multi 0.12.4"
+            exit
+            ;;
+        --input)
+            if [ -z "$VIASH_PAR_INPUT" ]; then
+              VIASH_PAR_INPUT="$2"
+            else
+              VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2"
+            fi
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --input=*)
+            if [ -z "$VIASH_PAR_INPUT" ]; then
+              VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
+            else
+              VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1")
+            fi
+            shift 1
+            ;;
+        --gex_reference)
+            [ -n "$VIASH_PAR_GEX_REFERENCE" ] && ViashError Bad arguments for option \'--gex_reference\': \'$VIASH_PAR_GEX_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_REFERENCE="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_reference. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --gex_reference=*)
+            [ -n "$VIASH_PAR_GEX_REFERENCE" ] && ViashError Bad arguments for option \'--gex_reference=*\': \'$VIASH_PAR_GEX_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_REFERENCE=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --vdj_reference)
+            [ -n "$VIASH_PAR_VDJ_REFERENCE" ] && ViashError Bad arguments for option \'--vdj_reference\': \'$VIASH_PAR_VDJ_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_VDJ_REFERENCE="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --vdj_reference. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --vdj_reference=*)
+            [ -n "$VIASH_PAR_VDJ_REFERENCE" ] && ViashError Bad arguments for option \'--vdj_reference=*\': \'$VIASH_PAR_VDJ_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_VDJ_REFERENCE=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --vdj_inner_enrichment_primers)
+            [ -n "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ] && ViashError Bad arguments for option \'--vdj_inner_enrichment_primers\': \'$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --vdj_inner_enrichment_primers. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --vdj_inner_enrichment_primers=*)
+            [ -n "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ] && ViashError Bad arguments for option \'--vdj_inner_enrichment_primers=*\': \'$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --feature_reference)
+            [ -n "$VIASH_PAR_FEATURE_REFERENCE" ] && ViashError Bad arguments for option \'--feature_reference\': \'$VIASH_PAR_FEATURE_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_FEATURE_REFERENCE="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --feature_reference. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --feature_reference=*)
+            [ -n "$VIASH_PAR_FEATURE_REFERENCE" ] && ViashError Bad arguments for option \'--feature_reference=*\': \'$VIASH_PAR_FEATURE_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_FEATURE_REFERENCE=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --library_id)
+            if [ -z "$VIASH_PAR_LIBRARY_ID" ]; then
+              VIASH_PAR_LIBRARY_ID="$2"
+            else
+              VIASH_PAR_LIBRARY_ID="$VIASH_PAR_LIBRARY_ID;""$2"
+            fi
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_id. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --library_id=*)
+            if [ -z "$VIASH_PAR_LIBRARY_ID" ]; then
+              VIASH_PAR_LIBRARY_ID=$(ViashRemoveFlags "$1")
+            else
+              VIASH_PAR_LIBRARY_ID="$VIASH_PAR_LIBRARY_ID;"$(ViashRemoveFlags "$1")
+            fi
+            shift 1
+            ;;
+        --library_type)
+            if [ -z "$VIASH_PAR_LIBRARY_TYPE" ]; then
+              VIASH_PAR_LIBRARY_TYPE="$2"
+            else
+              VIASH_PAR_LIBRARY_TYPE="$VIASH_PAR_LIBRARY_TYPE;""$2"
+            fi
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_type. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --library_type=*)
+            if [ -z "$VIASH_PAR_LIBRARY_TYPE" ]; then
+              VIASH_PAR_LIBRARY_TYPE=$(ViashRemoveFlags "$1")
+            else
+              VIASH_PAR_LIBRARY_TYPE="$VIASH_PAR_LIBRARY_TYPE;"$(ViashRemoveFlags "$1")
+            fi
+            shift 1
+            ;;
+        --library_subsample)
+            if [ -z "$VIASH_PAR_LIBRARY_SUBSAMPLE" ]; then
+              VIASH_PAR_LIBRARY_SUBSAMPLE="$2"
+            else
+              VIASH_PAR_LIBRARY_SUBSAMPLE="$VIASH_PAR_LIBRARY_SUBSAMPLE;""$2"
+            fi
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_subsample. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --library_subsample=*)
+            if [ -z "$VIASH_PAR_LIBRARY_SUBSAMPLE" ]; then
+              VIASH_PAR_LIBRARY_SUBSAMPLE=$(ViashRemoveFlags "$1")
+            else
+              VIASH_PAR_LIBRARY_SUBSAMPLE="$VIASH_PAR_LIBRARY_SUBSAMPLE;"$(ViashRemoveFlags "$1")
+            fi
+            shift 1
+            ;;
+        --library_lanes)
+            if [ -z "$VIASH_PAR_LIBRARY_LANES" ]; then
+              VIASH_PAR_LIBRARY_LANES="$2"
+            else
+              VIASH_PAR_LIBRARY_LANES="$VIASH_PAR_LIBRARY_LANES;""$2"
+            fi
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --library_lanes. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --library_lanes=*)
+            if [ -z "$VIASH_PAR_LIBRARY_LANES" ]; then
+              VIASH_PAR_LIBRARY_LANES=$(ViashRemoveFlags "$1")
+            else
+              VIASH_PAR_LIBRARY_LANES="$VIASH_PAR_LIBRARY_LANES;"$(ViashRemoveFlags "$1")
+            fi
+            shift 1
+            ;;
+        --gex_expect_cells)
+            [ -n "$VIASH_PAR_GEX_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--gex_expect_cells\': \'$VIASH_PAR_GEX_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_EXPECT_CELLS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_expect_cells. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --gex_expect_cells=*)
+            [ -n "$VIASH_PAR_GEX_EXPECT_CELLS" ] && ViashError Bad arguments for option \'--gex_expect_cells=*\': \'$VIASH_PAR_GEX_EXPECT_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_EXPECT_CELLS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --gex_chemistry)
+            [ -n "$VIASH_PAR_GEX_CHEMISTRY" ] && ViashError Bad arguments for option \'--gex_chemistry\': \'$VIASH_PAR_GEX_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_CHEMISTRY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_chemistry. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --gex_chemistry=*)
+            [ -n "$VIASH_PAR_GEX_CHEMISTRY" ] && ViashError Bad arguments for option \'--gex_chemistry=*\': \'$VIASH_PAR_GEX_CHEMISTRY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_CHEMISTRY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --gex_secondary_analysis)
+            [ -n "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--gex_secondary_analysis\': \'$VIASH_PAR_GEX_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_SECONDARY_ANALYSIS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_secondary_analysis. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --gex_secondary_analysis=*)
+            [ -n "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" ] && ViashError Bad arguments for option \'--gex_secondary_analysis=*\': \'$VIASH_PAR_GEX_SECONDARY_ANALYSIS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_SECONDARY_ANALYSIS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --gex_generate_bam)
+            [ -n "$VIASH_PAR_GEX_GENERATE_BAM" ] && ViashError Bad arguments for option \'--gex_generate_bam\': \'$VIASH_PAR_GEX_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_GENERATE_BAM="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_generate_bam. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --gex_generate_bam=*)
+            [ -n "$VIASH_PAR_GEX_GENERATE_BAM" ] && ViashError Bad arguments for option \'--gex_generate_bam=*\': \'$VIASH_PAR_GEX_GENERATE_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_GENERATE_BAM=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --gex_include_introns)
+            [ -n "$VIASH_PAR_GEX_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--gex_include_introns\': \'$VIASH_PAR_GEX_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_INCLUDE_INTRONS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --gex_include_introns. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --gex_include_introns=*)
+            [ -n "$VIASH_PAR_GEX_INCLUDE_INTRONS" ] && ViashError Bad arguments for option \'--gex_include_introns=*\': \'$VIASH_PAR_GEX_INCLUDE_INTRONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GEX_INCLUDE_INTRONS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --cell_multiplex_sample_id)
+            [ -n "$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID" ] && ViashError Bad arguments for option \'--cell_multiplex_sample_id\': \'$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_multiplex_sample_id. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --cell_multiplex_sample_id=*)
+            [ -n "$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID" ] && ViashError Bad arguments for option \'--cell_multiplex_sample_id=*\': \'$VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --cell_multiplex_oligo_ids)
+            [ -n "$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS" ] && ViashError Bad arguments for option \'--cell_multiplex_oligo_ids\': \'$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_multiplex_oligo_ids. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --cell_multiplex_oligo_ids=*)
+            [ -n "$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS" ] && ViashError Bad arguments for option \'--cell_multiplex_oligo_ids=*\': \'$VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --cell_multiplex_description)
+            [ -n "$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION" ] && ViashError Bad arguments for option \'--cell_multiplex_description\': \'$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_multiplex_description. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --cell_multiplex_description=*)
+            [ -n "$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION" ] && ViashError Bad arguments for option \'--cell_multiplex_description=*\': \'$VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --output)
+            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --output=*)
+            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --dryrun)
+            [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_DRYRUN=true
+            shift 1
+            ;;
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            # the strategy is a separate word: consume both the flag and its
+            # value (and refuse a missing value, as `shift 2` would not shift)
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            # flag and value are a single word, so only one word is consumed
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---volume)
+            # note: the short form '---v' is already claimed by ---verbose above
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            # the value is embedded in $1 (---volume=src:dst)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        *)  # positional arg or unknown option
+            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
+            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
+            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+            shift # past argument
+            ;;
+    esac
+done
+
+# parse positional parameters
+eval set -- $VIASH_POSITIONAL_ARGS
+
+
+ViashDockerInstallationCheck
+
+if [ $VIASH_MODE == "docker_setup" ]; then
+  ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY"
+  exit 0
+fi
+ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0' ifneedbepullelsecachedbuild
+
+if [ $VIASH_MODE == "docker_debug" ]; then
+  ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0'"
+  docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0'
+  exit 0
+fi
+
+# setting computational defaults
+
+# helper function for parsing memory strings
+# $1     : amount of memory with optional unit, e.g. '2gb', '512M', '100 b'
+# return : the amount in bytes (units are powers of 1024); prints nothing
+#          when the string does not match the expected format
+function ViashMemoryAsBytes {
+  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
+  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
+  if [[ $memory =~ $memory_regex ]]; then
+    local number=${memory/[^0-9]*/}
+    local symbol=${memory/*[0-9]/}
+
+    case $symbol in
+      b)      memory_b=$number ;;
+      kb|k)   memory_b=$(( $number * 1024 )) ;;
+      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
+      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
+      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ !
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_GEX_REFERENCE+x} ]; then + ViashError '--gex_reference' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_LIBRARY_ID+x} ]; then + ViashError '--library_id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_LIBRARY_TYPE+x} ]; then + ViashError '--library_type' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_GEX_CHEMISTRY+x} ]; then + VIASH_PAR_GEX_CHEMISTRY="auto" +fi +if [ -z ${VIASH_PAR_GEX_SECONDARY_ANALYSIS+x} ]; then + VIASH_PAR_GEX_SECONDARY_ANALYSIS="false" +fi +if [ -z ${VIASH_PAR_GEX_GENERATE_BAM+x} ]; then + VIASH_PAR_GEX_GENERATE_BAM="false" +fi +if [ -z ${VIASH_PAR_GEX_INCLUDE_INTRONS+x} ]; then + VIASH_PAR_GEX_INCLUDE_INTRONS="true" +fi +if [ -z ${VIASH_PAR_DRYRUN+x} ]; then + VIASH_PAR_DRYRUN="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_GEX_REFERENCE" ] && [ ! -e "$VIASH_PAR_GEX_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_GEX_REFERENCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_VDJ_REFERENCE" ] && [ ! -e "$VIASH_PAR_VDJ_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_VDJ_REFERENCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ] && [ ! -e "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ]; then + ViashError "Input file '$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_FEATURE_REFERENCE" ] && [ ! -e "$VIASH_PAR_FEATURE_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_FEATURE_REFERENCE' does not exist." 
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_GEX_EXPECT_CELLS" ]]; then + if ! [[ "$VIASH_PAR_GEX_EXPECT_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--gex_expect_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" ]]; then + if ! [[ "$VIASH_PAR_GEX_SECONDARY_ANALYSIS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--gex_secondary_analysis' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_GEX_GENERATE_BAM" ]]; then + if ! [[ "$VIASH_PAR_GEX_GENERATE_BAM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--gex_generate_bam' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_GEX_INCLUDE_INTRONS" ]]; then + if ! [[ "$VIASH_PAR_GEX_INCLUDE_INTRONS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--gex_include_introns' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DRYRUN" ]]; then + if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_GEX_CHEMISTRY" ]; then + VIASH_PAR_GEX_CHEMISTRY_CHOICES=("auto:threeprime:fiveprime:SC3Pv1:SC3Pv2:SC3Pv3:SC3Pv3LT:SC3Pv3HT:SC5P-PE:SC5P-R2:SC-FB") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_GEX_CHEMISTRY_CHOICES[*]}:" =~ ":$VIASH_PAR_GEX_CHEMISTRY:" ]]; then + ViashError '--gex_chemistry' specified value of \'$VIASH_PAR_GEX_CHEMISTRY\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_GEX_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GEX_REFERENCE")" ) + VIASH_PAR_GEX_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_GEX_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_VDJ_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_VDJ_REFERENCE")" ) + VIASH_PAR_VDJ_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_VDJ_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS")" ) + VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS=$(ViashAutodetectMount "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS") +fi +if [ ! -z "$VIASH_PAR_FEATURE_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_FEATURE_REFERENCE")" ) + VIASH_PAR_FEATURE_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_FEATURE_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
# change file ownership
#
# Runs chown inside the component's container so that every path collected in
# VIASH_CHOWN_VARS is handed to the invoking user and group (id -u / id -g).
# Does nothing when no path was registered.
function ViashPerformChown {
  # guard clause: no registered outputs means there is nothing to chown
  if [ "${#VIASH_CHOWN_VARS[@]}" -eq 0 ]; then
    return 0
  fi

  # errexit is switched off around the call so a failing chown cannot abort
  # the caller; it is switched back on right after
  set +e
  eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_cellranger_multi:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
  set -e
}
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations + +import sys +import re +import subprocess +import tempfile +import pandas as pd +from typing import Optional, Any, Union +import tarfile +from pathlib import Path +import shutil +from itertools import chain + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'gex_reference': $( if [ ! -z ${VIASH_PAR_GEX_REFERENCE+x} ]; then echo "r'${VIASH_PAR_GEX_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'vdj_reference': $( if [ ! -z ${VIASH_PAR_VDJ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_VDJ_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'vdj_inner_enrichment_primers': $( if [ ! -z ${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS+x} ]; then echo "r'${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'feature_reference': $( if [ ! -z ${VIASH_PAR_FEATURE_REFERENCE+x} ]; then echo "r'${VIASH_PAR_FEATURE_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'library_id': $( if [ ! -z ${VIASH_PAR_LIBRARY_ID+x} ]; then echo "r'${VIASH_PAR_LIBRARY_ID//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'library_type': $( if [ ! -z ${VIASH_PAR_LIBRARY_TYPE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_TYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'library_subsample': $( if [ ! -z ${VIASH_PAR_LIBRARY_SUBSAMPLE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_SUBSAMPLE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'library_lanes': $( if [ ! -z ${VIASH_PAR_LIBRARY_LANES+x} ]; then echo "r'${VIASH_PAR_LIBRARY_LANES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'gex_expect_cells': $( if [ ! 
-z ${VIASH_PAR_GEX_EXPECT_CELLS+x} ]; then echo "int(r'${VIASH_PAR_GEX_EXPECT_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'gex_chemistry': $( if [ ! -z ${VIASH_PAR_GEX_CHEMISTRY+x} ]; then echo "r'${VIASH_PAR_GEX_CHEMISTRY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'gex_secondary_analysis': $( if [ ! -z ${VIASH_PAR_GEX_SECONDARY_ANALYSIS+x} ]; then echo "r'${VIASH_PAR_GEX_SECONDARY_ANALYSIS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'gex_generate_bam': $( if [ ! -z ${VIASH_PAR_GEX_GENERATE_BAM+x} ]; then echo "r'${VIASH_PAR_GEX_GENERATE_BAM//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'gex_include_introns': $( if [ ! -z ${VIASH_PAR_GEX_INCLUDE_INTRONS+x} ]; then echo "r'${VIASH_PAR_GEX_INCLUDE_INTRONS//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'cell_multiplex_sample_id': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cell_multiplex_oligo_ids': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cell_multiplex_description': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
def setup_logger():
    """Attach a stdout handler to the root logger and return that logger.

    The root logger is switched to INFO level and receives one extra stream
    handler that formats records as: timestamp, padded level name, message.
    Every call adds a further handler, so call it once per process.
    """
    import logging
    from sys import stdout

    # build the fully configured handler first ...
    stdout_handler = logging.StreamHandler(stdout)
    stdout_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    )

    # ... then wire it into the root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    root_logger.addHandler(stdout_handler)
    return root_logger
def lengths_gt1(dic: dict[str, Optional[list[Any]]]) -> dict[str, int]:
    """Map each key to the length of its list, keeping only lists longer than one.

    Entries whose value is None, or whose list holds zero or one item, are
    left out of the result.
    """
    sizes = {}
    for name, items in dic.items():
        if items is None:
            continue
        count = len(items)
        if count > 1:
            sizes[name] = count
    return sizes


def strip_margin(text: str) -> str:
    """Apply the margin-stripping substitution to text and return the result.

    The pattern targets an optional newline, then spaces or tabs, then a pipe
    character; the replacement refers to the captured newline group.
    """
    margin_pattern = '(\\n?)[ \\t]*\\|'
    return re.sub(margin_pattern, '\\\\1', text)


def subset_dict(dictionary: dict[str, str],
                keys: Union[dict[str, str], list[str]]) -> dict[str, str]:
    """Pick entries from dictionary, optionally renaming them.

    Args:
        dictionary: the mapping to read from.
        keys: either a list or tuple of keys to copy under the same name, or a
            mapping of original key to destination key.

    Returns:
        A new dict holding the selected entries whose value is not None.

    Raises:
        KeyError: when a requested key is absent from dictionary.
    """
    if isinstance(keys, (list, tuple)):
        # a plain sequence means: keep the original names
        renames = {name: name for name in keys}
    else:
        renames = keys

    selected = {}
    for source_key, target_key in renames.items():
        value = dictionary[source_key]
        if value is not None:
            selected[target_key] = value
    return selected
def process_params(par: dict[str, Any]) -> dict[str, Any]:
    """Validate and normalise the component parameters in place.

    Steps, in order:
      * turn every input entry into a Path; when the only entry is a
        directory, replace it by the files below it whose name matches
        fastq_regex;
      * assert that every input file name matches fastq_regex;
      * collect the library and cell multiplexing parameters, check that the
        lists in each group have compatible lengths, and store them under the
        keys "libraries" and "cmo";
      * make the input, reference and output paths absolute.

    Args:
        par: the parameter dictionary; it is modified.

    Returns:
        The same dictionary that was passed in.

    Raises:
        AssertionError: when an input file name does not match fastq_regex or
            when a parameter group has lists of differing lengths.
    """
    # if par_input is a directory, look for fastq files
    par["input"] = [Path(fastq) for fastq in par["input"]]
    if len(par["input"]) == 1 and par["input"][0].is_dir():
        logger.info("Detected '--input' as a directory, "
                    "traversing to see if we can detect any FASTQ files.")
        input_dir = par["input"][0]
        par["input"] = [input_path for input_path in input_dir.rglob('*')
                        if re.match(fastq_regex, input_path.name)]
        if not par["input"]:
            # Not fatal here (a dry run may still be useful), but make the
            # situation visible instead of failing later without context.
            logger.warning("No FASTQ files matching %s were found in '%s'.",
                           fastq_regex, input_dir)

    # check input fastq files
    for input_path in par["input"]:
        assert re.match(fastq_regex, input_path.name) is not None, (
            f"File name of --input '{input_path}' should match regex {fastq_regex}."
        )

    # check lengths of libraries metadata
    library_dict = subset_dict(par, LIBRARY_PARAMS)
    check_subset_dict_equal_length("Library", library_dict)
    # storing for later use
    par["libraries"] = library_dict

    cmo_dict = subset_dict(par, SAMPLE_PARAMS)
    check_subset_dict_equal_length("Cell multiplexing", cmo_dict)
    # storing for later use
    par["cmo"] = cmo_dict

    # use absolute paths
    par["input"] = [input_path.resolve() for input_path in par["input"]]
    for file_path in REFERENCES + ('output', ):
        if par[file_path]:
            logger.info('Making path %s absolute', par[file_path])
            par[file_path] = Path(par[file_path]).resolve()
    return par


def generate_csv_category(name: str, args: dict[str, str], orient: str) -> list[str]:
    """Render one section of the Cell Ranger multi config CSV.

    Args:
        name: section title, written between square brackets.
        args: the values of the section. With orient "index" every entry
            becomes a "key,value" row. With orient "columns" the keys form a
            header row and the values form the rows below it; values may be
            lists (one row per item) or scalars.
        orient: either "index" or "columns".

    Returns:
        An empty list when args is empty, otherwise a list of three strings:
        the title, the CSV body and an empty string acting as a separator.
    """
    assert orient in ("index", "columns")
    if not args:
        return []
    title = [ f'[{name}]' ]
    if orient == "columns" and not any(pd.api.types.is_list_like(value)
                                       for value in args.values()):
        # pandas refuses to build a frame from scalars only ("If using all
        # scalar values, you must pass an index"); treat them as a single row.
        args = {key: [value] for key, value in args.items()}
    # Which index to include in csv section is based on orientation
    to_csv_args = {"index": (orient=="index"), "header": (orient=="columns")}
    values = [pd.DataFrame.from_dict(args, orient=orient).to_csv(**to_csv_args).strip()]
    return title + values + [""]
def _unpack_reference(reference: Path, unpacked_directory: Path) -> None:
    """Extract a tar archive, dropping its single top-level directory if it has one."""
    # NOTE(review): members are extracted without sanitising their paths; this
    # assumes reference archives come from a trusted source.
    with tarfile.open(reference, 'r') as open_tar:
        members = open_tar.getmembers()
        root_dirs = [member for member in members if member.isdir()
                     and member.name != '.' and '/' not in member.name]
        # if there is only one root_dir (and there are files in that directory)
        # strip that directory name from the destination folder
        if len(root_dirs) == 1:
            for mem in members:
                mem.path = Path(*Path(mem.path).parts[1:])
        members_to_move = [mem for mem in members if mem.path != Path('.')]
        open_tar.extractall(unpacked_directory, members=members_to_move)


def main(par: dict[str, Any], meta: dict[str, Any]):
    """Prepare the inputs and run (or dry-run) Cell Ranger multi.

    The function processes the parameters, unpacks references that are tar
    archives into a temporary directory, symlinks the FASTQ files into one
    folder and generates the config CSV. In dry-run mode it only writes the
    config file to the output directory and logs the command. Otherwise it
    runs cellranger multi from the temporary directory, stores its standard
    output in cellranger_multi.log and moves the results to the output
    directory.

    Args:
        par: component parameters (see process_params).
        meta: component metadata; the keys temp_dir, cpus and memory_gb are read.

    Raises:
        subprocess.CalledProcessError: when cellranger exits with a non-zero code.
        ValueError: when an expected output of cellranger is missing.
    """
    logger.info(" Processing params")
    par = process_params(par)
    logger.info(par)

    # TODO: throw error or else Cell Ranger will
    with tempfile.TemporaryDirectory(prefix="cellranger_multi-",
                                     dir=meta["temp_dir"]) as temp_dir:
        temp_dir_path = Path(temp_dir)
        for reference_par_name in REFERENCES:
            reference = par[reference_par_name]
            logger.info('Looking at %s to check if it needs decompressing', reference)
            if reference and Path(reference).is_file() and tarfile.is_tarfile(reference):
                # Remove two extensions (if they exist)
                extaction_dir_name = Path(Path(reference).stem).stem
                unpacked_directory = temp_dir_path / extaction_dir_name
                logger.info('Extracting %s to %s', reference, unpacked_directory)
                _unpack_reference(reference, unpacked_directory)
                par[reference_par_name] = unpacked_directory

        # Creating symlinks of fastq files to tempdir
        input_symlinks_dir = temp_dir_path / "input_symlinks"
        input_symlinks_dir.mkdir()
        for fastq in par['input']:
            destination = input_symlinks_dir / fastq.name
            destination.symlink_to(fastq)

        logger.info(" Creating config file")
        config_content = generate_config(par, input_symlinks_dir)

        logger.info(" Creating Cell Ranger argument")
        temp_id = "run"
        proc_pars = ["--disable-ui", "--id", temp_id]

        # NOTE(review): 2 GB is held back from the memory handed to Cell Ranger;
        # a memory_gb of 1 therefore yields a negative value - confirm the
        # intended lower bound.
        command_line_parameters = {
            "--localcores": meta['cpus'],
            "--localmem": int(meta['memory_gb']) - 2 if meta['memory_gb'] else None,
        }
        for param, param_value in command_line_parameters.items():
            if param_value:
                proc_pars.append(f"{param}={param_value}")

        ## Run pipeline
        if par["dryrun"]:
            par['output'].mkdir(parents=True, exist_ok=True)

            # write config file
            config_file = par['output'] / "config.csv"
            with open(config_file, "w") as f:
                f.write(config_content)
            proc_pars.append(f"--csv={config_file}")

            # display command that would've been used
            # (proc_pars already carries the csv option, do not repeat it)
            cmd = ["cellranger multi"] + proc_pars
            logger.info("> " + ' '.join(cmd))
        else:
            # write config file to execution directory
            config_file = temp_dir_path / "config.csv"
            with open(config_file, "w") as f:
                f.write(config_content)
            proc_pars.append(f"--csv={config_file}")

            # Already copy config file to output directory
            par['output'].mkdir(parents=True, exist_ok=True)
            with (par['output'] / "config.csv").open('w') as open_config:
                open_config.write(config_content)

            # run process
            cmd = ["cellranger", "multi"] + proc_pars
            logger.info("> " + ' '.join(cmd))
            try:
                process_output = subprocess.run(
                    cmd,
                    cwd=temp_dir,
                    check=True,
                    capture_output=True
                )
            except subprocess.CalledProcessError as e:
                print(e.output.decode('utf-8'), flush=True)
                # stderr is captured as well; without this the actual error
                # message of the tool would be lost
                if e.stderr:
                    print(e.stderr.decode('utf-8'), file=sys.stderr, flush=True)
                raise
            else:
                # Write stdout output to output folder
                with (par["output"] / "cellranger_multi.log").open('w') as open_log:
                    open_log.write(process_output.stdout.decode('utf-8'))
                print(process_output.stdout.decode('utf-8'), flush=True)

            # look for output dir file
            tmp_output_dir = temp_dir_path / temp_id / "outs"
            expected_files = {
                Path("multi"): Path.is_dir,
                Path("per_sample_outs"): Path.is_dir,
                Path("config.csv"): Path.is_file,
            }
            for file_path, type_func in expected_files.items():
                output_path = tmp_output_dir / file_path
                if not type_func(output_path):
                    raise ValueError(f"Could not find expected '{output_path}'")

            # Move the direct children only, from a snapshot of the listing:
            # moving a directory takes its content along, and the tree must
            # not be walked recursively while it is being emptied.
            for output_path in list(tmp_output_dir.iterdir()):
                if output_path.name != "config.csv": # Already created
                    shutil.move(str(output_path), par['output'])
-z "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS" ]; then + VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS=$(ViashStripAutomount "$VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS") +fi +if [ ! -z "$VIASH_PAR_FEATURE_REFERENCE" ]; then + VIASH_PAR_FEATURE_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_FEATURE_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
def setup_logger():
    """Return the root logger, configured to log INFO and above to stdout.

    A new stream handler writing to standard output is added on each call;
    records are formatted as timestamp, level name padded to eight
    characters, and message.
    """
    import logging
    from sys import stdout

    record_format = "%(asctime)s %(levelname)-8s %(message)s"

    to_console = logging.StreamHandler(stdout)
    to_console.setFormatter(logging.Formatter(record_format))

    root = logging.getLogger()
    root.setLevel(logging.INFO)
    root.addHandler(to_console)

    return root
+ info: + orig_arg: "samfilenames" + example: + - "mysample1.BAM" + - "mysample2.BAM" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + description: "Path to the GTF file containing the features." + info: + orig_arg: "featurefilename" + example: + - "reference.gtf" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Filename to output the counts to." + info: + orig_arg: "--counts_output" + example: + - "htseq-count.tsv" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_delimiter" + description: "Column delimiter in output." + info: + orig_arg: "--delimiter" + example: + - "\t" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_sam" + description: "Write out all SAM alignment records into SAM/BAM files (one per\ + \ input file needed), \nannotating each line with its feature assignment (as\ + \ an optional field with tag 'XF'). \nSee the -p option to use BAM instead\ + \ of SAM.\n" + info: + orig_arg: "--samout" + example: + - "mysample1_out.BAM" + - "mysample2_out.BAM" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--output_sam_format" + description: "Format to use with the --output_sam argument." + info: + orig_arg: "--samout-format" + required: false + choices: + - "sam" + - "bam" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "string" + name: "--order" + alternatives: + - "-r" + description: "Sorting order of . 
Paired-end sequencing data\ + \ must be sorted either by position or\nby read name, and the sorting order\ + \ must be specified. Ignored for single-end data.\n" + info: + orig_arg: "--order" + default: + - "name" + required: false + choices: + - "pos" + - "name" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--stranded" + alternatives: + - "-s" + description: "Whether the data is from a strand-specific assay. 'reverse' means\ + \ 'yes' with reversed strand interpretation." + info: + orig_arg: "--stranded" + default: + - "yes" + required: false + choices: + - "yes" + - "no" + - "reverse" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--minimum_alignment_quality" + alternatives: + - "-a" + - "--minaqual" + description: "Skip all reads with MAPQ alignment quality lower than the given\ + \ minimum value. \nMAPQ is the 5th column of a SAM/BAM file and its usage\ + \ depends on the software \nused to map the reads.\n" + info: + orig_arg: "--minaqual" + default: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--type" + alternatives: + - "-t" + description: "Feature type (3rd column in GTF file) to be used, all features\ + \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" + info: + orig_arg: "--type" + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--id_attribute" + alternatives: + - "-i" + description: "GTF attribute to be used as feature ID (default, suitable for\ + \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ + \ within the same GTF attribute will be added\ntogether. The typical way of\ + \ using this option is to count all exonic reads from each gene\nand add the\ + \ exons but other uses are possible as well. 
You can call this option multiple\n\ + times: in that case, the combination of all attributes separated by colons\ + \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ + \ gene_id -i exon_number.\n" + info: + orig_arg: "--idattr" + example: + - "gene_id" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--additional_attributes" + description: "Additional feature attributes (suitable for Ensembl GTF files:\ + \ gene_name). Use multiple times\nfor more than one additional attribute.\ + \ These attributes are only used as annotations in the\noutput, while the\ + \ determination of how the counts are added together is done based on option\ + \ -i.\n" + info: + orig_arg: "--additional-attr" + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--add_chromosome_info" + description: "Store information about the chromosome of each feature as an additional\ + \ attribute\n(e.g. colunm in the TSV output file).\n" + info: + orig_arg: "--add-chromosome-info" + direction: "input" + dest: "par" + - type: "string" + name: "--mode" + alternatives: + - "-m" + description: "Mode to handle reads overlapping more than one feature." + info: + orig_arg: "--mode" + default: + - "union" + required: false + choices: + - "union" + - "intersection-strict" + - "intersection-nonempty" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--non_unique" + description: "Whether and how to score reads that are not uniquely aligned or\ + \ ambiguously assigned to features." 
+ info: + orig_arg: "--nonunique" + default: + - "none" + required: false + choices: + - "none" + - "all" + - "fraction" + - "random" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--secondary_alignments" + description: "Whether to score secondary alignments (0x100 flag)." + info: + orig_arg: "--secondary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--supplementary_alignments" + description: "Whether to score supplementary alignments (0x800 flag)." + info: + orig_arg: "--supplementary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--counts_output_sparse" + description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." + info: + orig_arg: "--counts-output-sparse" + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Quantify gene expression for subsequent testing for differential expression.\n\ + \nThis script takes one or more alignment files in SAM/BAM format and a feature\ + \ file in GFF format and calculates for each feature the number of reads mapping\ + \ to it. 
\n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "HTSeq" + - "pyyaml" + - "scipy" + - "pandas~=2.0.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: 
"/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count/htseq_count" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/htseq_count/htseq_count b/target/docker/mapping/htseq_count/htseq_count new file mode 100755 index 00000000000..32d499c22f2 --- /dev/null +++ b/target/docker/mapping/htseq_count/htseq_count @@ -0,0 +1,1608 @@ +#!/usr/bin/env bash + +# htseq_count 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
function ViashQuote {
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # flag with an attached value: keep everything up to the first '=' as-is
    # and wrap everything after that first '=' in single quotes
    printf '%s\n' "${1%%=*}='${1#*=}'"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: printf is used instead of echo because echo would swallow
    # flags that happen to be valid echo options (-n, -e, -E) and print nothing
    printf '%s\n' "$1"
  else
    # plain value: wrap in single quotes
    printf '%s\n' "'$1'"
  fi
}
# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] '.
#         Nothing is printed when $VIASH_VERBOSITY is lower than $1.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  # drop level and tag so that "$@" only holds the message words
  shift 2
  if [ "$VIASH_VERBOSITY" -ge "$required_level" ]; then
    # log messages go to stderr so they never mix with the component's stdout
    >&2 echo "[$display_tag]" "$@"
  fi
}
# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# Forwards every argument to ViashLog, using $VIASH_LOGCODE_INFO as the
# required verbosity level and 'info' as the display tag.
ViashInfo() {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}
+ echo "" + echo " --output_sam" + echo " type: file, multiple values allowed, output, file must exist" + echo " example: mysample1_out.BAM;mysample2_out.BAM" + echo " Write out all SAM alignment records into SAM/BAM files (one per input" + echo " file needed)," + echo " annotating each line with its feature assignment (as an optional field" + echo " with tag 'XF')." + echo " See the -p option to use BAM instead of SAM." + echo "" + echo " --output_sam_format" + echo " type: string" + echo " choices: [ sam, bam ]" + echo " Format to use with the --output_sam argument." + echo "" + echo "Arguments:" + echo " -r, --order" + echo " type: string" + echo " default: name" + echo " choices: [ pos, name ]" + echo " Sorting order of . Paired-end sequencing data must be" + echo " sorted either by position or" + echo " by read name, and the sorting order must be specified. Ignored for" + echo " single-end data." + echo "" + echo " -s, --stranded" + echo " type: string" + echo " default: yes" + echo " choices: [ yes, no, reverse ]" + echo " Whether the data is from a strand-specific assay. 'reverse' means 'yes'" + echo " with reversed strand interpretation." + echo "" + echo " -a, --minaqual, --minimum_alignment_quality" + echo " type: integer" + echo " default: 10" + echo " Skip all reads with MAPQ alignment quality lower than the given minimum" + echo " value." + echo " MAPQ is the 5th column of a SAM/BAM file and its usage depends on the" + echo " software" + echo " used to map the reads." + echo "" + echo " -t, --type" + echo " type: string" + echo " example: exon" + echo " Feature type (3rd column in GTF file) to be used, all features of other" + echo " type are ignored (default, suitable for Ensembl GTF files: exon)" + echo "" + echo " -i, --id_attribute" + echo " type: string, multiple values allowed" + echo " example: gene_id" + echo " GTF attribute to be used as feature ID (default, suitable for Ensembl" + echo " GTF files: gene_id)." 
+ echo " All feature of the right type (see -t option) within the same GTF" + echo " attribute will be added" + echo " together. The typical way of using this option is to count all exonic" + echo " reads from each gene" + echo " and add the exons but other uses are possible as well. You can call this" + echo " option multiple" + echo " times: in that case, the combination of all attributes separated by" + echo " colons (:) will be used" + echo " as a unique identifier, e.g. for exons you might use -i gene_id -i" + echo " exon_number." + echo "" + echo " --additional_attributes" + echo " type: string, multiple values allowed" + echo " example: gene_name" + echo " Additional feature attributes (suitable for Ensembl GTF files:" + echo " gene_name). Use multiple times" + echo " for more than one additional attribute. These attributes are only used" + echo " as annotations in the" + echo " output, while the determination of how the counts are added together is" + echo " done based on option -i." + echo "" + echo " --add_chromosome_info" + echo " type: boolean_true" + echo " Store information about the chromosome of each feature as an additional" + echo " attribute" + echo " (e.g. colunm in the TSV output file)." + echo "" + echo " -m, --mode" + echo " type: string" + echo " default: union" + echo " choices: [ union, intersection-strict, intersection-nonempty ]" + echo " Mode to handle reads overlapping more than one feature." + echo "" + echo " --non_unique" + echo " type: string" + echo " default: none" + echo " choices: [ none, all, fraction, random ]" + echo " Whether and how to score reads that are not uniquely aligned or" + echo " ambiguously assigned to features." + echo "" + echo " --secondary_alignments" + echo " type: string" + echo " choices: [ score, ignore ]" + echo " Whether to score secondary alignments (0x100 flag)." 
# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 (after logging a critical message) when the
# `docker` executable cannot be found or when `docker version` fails.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # NOTE: the exit status of `command -v` is tested directly. Wrapping it in
  # `[ ... ]` would hand the words to `test`, which rejects the expression and
  # therefore never reports a missing docker executable.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so a failing `docker version` can be inspected
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  # restore errexit only if it was enabled before
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}
# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : exit status of `docker push` (0 when the push succeeded)
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  # temporarily disable errexit so a failing push can be reported below
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # verbose enough: let docker print its own progress
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  # restore errexit only if it was enabled before
  [[ $save =~ e ]] && set -e
  # report on the image that was actually passed in ($1) rather than on the
  # global $VSHD_ID, which is only set when called through ViashDockerSetup
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}
# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# Exits the script with status 1 when the check run inside the container fails.
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  tag=$1
  shift 1
  commands="$@"

  # snippet run by `sh` inside the container: it echoes the first command
  # that `command -v` cannot resolve and stops with a non-zero status
  local probe="for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done"

  # temporarily disable errexit so the status of the probe can be inspected
  save=$-; set +e
  missing=$(docker run --rm --entrypoint=sh $tag -c "$probe")
  outCheck=$?
  [[ $save =~ e ]] && set -e

  # nothing to report when the probe succeeded
  if [ $outCheck -eq 0 ]; then
    return 0
  fi

  ViashError "Docker container '$tag' does not contain command '$missing'."
  exit 1
}
# ViashDockerBuild: build a docker container
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : optional extra words placed right after the image
#                       identifier on the `docker build` command line
#                       (callers in this script pass `--no-cache`)
# exit code $?        : whether or not the image was built
# Exits the script with status 1 when `docker build` fails.
function ViashDockerBuild {
  # create temporary directory to store dockerfile & optional resources in
  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-htseq_count-XXXXXX")
  dockerfile="$tmpdir/Dockerfile"
  function clean_up {
    rm -rf "$tmpdir"
  }
  # remove the temporary directory when the shell exits
  trap clean_up EXIT

  # store dockerfile and resources
  # (paths are quoted so temp/resource directories containing spaces work)
  ViashDockerfile > "$dockerfile"

  # Build the container
  ViashNotice "Building container '$1' with Dockerfile"
  ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'"
  # temporarily disable errexit so a failing build can be reported below
  save=$-; set +e
  # NOTE: $@ is left unquoted as in the original call sites, so a caller that
  # passes several options in one word keeps working; only paths are quoted.
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker build -t $@ "$VIASH_META_RESOURCES_DIR" -f "$dockerfile"
  else
    # quiet mode: keep the build transcript so it can be shown on failure
    docker build -t $@ "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log"
  fi
  out=$?
  # restore errexit only if it was enabled before
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashError "Error occurred while building container '$1'"
    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
      ViashError "Transcript: --------------------------------"
      cat "$tmpdir/docker_build.log"
      ViashError "End of transcript --------------------------"
    fi
    exit 1
  fi
  # verify that the freshly built image provides the required commands
  ViashDockerCheckCommands "$1" 'ps' 'bash'
}
# ViashExtractFlags: Retain leading flag
# $1     : string with a possible leading flag
# return : leading flag (everything before the first '='; the whole string
#          when it contains no '=')
# examples:
#   ViashExtractFlags --foo=bar  # returns --foo
function ViashExtractFlags {
  # parameter expansion instead of `echo $1 | sed`: the unquoted echo was
  # subject to globbing / word splitting and printed nothing for arguments
  # that echo parses as its own options (-n, -e, -E)
  printf '%s\n' "${1%%=*}"
}
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "htseq_count 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_delimiter) + [ -n "$VIASH_PAR_OUTPUT_DELIMITER" ] && ViashError Bad arguments for option \'--output_delimiter\': \'$VIASH_PAR_OUTPUT_DELIMITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DELIMITER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_delimiter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_delimiter=*) + [ -n "$VIASH_PAR_OUTPUT_DELIMITER" ] && ViashError Bad arguments for option \'--output_delimiter=*\': \'$VIASH_PAR_OUTPUT_DELIMITER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DELIMITER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_sam) + if [ -z "$VIASH_PAR_OUTPUT_SAM" ]; then + VIASH_PAR_OUTPUT_SAM="$2" + else + VIASH_PAR_OUTPUT_SAM="$VIASH_PAR_OUTPUT_SAM;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_sam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_sam=*) + if [ -z "$VIASH_PAR_OUTPUT_SAM" ]; then + VIASH_PAR_OUTPUT_SAM=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_SAM="$VIASH_PAR_OUTPUT_SAM;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_sam_format) + [ -n "$VIASH_PAR_OUTPUT_SAM_FORMAT" ] && ViashError Bad arguments for option \'--output_sam_format\': \'$VIASH_PAR_OUTPUT_SAM_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_SAM_FORMAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_sam_format. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_sam_format=*) + [ -n "$VIASH_PAR_OUTPUT_SAM_FORMAT" ] && ViashError Bad arguments for option \'--output_sam_format=*\': \'$VIASH_PAR_OUTPUT_SAM_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_SAM_FORMAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --order) + [ -n "$VIASH_PAR_ORDER" ] && ViashError Bad arguments for option \'--order\': \'$VIASH_PAR_ORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --order. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --order=*) + [ -n "$VIASH_PAR_ORDER" ] && ViashError Bad arguments for option \'--order=*\': \'$VIASH_PAR_ORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ORDER=$(ViashRemoveFlags "$1") + shift 1 + ;; + -r) + [ -n "$VIASH_PAR_ORDER" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_ORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stranded) + [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stranded. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stranded=*) + [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded=*\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDED=$(ViashRemoveFlags "$1") + shift 1 + ;; + -s) + [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minimum_alignment_quality) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimum_alignment_quality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minimum_alignment_quality=*) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality=*\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + -a) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'-a\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -a. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minaqual) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minaqual\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --minaqual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --type) + [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --type=*) + [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type=*\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -t) + [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id_attribute) + if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then + VIASH_PAR_ID_ATTRIBUTE="$2" + else + VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id_attribute. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id_attribute=*) + if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then + VIASH_PAR_ID_ATTRIBUTE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then + VIASH_PAR_ID_ATTRIBUTE="$2" + else + VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --additional_attributes) + if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then + VIASH_PAR_ADDITIONAL_ATTRIBUTES="$2" + else + VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --additional_attributes. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --additional_attributes=*) + if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then + VIASH_PAR_ADDITIONAL_ATTRIBUTES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --add_chromosome_info) + [ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ] && ViashError Bad arguments for option \'--add_chromosome_info\': \'$VIASH_PAR_ADD_CHROMOSOME_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADD_CHROMOSOME_INFO=true + shift 1 + ;; + --mode) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --mode=*) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -m) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'-m\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -m. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --non_unique) + [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NON_UNIQUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --non_unique. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --non_unique=*) + [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique=*\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NON_UNIQUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --secondary_alignments) + [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SECONDARY_ALIGNMENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --secondary_alignments. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --secondary_alignments=*) + [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments=*\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SECONDARY_ALIGNMENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --supplementary_alignments) + [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --supplementary_alignments. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --supplementary_alignments=*) + [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments=*\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_output_sparse) + [ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ] && ViashError Bad arguments for option \'--counts_output_sparse\': \'$VIASH_PAR_COUNTS_OUTPUT_SPARSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_OUTPUT_SPARSE=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        *)  # positional arg or unknown option
+            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
+            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
+            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+            shift # past argument
+            ;;
+    esac
+done
+
+# parse positional parameters
+eval set -- $VIASH_POSITIONAL_ARGS
+
+
+ViashDockerInstallationCheck
+
+if [ $VIASH_MODE == "docker_setup" ]; then
+  ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY"
+  exit 0
+fi
+ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0' ifneedbepullelsecachedbuild
+
+if [ $VIASH_MODE == "docker_debug" ]; then
+  ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0'"
+  docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0'
+  exit 0
+fi
+
+# setting computational defaults
+
+# helper function for parsing memory strings
+#
+# Converts a human-readable amount of memory into a number of bytes.
+#   $1: a non-negative integer followed by a unit (b, k/kb, m/mb, g/gb, t/tb, p/pb);
+#       upper/lower case and whitespace are ignored, units are powers of 1024.
+# Prints the number of bytes on stdout, or prints nothing when $1 does not match.
+function ViashMemoryAsBytes {
+  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
+  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
+  if [[ $memory =~ $memory_regex ]]; then
+    # number: everything before the first non-digit; symbol: everything after the last digit
+    local number=${memory/[^0-9]*/}
+    local symbol=${memory/*[0-9]/}
+    # keep the result local so it cannot leak into the caller's scope
+    local memory_b
+
+    case $symbol in
+      b)      memory_b=$number ;;
+      kb|k)   memory_b=$(( $number * 1024 )) ;;
+      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
+      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
+      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  # quoted, so that a value such as "2 GB" reaches the helper as a single argument
+  # (the helper strips the whitespace itself)
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # unset takes the variable NAME: with the value being empty here, "unset $VAR" expands
+    # to a bare "unset" and leaves the variable set-but-empty, which the later
+    # ${VIASH_META_MEMORY_B+x} test still treats as defined
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # same as above: pass the name, not the (empty) value
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_REFERENCE+x} ]; then
+  ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_ORDER+x} ]; then + VIASH_PAR_ORDER="name" +fi +if [ -z ${VIASH_PAR_STRANDED+x} ]; then + VIASH_PAR_STRANDED="yes" +fi +if [ -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="10" +fi +if [ -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then + VIASH_PAR_ADD_CHROMOSOME_INFO="false" +fi +if [ -z ${VIASH_PAR_MODE+x} ]; then + VIASH_PAR_MODE="union" +fi +if [ -z ${VIASH_PAR_NON_UNIQUE+x} ]; then + VIASH_PAR_NON_UNIQUE="none" +fi +if [ -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then + VIASH_PAR_COUNTS_OUTPUT_SPARSE="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ]]; then + if ! [[ "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--minimum_alignment_quality' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ]]; then + if ! [[ "$VIASH_PAR_ADD_CHROMOSOME_INFO" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--add_chromosome_info' has to be a boolean_true. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ]]; then + if ! [[ "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--counts_output_sparse' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! 
-z "$VIASH_PAR_OUTPUT_SAM_FORMAT" ]; then + VIASH_PAR_OUTPUT_SAM_FORMAT_CHOICES=("sam:bam") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_SAM_FORMAT_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_SAM_FORMAT:" ]]; then + ViashError '--output_sam_format' specified value of \'$VIASH_PAR_OUTPUT_SAM_FORMAT\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_ORDER" ]; then + VIASH_PAR_ORDER_CHOICES=("pos:name") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_ORDER_CHOICES[*]}:" =~ ":$VIASH_PAR_ORDER:" ]]; then + ViashError '--order' specified value of \'$VIASH_PAR_ORDER\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_STRANDED" ]; then + VIASH_PAR_STRANDED_CHOICES=("yes:no:reverse") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_STRANDED_CHOICES[*]}:" =~ ":$VIASH_PAR_STRANDED:" ]]; then + ViashError '--stranded' specified value of \'$VIASH_PAR_STRANDED\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_MODE" ]; then + VIASH_PAR_MODE_CHOICES=("union:intersection-strict:intersection-nonempty") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then + ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_NON_UNIQUE" ]; then + VIASH_PAR_NON_UNIQUE_CHOICES=("none:all:fraction:random") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_NON_UNIQUE_CHOICES[*]}:" =~ ":$VIASH_PAR_NON_UNIQUE:" ]]; then + ViashError '--non_unique' specified value of \'$VIASH_PAR_NON_UNIQUE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. 
+ exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_SECONDARY_ALIGNMENTS" ]; then + VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES=("score:ignore") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SECONDARY_ALIGNMENTS:" ]]; then + ViashError '--secondary_alignments' specified value of \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ]; then + VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES=("score:ignore") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS:" ]]; then + ViashError '--supplementary_alignments' specified value of \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_OUTPUT_SAM; do + unset IFS + if [ ! -d "$(dirname "$file")" ]; then + mkdir -p "$(dirname "$file")" + fi + done + set +f +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" )
+  VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT")
+  # output paths are remembered so that ViashPerformChown can fix their ownership on exit
+  VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" )
+fi
+if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then
+  VIASH_TEST_OUTPUT_SAM=()
+  # IFS=';' only serves to split the unquoted list below; it is reset inside the loop
+  IFS=';'
+  for var in $VIASH_PAR_OUTPUT_SAM; do
+    unset IFS
+    VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" )
+    var=$(ViashAutodetectMount "$var")
+    VIASH_TEST_OUTPUT_SAM+=( "$var" )
+    VIASH_CHOWN_VARS+=( "$var" )
+  done
+  # join the rewritten paths back into a single ';'-separated string
+  VIASH_PAR_OUTPUT_SAM=$(IFS=';' ; echo "${VIASH_TEST_OUTPUT_SAM[*]}")
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" )
+  VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" )
+  VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" )
+  VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG")
+fi
+if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" )
+  VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR")
+fi
+
+# get unique mounts
+# NOTE(review): the unquoted $( ... ) is word-split, so a mount argument containing
+# whitespace would end up as several array elements - confirm paths never contain spaces.
+VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u))
+
+# change file ownership
+#
+# Runs chown (as the container entrypoint) on every path collected in VIASH_CHOWN_VARS,
+# recursively and silently, handing the paths to the uid:gid of the invoking user.
+# Does nothing when no paths were collected. errexit is switched off around the call,
+# so a failing chown does not abort the script.
+# NOTE(review): presumably needed because files written from inside the container are
+# not owned by the host user - confirm.
+# NOTE(review): "set -e" is switched on afterwards regardless of its previous state.
+function ViashPerformChown {
+  if (( ${#VIASH_CHOWN_VARS[@]} )); then
+    set +e
+    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
+    set -e
+  fi
+}
+# registered as an EXIT trap, so it runs whenever the wrapper script exits
+trap ViashPerformChown EXIT
+
+# helper function for filling in extra docker args
+VIASH_EXTRA_DOCKER_ARGS=""
+if [ !
-z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-htseq_count-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import tempfile +import subprocess +from pathlib import Path +import tarfile +import gzip +import shutil +import yaml + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_delimiter': $( if [ ! -z ${VIASH_PAR_OUTPUT_DELIMITER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_DELIMITER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_sam': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'output_sam_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM_FORMAT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'order': $( if [ ! 
-z ${VIASH_PAR_ORDER+x} ]; then echo "r'${VIASH_PAR_ORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'add_chromosome_info': $( if [ ! -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'non_unique': $( if [ ! -z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'supplementary_alignments': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi )
+}
+
+## VIASH END
+
+########################
+### Helper functions ###
+########################
+
+# helper function for checking whether something is a gzip
+# (compares the first two bytes of the file with the gzip magic number)
+def is_gz_file(path: Path) -> bool:
+    with open(path, 'rb') as file:
+        return file.read(2) == b'\\x1f\\x8b'
+
+# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path
+#   - tar archive: unpacked into temp_dir_path/<name without two extensions>; when the
+#     archive has exactly one root directory, that directory level is stripped
+#   - gzip file: decompressed to temp_dir_path/<name without last extension>
+#   - anything else: par_value is returned unchanged
+def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path:
+    if par_value.is_file() and tarfile.is_tarfile(par_value):
+        # Remove two extensions (if they exist)
+        extaction_dir_name = Path(par_value.stem).stem
+        unpacked_path = temp_dir_path / extaction_dir_name
+        print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True)
+
+        with tarfile.open(par_value, 'r') as open_tar:
+            members = open_tar.getmembers()
+            root_dirs = [member
+                for member in members
+                if member.isdir() and member.name != '.' and '/' not in member.name]
+            # if there is only one root_dir (and there are files in that directory)
+            # strip that directory name from the destination folder
+            if len(root_dirs) == 1:
+                for mem in members:
+                    mem.path = Path(*Path(mem.path).parts[1:])
+            members_to_move = [mem for mem in members if mem.path != Path('.')]
+            # NOTE(review): member paths are not checked before extraction; an archive from
+            # an untrusted source could write outside unpacked_path - consider an
+            # extraction filter if such input is possible.
+            open_tar.extractall(unpacked_path, members=members_to_move)
+        return unpacked_path
+
+    elif par_value.is_file() and is_gz_file(par_value):
+        # Remove extension (if it exists)
+        extaction_file_name = Path(par_value.stem)
+        unpacked_path = temp_dir_path / extaction_file_name
+        print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True)
+
+        with gzip.open(par_value, 'rb') as f_in:
+            with open(unpacked_path, 'wb') as f_out:
+                shutil.copyfileobj(f_in, f_out)
+        return unpacked_path
+
+    else:
+        return par_value
+
+# build the list of command line tokens for the wrapped tool
+#   par:    parameter values, keyed by argument name without the leading dashes
+#   config: parsed viash config; every argument under functionality.argument_groups
+#           that has info.orig_arg is forwarded using that original flag
+# returns a flat list: the bare flag for boolean arguments, flag/value pairs for other
+# flagged arguments, and the plain values for arguments whose orig_arg is not a flag
+def generate_args(par, config):
+    # fetch arguments from config
+    arguments = [
+        arg
+        for group in config["functionality"]["argument_groups"]
+        for arg in group["arguments"]
+    ]
+
+    cmd_args = []
+
+    for arg in arguments:
+        arg_val = par.get(arg["name"].removeprefix("--"))
+        # info may be present with an explicit null value, in which case the default
+        # of dict.get is not used; fall back to an empty dict explicitly
+        orig_arg = (arg.get("info") or {}).get("orig_arg")
+        # an argument counts as provided unless it is None, False (flag not set) or an
+        # empty string/list; a plain truthiness test would also drop a numeric 0
+        # (e.g. a minimum alignment quality of 0)
+        is_provided = (
+            arg_val is not None
+            and arg_val is not False
+            and arg_val != ""
+            and arg_val != []
+        )
+        if is_provided and orig_arg:
+            if not arg.get("multiple", False):
+                arg_val = [arg_val]
+
+            if arg["type"] in ["boolean_true", "boolean_false"]:
+                # if argument is a boolean_true or boolean_false, simply add the flag
+                arg_val = [orig_arg]
+            elif orig_arg.startswith("-"):
+                # if the orig arg flag is not a positional,
+                # add the flag in front of each element and flatten
+                arg_val = [str(x) for val in arg_val for x in [orig_arg, val]]
+
+            cmd_args.extend(arg_val)
+
+    return cmd_args
+
+########################
+### Main code ###
+########################
+
+# read config arguments
+config = yaml.safe_load(Path(meta["config"]).read_text())
+
+
+with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir:
+
+    # checking for compressed files, ungzip files if need be
+    temp_dir_path =
Path(temp_dir) + reference = Path(par["reference"]) + + print(f'>> Check compression of --reference with value: {reference}', flush=True) + par["reference"] = extract_if_need_be(reference, temp_dir_path) + + print(">> Constructing command", flush=True) + cmd_args = [ "htseq-count" ] + generate_args(par, config) + + # manually process cpus parameter + if 'cpus' in meta and meta['cpus']: + cmd_args.extend(["--nprocesses", str(meta["cpus"])]) + + print(">> Running htseq-count with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + + subprocess.run( + cmd_args, + check=True + ) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then + unset VIASH_TEST_OUTPUT_SAM + IFS=';' + for var in $VIASH_PAR_OUTPUT_SAM; do + unset IFS + if [ -z "$VIASH_TEST_OUTPUT_SAM" ]; then + VIASH_TEST_OUTPUT_SAM="$(ViashStripAutomount "$var")" + else + VIASH_TEST_OUTPUT_SAM="$VIASH_TEST_OUTPUT_SAM;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_OUTPUT_SAM="$VIASH_TEST_OUTPUT_SAM" +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_SAM" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_OUTPUT_SAM; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Output file '$file' does not exist." + exit 1 + fi + done + set +f +fi + + +exit 0 diff --git a/target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml b/target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..8589683cbae --- /dev/null +++ b/target/docker/mapping/htseq_count_to_h5mu/.config.vsh.yaml @@ -0,0 +1,209 @@ +functionality: + name: "htseq_count_to_h5mu" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input" + arguments: + - type: "string" + name: "--input_id" + description: "The obs index for the counts" + info: null + example: + - "foo" + 
required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--input_counts" + description: "The counts as a TSV file as output by HTSeq." + info: null + example: + - "counts.tsv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + description: "The GTF file." + info: null + example: + - "gencode_v41_star" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Convert the htseq table to a h5mu.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "gtfparse" + - "polars[pyarrow] < 0.16.14" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" 
+ cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu b/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu new file mode 100755 index 00000000000..3b68d87030c --- /dev/null +++ b/target/docker/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu @@ -0,0 +1,1151 @@ +#!/usr/bin/env bash + +# htseq_count_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
#
# Component authors:
#  * Robrecht Cannoodt (author, maintainer)
#  * Angela Oliveira Pisco (author)

# abort the wrapper as soon as a command fails
set -e

# VIASH_TEMP was not provided: take the first non-empty value from the list of
# temp-dir environment variables below, falling back to /tmp
if [ -z "$VIASH_TEMP" ]; then
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
  VIASH_TEMP=${VIASH_TEMP:-/tmp}
fi

# define helper functions
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
function ViashQuote {
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # flag with a value: only the part after the first '=' is quoted
    echo "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: returned untouched
    echo "$1"
  else
    echo "'$1'"
  fi
}
# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# return : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}
# ViashSourceDir: return the path of a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute path of the directory containing the bash file
# note    : sets the global variables SOURCE and DIR
function ViashSourceDir {
  SOURCE="$1"
  # resolve the chain of symlinks one link at a time
  while [ -h "$SOURCE" ]; do
    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    # a relative link target is relative to the directory holding the link
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
  done
  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
}
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
# default verbosity: messages up to and including 'notice' are shown
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE

# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] '.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  # only print when the current verbosity is at least the required level
  if [ $VIASH_VERBOSITY -ge $required_level ]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog $VIASH_LOGCODE_ERROR error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so that a script path containing whitespace is passed as one word)
VIASH_META_RESOURCES_DIR=`ViashSourceDir "${BASH_SOURCE[0]}"`

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="htseq_count_to_h5mu"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  echo "htseq_count_to_h5mu 0.12.4"
  echo ""
  echo "Convert the htseq table to a h5mu."
  echo ""
  echo "Input:"
  echo " --input_id"
  echo " type: string, required parameter, multiple values allowed"
  echo " example: foo"
  echo " The obs index for the counts"
  echo ""
  echo " --input_counts"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: counts.tsv"
  echo " The counts as a TSV file as output by HTSeq."
  echo ""
  echo " --reference"
  echo " type: file, required parameter, file must exist"
  echo " example: gencode_v41_star"
  echo " The GTF file."
  echo ""
  echo "Outputs:"
  echo " -o, --output"
  echo " type: file, required parameter, output, file must exist"
  echo " example: output.h5mu"
  echo " Output h5mu file."
  echo ""
  echo " --output_compression"
  echo " type: string"
  echo " example: gzip"
  echo " choices: [ gzip, lzf ]"
  echo " The compression format to be used on the output h5mu object."
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits with status 1 when the `docker` command cannot be found or when
# `docker version` fails.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # Test the exit status of `command -v` directly. Wrapping it in `[ ! ... ]`
  # hands the words to `test`, which fails with a syntax error (hidden by the
  # redirect), so the branch below could never be taken.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so a failing `docker version` can be reported
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?
# returns '1'
function ViashDockerLocalTagCheck {
  # `docker images -q` prints the image id only when the image exists locally
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # verbose mode: let docker print its own progress
    docker pull "$1" && return 0 || return 1
  else
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the push succeeded
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # report on the image that was actually passed in, not on the global
  # VSHD_ID, which is only set while ViashDockerSetup is running
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : extra arguments forwarded to ViashDockerBuild
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  save=$-; set +e
  ViashDockerPull "$1"
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  VSHD_ID="$1"
  VSHD_STRAT="$2"
  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
    ViashDockerBuild "$VSHD_ID" --no-cache
  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
    ViashDockerPull "$VSHD_ID"
  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
    ViashDockerPullElseBuild "$VSHD_ID" --no-cache
  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
    ViashDockerPullElseBuild "$VSHD_ID"
  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
    ViashDockerBuild "$VSHD_ID"
  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
    # 'ifneedbe*' strategies only act when the image is not yet available locally
    save=$-; set +e
    ViashDockerLocalTagCheck "$VSHD_ID"
    outCheck=$?
    [[ $save =~ e ]] && set -e
    if [ $outCheck -eq 0 ]; then
      ViashInfo "Image $VSHD_ID already exists"
    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
      ViashDockerBuild "$VSHD_ID" --no-cache
    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
      ViashDockerBuild "$VSHD_ID"
    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
      ViashDockerPull "$VSHD_ID"
    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
      ViashDockerPullElseBuild "$VSHD_ID" --no-cache
    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
      ViashDockerPullElseBuild "$VSHD_ID"
    else
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
    fi
  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
    ViashDockerPush "$VSHD_ID"
  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
    save=$-; set +e
    ViashDockerRemoteTagCheck "$VSHD_ID"
    outCheck=$?
    [[ $save =~ e ]] && set -e
    if [ $outCheck -eq 0 ]; then
      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
    else
      ViashNotice "Container '$VSHD_ID' does not yet exist."
      ViashDockerPush "$VSHD_ID"
    fi
  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
    ViashNotice "Skipping setup."
  else
    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
    exit 1
  fi
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  tag=$1
  shift 1
  commands="$@"
  save=$-; set +e
  # the loop runs inside the container and echoes the first missing command
  missing=$(docker run --rm --entrypoint=sh "$tag" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
  outCheck=$?
  [[ $save =~ e ]] && set -e
  if [ $outCheck -ne 0 ]; then
    ViashError "Docker container '$tag' does not contain command '$missing'."
    exit 1
  fi
}


######## End of helper functions for setting up Docker images for viash ########

# ViashDockerfile: print the dockerfile to stdout
# return : dockerfile required to run this component
# examples:
#   ViashDockerfile
function ViashDockerfile {
  cat << 'VIASHDOCKER'
FROM python:3.10-slim

ENTRYPOINT []


RUN apt-get update && \
  DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \
  rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip && \
  pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "gtfparse" "polars[pyarrow] < 0.16.14"

LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Angela Oliveira Pisco"
LABEL org.opencontainers.image.description="Companion container for running component mapping htseq_count_to_h5mu"
LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
LABEL org.opencontainers.image.version="0.12.4"

VIASHDOCKER
}

# ViashDockerBuild: build a docker container
# $1              : image identifier with format `[registry/]image[:tag]`
# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-htseq_count_to_h5mu-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "htseq_count_to_h5mu 0.12.4" + exit + ;; + --input_id) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID="$2" + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id=*) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --input_counts) + if [ -z "$VIASH_PAR_INPUT_COUNTS" ]; then + VIASH_PAR_INPUT_COUNTS="$2" + else + VIASH_PAR_INPUT_COUNTS="$VIASH_PAR_INPUT_COUNTS;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_counts=*) + if [ -z "$VIASH_PAR_INPUT_COUNTS" ]; then + VIASH_PAR_INPUT_COUNTS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_COUNTS="$VIASH_PAR_INPUT_COUNTS;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then + ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_COUNTS+x} ]; then + ViashError '--input_counts' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE+x} ]; then + ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT_COUNTS" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT_COUNTS; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT_COUNTS" ]; then + VIASH_TEST_INPUT_COUNTS=() + IFS=';' + for var in $VIASH_PAR_INPUT_COUNTS; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT_COUNTS+=( "$var" ) + done + VIASH_PAR_INPUT_COUNTS=$(IFS=';' ; echo "${VIASH_TEST_INPUT_COUNTS[*]}") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_htseq_count_to_h5mu:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-htseq_count_to_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import tempfile +from pathlib import Path +import tarfile +import gzip +import shutil +import pandas as pd +import mudata as md +import anndata as ad +import polars as pl +import numpy as np +import gtfparse + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'input_counts': $( if [ ! -z ${VIASH_PAR_INPUT_COUNTS+x} ]; then echo "r'${VIASH_PAR_INPUT_COUNTS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + +# helper function for cheching whether something is a gzip +def is_gz_file(path: Path) -> bool: + with open(path, 'rb') as file: + return file.read(2) == b'\\x1f\\x8b' + +# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) + + with tarfile.open(par_value, 'r') as open_tar: + members = open_tar.getmembers() + root_dirs = [member + for member in members + if member.isdir() and member.name != '.' 
and '/' not in member.name] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path('.')] + open_tar.extractall(unpacked_path, members=members_to_move) + return unpacked_path + + elif par_value.is_file() and is_gz_file(par_value): + # Remove extension (if it exists) + extaction_file_name = Path(par_value.stem) + unpacked_path = temp_dir_path / extaction_file_name + print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) + + with gzip.open(par_value, 'rb') as f_in: + with open(unpacked_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + return unpacked_path + + else: + return par_value + + +print("> combine counts data", flush=True) +counts_data = [] + +for input_id, input_counts in zip(par["input_id"], par["input_counts"]): + data = pd.read_table(input_counts, index_col=0, names=["gene_ids", input_id], dtype={'gene_ids': 'U', input_id: 'i'}).transpose() + counts_data.append(data) + +# combine all counts +counts_and_qc = pd.concat(counts_data, axis=0) + +print("> split qc", flush=True) +idx = counts_and_qc.columns.str.startswith("_") +qc = counts_and_qc.loc[:,idx] +qc.columns = qc.columns.str.replace("^__", "", regex=True) +counts = counts_and_qc.loc[:,~idx] + +print("> construct var", flush=True) +with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir: + # checking for compressed files, ungzip files if need be + temp_dir_path = Path(temp_dir) + reference = Path(par["reference"]) + + print(f'>> Check compression of --reference with value: {reference}', flush=True) + par["reference"] = extract_if_need_be(reference, temp_dir_path) + + # read_gtf only works on str object, not pathlib.Path + reference = gtfparse.read_gtf(str(par["reference"])) + + +# This is a polars dataframe, not 
pandas +reference_genes = reference.filter((pl.col("feature") == "gene") & + (pl.col("gene_id").is_in(list(counts.columns))))\\ + .sort("gene_id") + +var = pd.DataFrame( + data={ + "gene_ids": pd.Index(reference_genes.get_column("gene_id")), + "feature_types": "Gene Expression", + "gene_symbol": reference_genes.get_column("gene_name").to_pandas(), + } +).set_index("gene_ids") + +print("> construct anndata", flush=True) +adata = ad.AnnData( + X=counts, + obsm={"qc_htseq": qc}, + var=var, + dtype=np.int32 +) + +print("> convert to mudata", flush=True) +mdata = md.MuData(adata) + +print("> write to file", flush=True) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT_COUNTS" ]; then + unset VIASH_TEST_INPUT_COUNTS + IFS=';' + for var in $VIASH_PAR_INPUT_COUNTS; do + unset IFS + if [ -z "$VIASH_TEST_INPUT_COUNTS" ]; then + VIASH_TEST_INPUT_COUNTS="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT_COUNTS="$VIASH_TEST_INPUT_COUNTS;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT_COUNTS="$VIASH_TEST_INPUT_COUNTS" +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/multi_star/.config.vsh.yaml b/target/docker/mapping/multi_star/.config.vsh.yaml new file mode 100644 index 00000000000..8c53bc3ffcd --- /dev/null +++ b/target/docker/mapping/multi_star/.config.vsh.yaml @@ -0,0 +1,3080 @@ +functionality: + name: "multi_star" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input/Output" + arguments: + - type: "string" + name: "--input_id" + description: "The ID of the sample being processed. This vector should have\ + \ the same length as the `--input_r1` argument." + info: null + example: + - "mysample" + - "mysample" + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--input_r1" + description: "Paths to the sequences to be mapped. If using Illumina paired-end\ + \ reads, only the R1 files should be passed." 
+ info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L002_R1_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--input_r2" + description: "Paths to the sequences to be mapped. If using Illumina paired-end\ + \ reads, only the R2 files should be passed." + info: null + example: + - "mysample_S1_L001_R2_001.fastq.gz" + - "mysample_S1_L002_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference_index" + alternatives: + - "--genomeDir" + description: "Path to the reference built by star_build_reference. Corresponds\ + \ to the --genomeDir argument in the STAR command." + info: null + example: + - "/path/to/reference" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference_gtf" + description: "Path to the gtf reference file." + info: null + example: + - "genes.gtf" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--outFileNamePrefix" + description: "Path to output directory. Corresponds to the --outFileNamePrefix\ + \ argument in the STAR command." + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Processing arguments" + arguments: + - type: "boolean" + name: "--run_htseq_count" + description: "Whether or not to also run htseq-count after STAR." 
+ info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--run_multiqc" + description: "Whether or not to also run MultiQC at the end." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_success_rate" + description: "Fail when the success rate is below this threshold." + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Run Parameters" + arguments: + - type: "integer" + name: "--runRNGseed" + description: "random number generator seed." + info: + step: "star" + orig_arg: "--runRNGseed" + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome Parameters" + arguments: + - type: "file" + name: "--genomeFastaFiles" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." + info: + step: "star" + orig_arg: "--genomeFastaFiles" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdbFileChrStartEnd" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. Multiple files can be\ + \ supplied and will be concatenated." 
+ info: + step: "star" + orig_arg: "--sjdbFileChrStartEnd" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--sjdbGTFfile" + description: "path to the GTF file with annotations" + info: + step: "star" + orig_arg: "--sjdbGTFfile" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFchrPrefix" + description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSMEBL annotations with UCSC genomes)" + info: + step: "star" + orig_arg: "--sjdbGTFchrPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFfeatureExon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: + step: "star" + orig_arg: "--sjdbGTFfeatureExon" + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentTranscript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentTranscript" + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentGene" + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneName" + description: "GTF attribute name for parent gene name" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentGeneName" + example: + - "gene_name" + required: false + 
direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneType" + description: "GTF attribute name for parent gene type" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentGeneType" + example: + - "gene_type" + - "gene_biotype" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--sjdbOverhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: + step: "star" + orig_arg: "--sjdbOverhang" + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--sjdbScore" + description: "extra alignment score for alignments that cross database junctions" + info: + step: "star" + orig_arg: "--sjdbScore" + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbInsertSave" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: + step: "star" + orig_arg: "--sjdbInsertSave" + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variation parameters" + arguments: + - type: "string" + name: "--varVCFfile" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: + step: "star" + orig_arg: "--varVCFfile" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Parameters" + arguments: + - type: "string" + name: "--readFilesType" + description: "format of input read files\n\n- Fastx ... 
FASTA or FASTQ\n\ + - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ + \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ + \ --readFilesCommand samtools view" + info: + step: "star" + orig_arg: "--readFilesType" + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesSAMattrKeep" + description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ + - None ... do not keep any tags" + info: + step: "star" + orig_arg: "--readFilesSAMattrKeep" + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--readFilesManifest" + description: "path to the \"manifest\" file with the names of read files. The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ + \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ + \ but not tabs are allowed in file names.\nIf read_group_line does not start\ + \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ + If read_group_line starts with ID:, it can contain several fields separated\ + \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ + \ line." + info: + step: "star" + orig_arg: "--readFilesManifest" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesPrefix" + description: "prefix for the read files names, i.e. 
it will be added in front\ + \ of the strings in --readFilesIn" + info: + step: "star" + orig_arg: "--readFilesPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesCommand" + description: "command line to execute for each of the input file. This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." + info: + step: "star" + orig_arg: "--readFilesCommand" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readMapNumber" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: + step: "star" + orig_arg: "--readMapNumber" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readMatesLengthsIn" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ + \ mates are the same / not the same. NotEqual is safe in all situations." 
+ info: + step: "star" + orig_arg: "--readMatesLengthsIn" + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readNameSeparator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: + step: "star" + orig_arg: "--readNameSeparator" + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readQualityScoreBase" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: + step: "star" + orig_arg: "--readQualityScoreBase" + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Clipping" + arguments: + - type: "string" + name: "--clipAdapterType" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ + \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ + \ ... no adapter clipping, all other clip* parameters are disregarded" + info: + step: "star" + orig_arg: "--clipAdapterType" + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--clip3pNbases" + description: "number(s) of bases to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." + info: + step: "star" + orig_arg: "--clip3pNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--clip3pAdapterSeq" + description: "adapter sequences to clip from 3p of each mate. 
If one value\ + \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ + \ sequence with the length equal to read length" + info: + step: "star" + orig_arg: "--clip3pAdapterSeq" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "double" + name: "--clip3pAdapterMMp" + description: "max proportion of mismatches for 3p adapter clipping for each\ + \ mate. If one value is given, it will be assumed the same for both mates." + info: + step: "star" + orig_arg: "--clip3pAdapterMMp" + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip3pAfterAdapterNbases" + description: "number of bases to clip from 3p of each mate after the adapter\ + \ clipping. If one value is given, it will be assumed the same for both mates." + info: + step: "star" + orig_arg: "--clip3pAfterAdapterNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip5pNbases" + description: "number(s) of bases to clip from 5p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." 
+ info: + step: "star" + orig_arg: "--clip5pNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Limits" + arguments: + - type: "long" + name: "--limitGenomeGenerateRAM" + description: "maximum available RAM (bytes) for genome generation" + info: + step: "star" + orig_arg: "--limitGenomeGenerateRAM" + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitIObufferSize" + description: "max available buffers size (bytes) for input/output, per thread" + info: + step: "star" + orig_arg: "--limitIObufferSize" + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "long" + name: "--limitOutSAMoneReadBytes" + description: "max size of the SAM record (bytes) for one read. Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: + step: "star" + orig_arg: "--limitOutSAMoneReadBytes" + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJoneRead" + description: "max number of junctions for one read (including all multi-mappers)" + info: + step: "star" + orig_arg: "--limitOutSJoneRead" + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJcollapsed" + description: "max number of collapsed junctions" + info: + step: "star" + orig_arg: "--limitOutSJcollapsed" + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitBAMsortRAM" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ + \ be set to the genome index size. 0 value can only be used with --genomeLoad\ + \ NoSharedMemory option." 
+ info: + step: "star" + orig_arg: "--limitBAMsortRAM" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitSjdbInsertNsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: + step: "star" + orig_arg: "--limitSjdbInsertNsj" + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitNreadsSoft" + description: "soft limit on the number of reads" + info: + step: "star" + orig_arg: "--limitNreadsSoft" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: general" + arguments: + - type: "string" + name: "--outTmpKeep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... keep all files" + info: + step: "star" + orig_arg: "--outTmpKeep" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outStd" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM" + info: + step: "star" + orig_arg: "--outStd" + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outReadsUnmapped" + description: "output of unmapped and partially mapped (i.e. mapped only one\ + \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ + \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: + step: "star" + orig_arg: "--outReadsUnmapped" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outQSconversionAdd" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: + step: "star" + orig_arg: "--outQSconversionAdd" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outMultimapperOrder" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." + info: + step: "star" + orig_arg: "--outMultimapperOrder" + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--outSAMmode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... 
full SAM but without quality scores" + info: + step: "star" + orig_arg: "--outSAMmode" + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMstrandField" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: + step: "star" + orig_arg: "--outSAMstrandField" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ + \ number of loci the read maps to: =1 for unique mappers, >1 for multimappers.\ + \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ + \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ + \ local alignment score, +1/-1 for matches/mismatches, score* penalties for\ + \ indels and gaps. For PE reads, total score for two mates. Standard SAM tag.\n\ + - nM ... number of mismatches. For PE reads, sum over two mates.\n\ + - NM ... edit distance to the reference (number of mismatched + inserted\ + \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ + \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ + \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ + \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ + \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ + \ annotated, 20 is added to its motif value.\n- jI ... 
start and\ + \ end of introns for all junctions (1-based).\n- XS ... alignment\ + \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ + \ string. Standard SAM tag.\n- ch ... marks all segments of all chimeric\ + \ alignments for --chimOutType WithinBAM output.\n- cN ... number\ + \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ + \ ... variant allele\n- vG ... genomic coordinate of the variant\ + \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ + \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ + \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ + \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ + \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ + \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ + \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ + \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ + \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ + \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ + \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ + \ genome generated with --genomeTransformType Diploid .\n- rB ...\ + \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ + \ of the variant." + info: + step: "star" + orig_arg: "--outSAMattributes" + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMattrIHstart" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." 
+ info: + step: "star" + orig_arg: "--outSAMattrIHstart" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMunmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ... output unmapped reads within the main SAM\ + \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ + \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ + \ to its mapped mate. Only affects multi-mapping reads." + info: + step: "star" + orig_arg: "--outSAMunmapped" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMorder" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: + step: "star" + orig_arg: "--outSAMorder" + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMprimaryFlag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ + \ the best score is primary\n- AllBestScore ... all alignments with the best\ + \ score are primary" + info: + step: "star" + orig_arg: "--outSAMprimaryFlag" + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMreadID" + description: "read ID record type\n\n- Standard ... 
first word (until space)\ + \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ + \ read number (index) in the FASTx file" + info: + step: "star" + orig_arg: "--outSAMreadID" + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMmapqUnique" + description: "0 to 255: the MAPQ value for unique mappers" + info: + step: "star" + orig_arg: "--outSAMmapqUnique" + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagOR" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ + \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ + \ are not set otherwise." + info: + step: "star" + orig_arg: "--outSAMflagOR" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagAND" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set\ + \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ + \ are not set otherwise." + info: + step: "star" + orig_arg: "--outSAMflagAND" + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattrRGline" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ + \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ + \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ + \ correspond to different (comma separated) input files in --readFilesIn.\ + \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ + \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + info: + step: "star" + orig_arg: "--outSAMattrRGline" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderHD" + description: "@HD (header) line of the SAM header" + info: + step: "star" + orig_arg: "--outSAMheaderHD" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderPG" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: + step: "star" + orig_arg: "--outSAMheaderPG" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderCommentFile" + description: "path to the file with @CO (comment) lines of the SAM header" + info: + step: "star" + orig_arg: "--outSAMheaderCommentFile" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMfilter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ + \ at the mapping stage." + info: + step: "star" + orig_arg: "--outSAMfilter" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMmultNmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. 
Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ... all alignments (up to\ + \ --outFilterMultimapNmax) will be output" + info: + step: "star" + orig_arg: "--outSAMmultNmax" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMtlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ + - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: + step: "star" + orig_arg: "--outSAMtlen" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMcompression" + description: "-1 to 10 BAM compression level, -1=default compression (6?),\ + \ 0=no compression, 10=maximum compression" + info: + step: "star" + orig_arg: "--outBAMcompression" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingThreadN" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." 
+ info: + step: "star" + orig_arg: "--outBAMsortingThreadN" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingBinsN" + description: ">0: number of genome bins for coordinate-sorting" + info: + step: "star" + orig_arg: "--outBAMsortingBinsN" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "BAM processing" + arguments: + - type: "string" + name: "--bamRemoveDuplicatesType" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ + \ - ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: + step: "star" + orig_arg: "--bamRemoveDuplicatesType" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--bamRemoveDuplicatesMate2basesN" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: + step: "star" + orig_arg: "--bamRemoveDuplicatesMate2basesN" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Wiggle" + arguments: + - type: "string" + name: "--outWigType" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... 
signal from\ + \ only 2nd read" + info: + step: "star" + orig_arg: "--outWigType" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outWigStrand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: + step: "star" + orig_arg: "--outWigStrand" + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigReferencesPrefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: + step: "star" + orig_arg: "--outWigReferencesPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigNorm" + description: "type of normalization for the signal\n\n- RPM ... reads per\ + \ million of mapped reads\n- None ... no normalization, \"raw\" counts" + info: + step: "star" + orig_arg: "--outWigNorm" + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering" + arguments: + - type: "string" + name: "--outFilterType" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... 
keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: + step: "star" + orig_arg: "--outFilterType" + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapScoreRange" + description: "the score range below the maximum score for multimapping alignments" + info: + step: "star" + orig_arg: "--outFilterMultimapScoreRange" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapNmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than\ + \ this value.\n\nOtherwise no alignments will be output, and the read will\ + \ be counted as \"mapped to too many loci\" in the Log.final.out ." + info: + step: "star" + orig_arg: "--outFilterMultimapNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMismatchNmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: + step: "star" + orig_arg: "--outFilterMismatchNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverLmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." + info: + step: "star" + orig_arg: "--outFilterMismatchNoverLmax" + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverReadLmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." 
+ info: + step: "star" + orig_arg: "--outFilterMismatchNoverReadLmax" + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterScoreMin" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: + step: "star" + orig_arg: "--outFilterScoreMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterScoreMinOverLread" + description: "same as outFilterScoreMin, but normalized to read length (sum\ + \ of mates' lengths for paired-end reads)" + info: + step: "star" + orig_arg: "--outFilterScoreMinOverLread" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMatchNmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: + step: "star" + orig_arg: "--outFilterMatchNmin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMatchNminOverLread" + description: "same as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: + step: "star" + orig_arg: "--outFilterMatchNminOverLread" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronMotifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter\ + \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ... filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. 
The annotated non-canonical\ + \ junctions will be kept." + info: + step: "star" + orig_arg: "--outFilterIntronMotifs" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronStrands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: + step: "star" + orig_arg: "--outFilterIntronStrands" + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--outSJtype" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: + step: "star" + orig_arg: "--outSJtype" + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--outSJfilterReads" + description: "which reads to consider for collapsed splice junctions output\n\ + \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ + \ mapping reads only" + info: + step: "star" + orig_arg: "--outSJfilterReads" + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSJfilterOverhangMin" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + does not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterOverhangMin" + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountUniqueMin" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ + \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ + \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ + \ are satisfied\ndoes not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterCountUniqueMin" + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountTotalMin" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterCountTotalMin" + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterDistToOtherSJmin" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterDistToOtherSJmin" + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterIntronMaxVsReadN" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ + \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterIntronMaxVsReadN" + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Scoring" + arguments: + - type: "integer" + name: "--scoreGap" + description: "splice junction penalty (independent on intron motif)" + info: + step: "star" + orig_arg: "--scoreGap" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapNoncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: + step: "star" + orig_arg: "--scoreGapNoncan" + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapGCAG" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: + step: "star" + orig_arg: "--scoreGapGCAG" + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapATAC" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: + step: "star" + orig_arg: "--scoreGapATAC" + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGenomicLengthLog2scale" + description: "extra score logarithmically scaled with genomic length of the\ + \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" + info: + step: "star" + orig_arg: "--scoreGenomicLengthLog2scale" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelOpen" + description: "deletion open penalty" + info: + step: "star" + orig_arg: "--scoreDelOpen" + example: + - -2 + required: false + direction: 
"input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelBase" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: + step: "star" + orig_arg: "--scoreDelBase" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsOpen" + description: "insertion open penalty" + info: + step: "star" + orig_arg: "--scoreInsOpen" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsBase" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: + step: "star" + orig_arg: "--scoreInsBase" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreStitchSJshift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: + step: "star" + orig_arg: "--scoreStitchSJshift" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seedSearchStartLmax" + description: "defines the search start point through the read - the read is\ + \ split into pieces no longer than this value" + info: + step: "star" + orig_arg: "--seedSearchStartLmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--seedSearchStartLmaxOverLread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: + step: "star" + orig_arg: "--seedSearchStartLmaxOverLread" + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSearchLmax" + 
description: "defines the maximum length of the seeds, if =0 seed length is\ + \ not limited" + info: + step: "star" + orig_arg: "--seedSearchLmax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMultimapNmax" + description: "only pieces that map fewer than this value are utilized in the\ + \ stitching procedure" + info: + step: "star" + orig_arg: "--seedMultimapNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerReadNmax" + description: "max number of seeds per read" + info: + step: "star" + orig_arg: "--seedPerReadNmax" + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerWindowNmax" + description: "max number of seeds per window" + info: + step: "star" + orig_arg: "--seedPerWindowNmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedNoneLociPerWindow" + description: "max number of one seed loci per window" + info: + step: "star" + orig_arg: "--seedNoneLociPerWindow" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSplitMin" + description: "min length of the seed sequences split by Ns or mate gap" + info: + step: "star" + orig_arg: "--seedSplitMin" + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMapMin" + description: "min length of seeds to be mapped" + info: + step: "star" + orig_arg: "--seedMapMin" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMin" + description: "minimum intron size, 
genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: + step: "star" + orig_arg: "--alignIntronMin" + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMax" + description: "maximum intron size, if 0, max intron size will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: + step: "star" + orig_arg: "--alignIntronMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignMatesGapMax" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: + step: "star" + orig_arg: "--alignMatesGapMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJoverhangMin" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: + step: "star" + orig_arg: "--alignSJoverhangMin" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJstitchMismatchNmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ + \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: + step: "star" + orig_arg: "--alignSJstitchMismatchNmax" + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--alignSJDBoverhangMin" + description: "minimum overhang (i.e. 
block size) for annotated (sjdb) spliced\ + \ alignments" + info: + step: "star" + orig_arg: "--alignSJDBoverhangMin" + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSplicedMateMapLmin" + description: "minimum mapped length for a read mate that is spliced" + info: + step: "star" + orig_arg: "--alignSplicedMateMapLmin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alignSplicedMateMapLminOverLmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: + step: "star" + orig_arg: "--alignSplicedMateMapLminOverLmate" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignWindowsPerReadNmax" + description: "max number of windows per read" + info: + step: "star" + orig_arg: "--alignWindowsPerReadNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerWindowNmax" + description: "max number of transcripts per window" + info: + step: "star" + orig_arg: "--alignTranscriptsPerWindowNmax" + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerReadNmax" + description: "max number of different alignments per read to consider" + info: + step: "star" + orig_arg: "--alignTranscriptsPerReadNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsType" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... 
fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: + step: "star" + orig_arg: "--alignEndsType" + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsProtrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ + \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion\ + \ as concordant pairs\n- DiscordantPair ... report alignments\ + \ with non-zero protrusion as discordant pairs" + info: + step: "star" + orig_arg: "--alignEndsProtrude" + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignSoftClipAtReferenceEnds" + description: "allow the soft-clipping of the alignments past the end of the\ + \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ + \ with Cufflinks" + info: + step: "star" + orig_arg: "--alignSoftClipAtReferenceEnds" + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignInsertionFlush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... insertions are flushed to the right" + info: + step: "star" + orig_arg: "--alignInsertionFlush" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Paired-End reads" + arguments: + - type: "integer" + name: "--peOverlapNbasesMin" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. 
Specify >0 value to switch on the \"merging of overlapping\ + \ mates\" algorithm." + info: + step: "star" + orig_arg: "--peOverlapNbasesMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--peOverlapMMp" + description: "maximum proportion of mismatched bases in the overlap area" + info: + step: "star" + orig_arg: "--peOverlapMMp" + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--winAnchorMultimapNmax" + description: "max number of loci anchors are allowed to map to" + info: + step: "star" + orig_arg: "--winAnchorMultimapNmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winBinNbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins."
+ info: + step: "star" + orig_arg: "--winBinNbits" + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winAnchorDistNbins" + description: "max number of bins between two anchors that allows aggregation\ + \ of anchors into one window" + info: + step: "star" + orig_arg: "--winAnchorDistNbins" + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winFlankNbins" + description: "log2(winFlank), where win Flank is the size of the left and right\ + \ flanking regions for each window" + info: + step: "star" + orig_arg: "--winFlankNbins" + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--winReadCoverageRelativeMin" + description: "minimum relative coverage of the read sequence by the seeds in\ + \ a window, for STARlong algorithm only." + info: + step: "star" + orig_arg: "--winReadCoverageRelativeMin" + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winReadCoverageBasesMin" + description: "minimum number of bases covered by the seeds in a window , for\ + \ STARlong algorithm only." + info: + step: "star" + orig_arg: "--winReadCoverageBasesMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chimOutType" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ + - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ + - WithinBAM HardClip ... 
(default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... soft-clipping in the CIGAR for supplemental chimeric alignments" + info: + step: "star" + orig_arg: "--chimOutType" + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimSegmentMin" + description: "minimum length of chimeric segment length, if ==0, no chimeric\ + \ output" + info: + step: "star" + orig_arg: "--chimSegmentMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreMin" + description: "minimum total (summed) score of the chimeric segments" + info: + step: "star" + orig_arg: "--chimScoreMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreDropMax" + description: "max drop (difference) of chimeric score (the sum of scores of\ + \ all chimeric segments) from the read length" + info: + step: "star" + orig_arg: "--chimScoreDropMax" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreSeparation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: + step: "star" + orig_arg: "--chimScoreSeparation" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreJunctionNonGTAG" + description: "penalty for a non-GT/AG chimeric junction" + info: + step: "star" + orig_arg: "--chimScoreJunctionNonGTAG" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimJunctionOverhangMin" + description: "minimum overhang 
for a chimeric junction" + info: + step: "star" + orig_arg: "--chimJunctionOverhangMin" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimSegmentReadGapMax" + description: "maximum gap in the read sequence between chimeric segments" + info: + step: "star" + orig_arg: "--chimSegmentReadGapMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--chimFilter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: + step: "star" + orig_arg: "--chimFilter" + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimMainSegmentMultNmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: + step: "star" + orig_arg: "--chimMainSegmentMultNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapNmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ + \ old scheme for chimeric detection which only considered unique alignments" + info: + step: "star" + orig_arg: "--chimMultimapNmax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapScoreRange" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. 
Only works with --chimMultimapNmax > 1" + info: + step: "star" + orig_arg: "--chimMultimapScoreRange" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimNonchimScoreDropMin" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: + step: "star" + orig_arg: "--chimNonchimScoreDropMin" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimOutJunctionFormat" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ + \ no comment lines/headers\n- 1 ... comment lines at the end of the file:\ + \ command line and Nreads: total, unique/multi-mapping" + info: + step: "star" + orig_arg: "--chimOutJunctionFormat" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quantMode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: + step: "star" + orig_arg: "--quantMode" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--quantTranscriptomeBAMcompression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ + \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ + - 10 ... 
maximum compression" + info: + step: "star" + orig_arg: "--quantTranscriptomeBAMcompression" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--quantTranscriptomeBan" + description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ + \ prohibit indels, soft clipping and single-end alignments - compatible with\ + \ RSEM\n- Singleend ... prohibit single-end alignments" + info: + step: "star" + orig_arg: "--quantTranscriptomeBan" + example: + - "IndelSoftclipSingleend" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopassMode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: + step: "star" + orig_arg: "--twopassMode" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--twopass1readsN" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." + info: + step: "star" + orig_arg: "--twopass1readsN" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "WASP parameters" + arguments: + - type: "string" + name: "--waspOutputMode" + description: "WASP allele-specific output type. This is re-implementation of\ + \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ + \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ + \ .\n\n- SAMtag ... 
add WASP tags to the alignments that pass WASP filtering" + info: + step: "star" + orig_arg: "--waspOutputMode" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "STARsolo (single cell RNA-seq) parameters" + arguments: + - type: "string" + name: "--soloType" + description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ + \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ + \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ + \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ + \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ + \ as CR and/or CB SAM tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ + \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ + \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ + \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ + \ UMI sequences, alignments deduplicated according to alignment start and\ + \ end (after extending soft-clipped bases)" + info: + step: "star" + orig_arg: "--soloType" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCBwhitelist" + description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ + \ allows more than one whitelist file.\n\n- None ...
no whitelist:\ + \ all cell barcodes are allowed" + info: + step: "star" + orig_arg: "--soloCBwhitelist" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--soloCBstart" + description: "cell barcode start base" + info: + step: "star" + orig_arg: "--soloCBstart" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloCBlen" + description: "cell barcode length" + info: + step: "star" + orig_arg: "--soloCBlen" + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIstart" + description: "UMI start base" + info: + step: "star" + orig_arg: "--soloUMIstart" + example: + - 17 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIlen" + description: "UMI length" + info: + step: "star" + orig_arg: "--soloUMIlen" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeReadLength" + description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ + - 0 ... not defined, do not check" + info: + step: "star" + orig_arg: "--soloBarcodeReadLength" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeMate" + description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ + \n- 0 ... barcode sequence is on separate read, which should always be the\ + \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ + \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" + info: + step: "star" + orig_arg: "--soloBarcodeMate" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBposition" + description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ + \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ + \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ + \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ + \ position of the CB start(end) with respect to the Anchor Base\nStrings\ + \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ + \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" + info: + step: "star" + orig_arg: "--soloCBposition" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIposition" + description: "position of the UMI on the barcode read, same as soloCBposition\n\ + \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloUMIposition\ + \ 3_9_3_14" + info: + step: "star" + orig_arg: "--soloUMIposition" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloAdapterSequence" + description: "adapter sequence to anchor barcodes. Only one adapter sequence\ + \ is allowed." + info: + step: "star" + orig_arg: "--soloAdapterSequence" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloAdapterMismatchesNmax" + description: "maximum number of mismatches allowed in adapter sequence."
+ info: + step: "star" + orig_arg: "--soloAdapterMismatchesNmax" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBmatchWLtype" + description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ + \ ... only exact matches allowed\n- 1MM \ + \ ... only one match in whitelist with 1 mismatched base allowed.\ + \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ + \ ... multiple matches in whitelist with 1 mismatched\ + \ base allowed, posterior probability calculation is used to choose one of the\ + \ matches.\nAllowed CBs have to have at least one read with exact match. This\ + \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ + \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ + \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ + \ multimatching to WL is allowed for CBs with N-bases. This option matches\ + \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ + \ up to edit distance of 3 for each of the barcodes. May include one deletion\ + \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ + \ passlist barcodes are not allowed. Similar to ParseBio Split-seq pipeline." + info: + step: "star" + orig_arg: "--soloCBmatchWLtype" + example: + - "1MM_multi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeSeq" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ + \ CR UR .\nThis parameter is required when running STARsolo with input from\ + \ SAM."
+ info: + step: "star" + orig_arg: "--soloInputSAMattrBarcodeSeq" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeQual" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ + \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ + \ to all bases." + info: + step: "star" + orig_arg: "--soloInputSAMattrBarcodeQual" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloStrand" + description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ + \ information\n- Forward ... read strand same as the original RNA molecule\n\ + - Reverse ... read strand opposite to the original RNA molecule" + info: + step: "star" + orig_arg: "--soloStrand" + example: + - "Forward" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloFeatures" + description: "genomic features for which the UMI counts per Cell Barcode are\ + \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ + - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ + \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ + \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ + \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ + \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ + \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ + \ Do not count reads with 100% exonic overlap in the antisense direction." 
+ info: + step: "star" + orig_arg: "--soloFeatures" + example: + - "Gene" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloMultiMappers" + description: "counting method for reads mapping to multiple genes\n\n- Unique\ + \ ... count only reads that map to unique genes\n- Uniform ... uniformly\ + \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ + \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ + \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ + \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ + \ Maximization algorithm" + info: + step: "star" + orig_arg: "--soloMultiMappers" + example: + - "Unique" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIdedup" + description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ + \ ... all UMIs with 1 mismatch distance to each other\ + \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ + \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ + \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ + \ but with more stringent criteria for duplicate UMIs\n- Exact \ + \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ + \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ + \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." + info: + step: "star" + orig_arg: "--soloUMIdedup" + example: + - "1MM_All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIfiltering" + description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ + - - ... basic filtering: remove UMIs with N and homopolymers\ + \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... 
basic + remove\ + \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ + \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ + \ ... basic + remove lower-count UMIs that map to more than one gene, matching\ + \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" + info: + step: "star" + orig_arg: "--soloUMIfiltering" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFileNames" + description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ + \ barcode_sequences cell_feature_count_matrix" + info: + step: "star" + orig_arg: "--soloOutFileNames" + example: + - "Solo.out/" + - "features.tsv" + - "barcodes.tsv" + - "matrix.mtx" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellFilter" + description: "cell filtering type and parameters\n\n- None ... do\ + \ not output filtered cells\n- TopCells ... only report top cells by\ + \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ + \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ + \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ + \ ratio for UMI count\nThe hardcoded values are from CellRanger: nExpectedCells=3000;\ + \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ + \ in CellRanger flavor.
Please cite the original EmptyDrops paper: A.T.L Lun\ + \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ + \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ + \ FDR simN\nThe hardcoded values are from CellRanger: 3000 \ + \ 0.99 10 45000 90000 500 0.01\ + \ 20000 0.01 10000" + info: + step: "star" + orig_arg: "--soloCellFilter" + example: + - "CellRanger2.2" + - "3000" + - "0.99" + - "10" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFormatFeaturesGeneField3" + description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ + \ is output." + info: + step: "star" + orig_arg: "--soloOutFormatFeaturesGeneField3" + example: + - "Gene Expression" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellReadStats" + description: "Output reads statistics for each CB\n\n- Standard ... standard\ + \ output" + info: + step: "star" + orig_arg: "--soloCellReadStats" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "HTSeq arguments" + arguments: + - type: "string" + name: "--stranded" + alternatives: + - "-s" + description: "Whether the data is from a strand-specific assay. 'reverse' means\ + \ 'yes' with reversed strand interpretation." + info: + step: "htseq" + orig_arg: "--stranded" + default: + - "yes" + required: false + choices: + - "yes" + - "no" + - "reverse" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--minimum_alignment_quality" + alternatives: + - "-a" + - "--minaqual" + description: "Skip all reads with MAPQ alignment quality lower than the given\ + \ minimum value.
\nMAPQ is the 5th column of a SAM/BAM file and its usage\ + \ depends on the software \nused to map the reads.\n" + info: + step: "htseq" + orig_arg: "--minaqual" + default: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--type" + alternatives: + - "-t" + description: "Feature type (3rd column in GTF file) to be used, all features\ + \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" + info: + step: "htseq" + orig_arg: "--type" + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--id_attribute" + alternatives: + - "-i" + description: "GTF attribute to be used as feature ID (default, suitable for\ + \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ + \ within the same GTF attribute will be added\ntogether. The typical way of\ + \ using this option is to count all exonic reads from each gene\nand add the\ + \ exons but other uses are possible as well. You can call this option multiple\n\ + times: in that case, the combination of all attributes separated by colons\ + \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ + \ gene_id -i exon_number.\n" + info: + step: "htseq" + orig_arg: "--idattr" + example: + - "gene_id" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--additional_attributes" + description: "Additional feature attributes (suitable for Ensembl GTF files:\ + \ gene_name). 
Use multiple times\nfor more than one additional attribute.\ + \ These attributes are only used as annotations in the\noutput, while the\ + \ determination of how the counts are added together is done based on option\ + \ -i.\n" + info: + step: "htseq" + orig_arg: "--additional-attr" + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--add_chromosome_info" + description: "Store information about the chromosome of each feature as an additional\ + \ attribute\n(e.g. column in the TSV output file).\n" + info: + step: "htseq" + orig_arg: "--add-chromosome-info" + direction: "input" + dest: "par" + - type: "string" + name: "--mode" + alternatives: + - "-m" + description: "Mode to handle reads overlapping more than one feature." + info: + step: "htseq" + orig_arg: "--mode" + default: + - "union" + required: false + choices: + - "union" + - "intersection-strict" + - "intersection-nonempty" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--non_unique" + description: "Whether and how to score reads that are not uniquely aligned or\ + \ ambiguously assigned to features." + info: + step: "htseq" + orig_arg: "--nonunique" + default: + - "none" + required: false + choices: + - "none" + - "all" + - "fraction" + - "random" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--secondary_alignments" + description: "Whether to score secondary alignments (0x100 flag)." + info: + step: "htseq" + orig_arg: "--secondary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--supplementary_alignments" + description: "Whether to score supplementary alignments (0x800 flag)."
+ info: + step: "htseq" + orig_arg: "--supplementary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--counts_output_sparse" + description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." + info: + step: "htseq" + orig_arg: "--counts-output-sparse" + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Align fastq files using STAR." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + env: + - "STAR_VERSION 2.7.10b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + - type: "apt" + packages: + - "samtools" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "pyyaml" + - "HTSeq" + - 
"multiprocess" + - "gtfparse<2.0" + - "pandas" + - "multiqc~=1.15.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "pytest" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star/multi_star" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/multi_star/multi_star b/target/docker/mapping/multi_star/multi_star new file mode 100755 index 00000000000..067d52b4274 --- /dev/null +++ b/target/docker/mapping/multi_star/multi_star @@ -0,0 +1,6362 @@ +#!/usr/bin/env bash + +# multi_star 
0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ 
$SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="multi_star" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "multi_star 0.12.4" + echo "" + echo "Align fastq files using STAR." + echo "" + echo "Input/Output:" + echo " --input_id" + echo " type: string, required parameter, multiple values allowed" + echo " example: mysample;mysample" + echo " The ID of the sample being processed. This vector should have the same" + echo " length as the \`--input_r1\` argument." + echo "" + echo " --input_r1" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example:" + echo "mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L002_R1_001.fastq.gz" + echo " Paths to the sequences to be mapped. If using Illumina paired-end reads," + echo " only the R1 files should be passed." 
+ echo "" + echo " --input_r2" + echo " type: file, multiple values allowed, file must exist" + echo " example:" + echo "mysample_S1_L001_R2_001.fastq.gz;mysample_S1_L002_R2_001.fastq.gz" + echo " Paths to the sequences to be mapped. If using Illumina paired-end reads," + echo " only the R2 files should be passed." + echo "" + echo " --genomeDir, --reference_index" + echo " type: file, required parameter, file must exist" + echo " example: /path/to/reference" + echo " Path to the reference built by star_build_reference. Corresponds to the" + echo " --genomeDir argument in the STAR command." + echo "" + echo " --reference_gtf" + echo " type: file, required parameter, file must exist" + echo " example: genes.gtf" + echo " Path to the gtf reference file." + echo "" + echo " --outFileNamePrefix, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: /path/to/foo" + echo " Path to output directory. Corresponds to the --outFileNamePrefix" + echo " argument in the STAR command." + echo "" + echo "Processing arguments:" + echo " --run_htseq_count" + echo " type: boolean" + echo " default: true" + echo " Whether or not to also run htseq-count after STAR." + echo "" + echo " --run_multiqc" + echo " type: boolean" + echo " default: true" + echo " Whether or not to also run MultiQC at the end." + echo "" + echo " --min_success_rate" + echo " type: double" + echo " default: 0.5" + echo " Fail when the success rate is below this threshold." + echo "" + echo "Run Parameters:" + echo " --runRNGseed" + echo " type: integer" + echo " example: 777" + echo " random number generator seed." + echo "" + echo "Genome Parameters:" + echo " --genomeFastaFiles" + echo " type: file, multiple values allowed, file must exist" + echo " path(s) to the fasta files with the genome sequences, separated by" + echo " spaces. These files should be plain text FASTA files, they *cannot* be" + echo " zipped." 
+ echo " Required for the genome generation (--runMode genomeGenerate). Can also" + echo " be used in the mapping (--runMode alignReads) to add extra (new)" + echo " sequences to the genome (e.g. spike-ins)." + echo "" + echo "Splice Junctions Database:" + echo " --sjdbFileChrStartEnd" + echo " type: string, multiple values allowed" + echo " path to the files with genomic coordinates (chr start end" + echo " strand) for the splice junction introns. Multiple files can be" + echo " supplied and will be concatenated." + echo "" + echo " --sjdbGTFfile" + echo " type: file, file must exist" + echo " path to the GTF file with annotations" + echo "" + echo " --sjdbGTFchrPrefix" + echo " type: string" + echo " prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL" + echo " annotations with UCSC genomes)" + echo "" + echo " --sjdbGTFfeatureExon" + echo " type: string" + echo " example: exon" + echo " feature type in GTF file to be used as exons for building transcripts" + echo "" + echo " --sjdbGTFtagExonParentTranscript" + echo " type: string" + echo " example: transcript_id" + echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" + echo " works for GTF files)" + echo "" + echo " --sjdbGTFtagExonParentGene" + echo " type: string" + echo " example: gene_id" + echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" + echo " files)" + echo "" + echo " --sjdbGTFtagExonParentGeneName" + echo " type: string, multiple values allowed" + echo " example: gene_name" + echo " GTF attribute name for parent gene name" + echo "" + echo " --sjdbGTFtagExonParentGeneType" + echo " type: string, multiple values allowed" + echo " example: gene_type;gene_biotype" + echo " GTF attribute name for parent gene type" + echo "" + echo " --sjdbOverhang" + echo " type: integer" + echo " example: 100" + echo " length of the donor/acceptor sequence on each side of the junctions," + echo " ideally = (mate_length - 1)" + echo "" + echo " 
--sjdbScore" + echo " type: integer" + echo " example: 2" + echo " extra alignment score for alignments that cross database junctions" + echo "" + echo " --sjdbInsertSave" + echo " type: string" + echo " example: Basic" + echo " which files to save when sjdb junctions are inserted on the fly at the" + echo " mapping step" + echo " - Basic ... only small junction / transcript files" + echo " - All ... all files including big Genome, SA and SAindex - this will" + echo " create a complete genome directory" + echo "" + echo "Variation parameters:" + echo " --varVCFfile" + echo " type: string" + echo " path to the VCF file that contains variation data. The 10th column" + echo " should contain the genotype information, e.g. 0/1" + echo "" + echo "Read Parameters:" + echo " --readFilesType" + echo " type: string" + echo " example: Fastx" + echo " format of input read files" + echo " - Fastx ... FASTA or FASTQ" + echo " - SAM SE ... SAM or BAM single-end reads; for BAM use" + echo " --readFilesCommand samtools view" + echo " - SAM PE ... SAM or BAM paired-end reads; for BAM use" + echo " --readFilesCommand samtools view" + echo "" + echo " --readFilesSAMattrKeep" + echo " type: string, multiple values allowed" + echo " example: All" + echo " for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM," + echo " e.g.: --readFilesSAMtagsKeep RG PL" + echo " - All ... keep all tags" + echo " - None ... do not keep any tags" + echo "" + echo " --readFilesManifest" + echo " type: file, file must exist" + echo " path to the \"manifest\" file with the names of read files. The manifest" + echo " file should contain 3 tab-separated columns:" + echo " paired-end reads: read1_file_name \$tab\$ read2_file_name \$tab\$" + echo " read_group_line." + echo " single-end reads: read1_file_name \$tab\$ - \$tab\$" + echo " read_group_line." + echo " Spaces, but not tabs are allowed in file names." 
+ echo " If read_group_line does not start with ID:, it can only contain one ID" + echo " field, and ID: will be added to it." + echo " If read_group_line starts with ID:, it can contain several fields" + echo " separated by \$tab\$, and all fields will be be copied verbatim into SAM" + echo " @RG header line." + echo "" + echo " --readFilesPrefix" + echo " type: string" + echo " prefix for the read files names, i.e. it will be added in front of the" + echo " strings in --readFilesIn" + echo "" + echo " --readFilesCommand" + echo " type: string, multiple values allowed" + echo " command line to execute for each of the input file. This command should" + echo " generate FASTA or FASTQ text and send it to stdout" + echo " For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2" + echo " files, etc." + echo "" + echo " --readMapNumber" + echo " type: integer" + echo " example: -1" + echo " number of reads to map from the beginning of the file" + echo " -1: map all reads" + echo "" + echo " --readMatesLengthsIn" + echo " type: string" + echo " example: NotEqual" + echo " Equal/NotEqual - lengths of names,sequences,qualities for both mates are" + echo " the same / not the same. NotEqual is safe in all situations." + echo "" + echo " --readNameSeparator" + echo " type: string, multiple values allowed" + echo " example: /" + echo " character(s) separating the part of the read names that will be trimmed" + echo " in output (read name after space is always trimmed)" + echo "" + echo " --readQualityScoreBase" + echo " type: integer" + echo " example: 33" + echo " number to be subtracted from the ASCII code to get Phred quality score" + echo "" + echo "Read Clipping:" + echo " --clipAdapterType" + echo " type: string" + echo " example: Hamming" + echo " adapter clipping type" + echo " - Hamming ... adapter clipping based on Hamming distance, with the" + echo " number of mismatches controlled by --clip5pAdapterMMp" + echo " - CellRanger4 ... 
5p and 3p adapter clipping similar to CellRanger4." + echo " Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal" + echo " - None ... no adapter clipping, all other clip* parameters are" + echo " disregarded" + echo "" + echo " --clip3pNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number(s) of bases to clip from 3p of each mate. If one value is given," + echo " it will be assumed the same for both mates." + echo "" + echo " --clip3pAdapterSeq" + echo " type: string, multiple values allowed" + echo " adapter sequences to clip from 3p of each mate. If one value is given," + echo " it will be assumed the same for both mates." + echo " - polyA ... polyA sequence with the length equal to read length" + echo "" + echo " --clip3pAdapterMMp" + echo " type: double, multiple values allowed" + echo " example: 0.1" + echo " max proportion of mismatches for 3p adapter clipping for each mate. If" + echo " one value is given, it will be assumed the same for both mates." + echo "" + echo " --clip3pAfterAdapterNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number of bases to clip from 3p of each mate after the adapter clipping." + echo " If one value is given, it will be assumed the same for both mates." + echo "" + echo " --clip5pNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number(s) of bases to clip from 5p of each mate. If one value is given," + echo " it will be assumed the same for both mates." 
+ echo "" + echo "Limits:" + echo " --limitGenomeGenerateRAM" + echo " type: long" + echo " example: 31000000000" + echo " maximum available RAM (bytes) for genome generation" + echo "" + echo " --limitIObufferSize" + echo " type: long, multiple values allowed" + echo " example: 30000000;50000000" + echo " max available buffers size (bytes) for input/output, per thread" + echo "" + echo " --limitOutSAMoneReadBytes" + echo " type: long" + echo " example: 100000" + echo " max size of the SAM record (bytes) for one read. Recommended value:" + echo " >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + echo "" + echo " --limitOutSJoneRead" + echo " type: integer" + echo " example: 1000" + echo " max number of junctions for one read (including all multi-mappers)" + echo "" + echo " --limitOutSJcollapsed" + echo " type: integer" + echo " example: 1000000" + echo " max number of collapsed junctions" + echo "" + echo " --limitBAMsortRAM" + echo " type: long" + echo " example: 0" + echo " maximum available RAM (bytes) for sorting BAM. If =0, it will be set to" + echo " the genome index size. 0 value can only be used with --genomeLoad" + echo " NoSharedMemory option." + echo "" + echo " --limitSjdbInsertNsj" + echo " type: integer" + echo " example: 1000000" + echo " maximum number of junctions to be inserted to the genome on the fly at" + echo " the mapping stage, including those from annotations and those detected" + echo " in the 1st step of the 2-pass run" + echo "" + echo " --limitNreadsSoft" + echo " type: integer" + echo " example: -1" + echo " soft limit on the number of reads" + echo "" + echo "Output: general:" + echo " --outTmpKeep" + echo " type: string" + echo " whether to keep the temporary files after STAR runs is finished" + echo " - None ... remove all temporary files" + echo " - All ... 
keep all files" + echo "" + echo " --outStd" + echo " type: string" + echo " example: Log" + echo " which output will be directed to stdout (standard out)" + echo " - Log ... log messages" + echo " - SAM ... alignments in SAM format (which normally" + echo " are output to Aligned.out.sam file), normal standard output will go into" + echo " Log.std.out" + echo " - BAM_Unsorted ... alignments in BAM format, unsorted." + echo " Requires --outSAMtype BAM Unsorted" + echo " - BAM_SortedByCoordinate ... alignments in BAM format, sorted by" + echo " coordinate. Requires --outSAMtype BAM SortedByCoordinate" + echo " - BAM_Quant ... alignments to transcriptome in BAM format," + echo " unsorted. Requires --quantMode TranscriptomeSAM" + echo "" + echo " --outReadsUnmapped" + echo " type: string" + echo " output of unmapped and partially mapped (i.e. mapped only one mate of a" + echo " paired end read) reads in separate file(s)." + echo " - None ... no output" + echo " - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + echo "" + echo " --outQSconversionAdd" + echo " type: integer" + echo " example: 0" + echo " add this number to the quality score (e.g. to convert from Illumina to" + echo " Sanger, use -31)" + echo "" + echo " --outMultimapperOrder" + echo " type: string" + echo " example: Old_2.4" + echo " order of multimapping alignments in the output files" + echo " - Old_2.4 ... quasi-random order used before 2.5.0" + echo " - Random ... random order of alignments for each" + echo " multi-mapper. Read mates (pairs) are always adjacent, all alignment for" + echo " each read stay together. This option will become default in the future" + echo " releases." + echo "" + echo "Output: SAM and BAM:" + echo " --outSAMmode" + echo " type: string" + echo " example: Full" + echo " mode of SAM output" + echo " - None ... no SAM output" + echo " - Full ... full SAM output" + echo " - NoQS ... 
full SAM but without quality scores" + echo "" + echo " --outSAMstrandField" + echo " type: string" + echo " Cufflinks-like strand field flag" + echo " - None ... not used" + echo " - intronMotif ... strand derived from the intron motif. This option" + echo " changes the output alignments: reads with inconsistent and/or" + echo " non-canonical introns are filtered out." + echo "" + echo " --outSAMattributes" + echo " type: string, multiple values allowed" + echo " example: Standard" + echo " a string of desired SAM attributes, in the order desired for the output" + echo " SAM. Tags can be listed in any combination/order." + echo " ***Presets:" + echo " - None ... no attributes" + echo " - Standard ... NH HI AS nM" + echo " - All ... NH HI AS nM NM MD jM jI MC ch" + echo " ***Alignment:" + echo " - NH ... number of loci the reads maps to: =1 for unique" + echo " mappers, >1 for multimappers. Standard SAM tag." + echo " - HI ... multiple alignment index, starts with" + echo " --outSAMattrIHstart (=1 by default). Standard SAM tag." + echo " - AS ... local alignment score, +1/-1 for matches/mismateches," + echo " score* penalties for indels and gaps. For PE reads, total score for two" + echo " mates. Stadnard SAM tag." + echo " - nM ... number of mismatches. For PE reads, sum over two" + echo " mates." + echo " - NM ... edit distance to the reference (number of mismatched +" + echo " inserted + deleted bases) for each mate. Standard SAM tag." + echo " - MD ... string encoding mismatched and deleted reference bases" + echo " (see standard SAM specifications). Standard SAM tag." + echo " - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0:" + echo " non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6:" + echo " GT/AT. If splice junctions database is used, and a junction is" + echo " annotated, 20 is added to its motif value." + echo " - jI ... start and end of introns for all junctions (1-based)." + echo " - XS ... 
alignment strand according to --outSAMstrandField." + echo " - MC ... mate's CIGAR string. Standard SAM tag." + echo " - ch ... marks all segment of all chimeric alingments for" + echo " --chimOutType WithinBAM output." + echo " - cN ... number of bases clipped from the read ends: 5' and 3'" + echo " ***Variation:" + echo " - vA ... variant allele" + echo " - vG ... genomic coordinate of the variant overlapped by the" + echo " read." + echo " - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 -" + echo " alignment does not pass WASP filtering. Requires --waspOutputMode" + echo " SAMtag." + echo " ***STARsolo:" + echo " - CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs" + echo " for the solo* demultiplexing." + echo " - GX GN ... gene ID and gene name for unique-gene reads." + echo " - gx gn ... gene IDs and gene names for unique- and multi-gene" + echo " reads." + echo " - CB UB ... error-corrected cell barcodes and UMIs for solo*" + echo " demultiplexing. Requires --outSAMtype BAM SortedByCoordinate." + echo " - sM ... assessment of CB and UMI." + echo " - sS ... sequence of the entire barcode (CB,UMI,adapter)." + echo " - sQ ... quality of the entire barcode." + echo " ***Unsupported/undocumented:" + echo " - ha ... haplotype (1/2) when mapping to the diploid genome." + echo " Requires genome generated with --genomeTransformType Diploid ." + echo " - rB ... alignment block read/genomic coordinates." + echo " - vR ... read coordinate of the variant." + echo "" + echo " --outSAMattrIHstart" + echo " type: integer" + echo " example: 1" + echo " start value for the IH attribute. 0 may be required by some downstream" + echo " software, such as Cufflinks or StringTie." + echo "" + echo " --outSAMunmapped" + echo " type: string, multiple values allowed" + echo " output of unmapped reads in the SAM format" + echo " 1st word:" + echo " - None ... no output" + echo " - Within ... output unmapped reads within the main SAM file (i.e." 
+ echo " Aligned.out.sam)" + echo " 2nd word:" + echo " - KeepPairs ... record unmapped mate for each alignment, and, in case of" + echo " unsorted output, keep it adjacent to its mapped mate. Only affects" + echo " multi-mapping reads." + echo "" + echo " --outSAMorder" + echo " type: string" + echo " example: Paired" + echo " type of sorting for the SAM output" + echo " Paired: one mate after the other for all paired alignments" + echo " PairedKeepInputOrder: one mate after the other for all paired" + echo " alignments, the order is kept the same as in the input FASTQ files" + echo "" + echo " --outSAMprimaryFlag" + echo " type: string" + echo " example: OneBestScore" + echo " which alignments are considered primary - all others will be marked with" + echo " 0x100 bit in the FLAG" + echo " - OneBestScore ... only one alignment with the best score is primary" + echo " - AllBestScore ... all alignments with the best score are primary" + echo "" + echo " --outSAMreadID" + echo " type: string" + echo " example: Standard" + echo " read ID record type" + echo " - Standard ... first word (until space) from the FASTx read ID line," + echo " removing /1,/2 from the end" + echo " - Number ... read number (index) in the FASTx file" + echo "" + echo " --outSAMmapqUnique" + echo " type: integer" + echo " example: 255" + echo " 0 to 255: the MAPQ value for unique mappers" + echo "" + echo " --outSAMflagOR" + echo " type: integer" + echo " example: 0" + echo " 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e." + echo " FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set" + echo " by STAR, and after outSAMflagAND. Can be used to set specific bits that" + echo " are not set otherwise." + echo "" + echo " --outSAMflagAND" + echo " type: integer" + echo " example: 65535" + echo " 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e." + echo " FLAG=FLAG & outSAMflagOR. 
This is applied after all flags have been set" + echo " by STAR, but before outSAMflagOR. Can be used to unset specific bits" + echo " that are not set otherwise." + echo "" + echo " --outSAMattrRGline" + echo " type: string, multiple values allowed" + echo " SAM/BAM read group line. The first word contains the read group" + echo " identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx" + echo " CN:yy \"DS:z z z\"." + echo " xxx will be added as RG tag to each output alignment. Any spaces in the" + echo " tag values have to be double quoted." + echo " Comma separated RG lines correspons to different (comma separated) input" + echo " files in --readFilesIn. Commas have to be surrounded by spaces, e.g." + echo " --outSAMattrRGline ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + echo "" + echo " --outSAMheaderHD" + echo " type: string, multiple values allowed" + echo " @HD (header) line of the SAM header" + echo "" + echo " --outSAMheaderPG" + echo " type: string, multiple values allowed" + echo " extra @PG (software) line of the SAM header (in addition to STAR)" + echo "" + echo " --outSAMheaderCommentFile" + echo " type: string" + echo " path to the file with @CO (comment) lines of the SAM header" + echo "" + echo " --outSAMfilter" + echo " type: string, multiple values allowed" + echo " filter the output into main SAM/BAM files" + echo " - KeepOnlyAddedReferences ... only keep the reads for which all" + echo " alignments are to the extra reference sequences added with" + echo " --genomeFastaFiles at the mapping stage." + echo " - KeepAllAddedReferences ... keep all alignments to the extra reference" + echo " sequences added with --genomeFastaFiles at the mapping stage." + echo "" + echo " --outSAMmultNmax" + echo " type: integer" + echo " example: -1" + echo " max number of multiple alignments for a read that will be output to the" + echo " SAM/BAM files. 
Note that if this value is not equal to -1, the top" + echo " scoring alignment will be output first" + echo " - -1 ... all alignments (up to --outFilterMultimapNmax) will be output" + echo "" + echo " --outSAMtlen" + echo " type: integer" + echo " example: 1" + echo " calculation method for the TLEN field in the SAM/BAM files" + echo " - 1 ... leftmost base of the (+)strand mate to rightmost base of the" + echo " (-)mate. (+)sign for the (+)strand mate" + echo " - 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign" + echo " for the mate with the leftmost base. This is different from 1 for" + echo " overlapping mates with protruding ends" + echo "" + echo " --outBAMcompression" + echo " type: integer" + echo " example: 1" + echo " -1 to 10 BAM compression level, -1=default compression (6?), 0=no" + echo " compression, 10=maximum compression" + echo "" + echo " --outBAMsortingThreadN" + echo " type: integer" + echo " example: 0" + echo " >=0: number of threads for BAM sorting. 0 will default to" + echo " min(6,--runThreadN)." + echo "" + echo " --outBAMsortingBinsN" + echo " type: integer" + echo " example: 50" + echo " >0: number of genome bins for coordinate-sorting" + echo "" + echo "BAM processing:" + echo " --bamRemoveDuplicatesType" + echo " type: string" + echo " mark duplicates in the BAM file, for now only works with (i) sorted BAM" + echo " fed with inputBAMfile, and (ii) for paired-end alignments only" + echo " - - ... no duplicate removal/marking" + echo " - UniqueIdentical ... mark all multimappers, and duplicate" + echo " unique mappers. The coordinates, FLAG, CIGAR must be identical" + echo " - UniqueIdenticalNotMulti ... mark duplicate unique mappers but not" + echo " multimappers." + echo "" + echo " --bamRemoveDuplicatesMate2basesN" + echo " type: integer" + echo " example: 0" + echo " number of bases from the 5' of mate 2 to use in collapsing (e.g. 
for" + echo " RAMPAGE)" + echo "" + echo "Output Wiggle:" + echo " --outWigType" + echo " type: string, multiple values allowed" + echo " type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires" + echo " sorted BAM: --outSAMtype BAM SortedByCoordinate ." + echo " 1st word:" + echo " - None ... no signal output" + echo " - bedGraph ... bedGraph format" + echo " - wiggle ... wiggle format" + echo " 2nd word:" + echo " - read1_5p ... signal from only 5' of the 1st read, useful for" + echo " CAGE/RAMPAGE etc" + echo " - read2 ... signal from only 2nd read" + echo "" + echo " --outWigStrand" + echo " type: string" + echo " example: Stranded" + echo " strandedness of wiggle/bedGraph output" + echo " - Stranded ... separate strands, str1 and str2" + echo " - Unstranded ... collapsed strands" + echo "" + echo " --outWigReferencesPrefix" + echo " type: string" + echo " prefix matching reference names to include in the output wiggle file," + echo " e.g. \"chr\", default \"-\" - include all references" + echo "" + echo " --outWigNorm" + echo " type: string" + echo " example: RPM" + echo " type of normalization for the signal" + echo " - RPM ... reads per million of mapped reads" + echo " - None ... no normalization, \"raw\" counts" + echo "" + echo "Output Filtering:" + echo " --outFilterType" + echo " type: string" + echo " example: Normal" + echo " type of filtering" + echo " - Normal ... standard filtering using only current alignment" + echo " - BySJout ... keep only those reads that contain junctions that passed" + echo " filtering into SJ.out.tab" + echo "" + echo " --outFilterMultimapScoreRange" + echo " type: integer" + echo " example: 1" + echo " the score range below the maximum score for multimapping alignments" + echo "" + echo " --outFilterMultimapNmax" + echo " type: integer" + echo " example: 10" + echo " maximum number of loci the read is allowed to map to. 
Alignments (all of" + echo " them) will be output only if the read maps to no more loci than this" + echo " value." + echo " Otherwise no alignments will be output, and the read will be counted as" + echo " \"mapped to too many loci\" in the Log.final.out ." + echo "" + echo " --outFilterMismatchNmax" + echo " type: integer" + echo " example: 10" + echo " alignment will be output only if it has no more mismatches than this" + echo " value." + echo "" + echo " --outFilterMismatchNoverLmax" + echo " type: double" + echo " example: 0.3" + echo " alignment will be output only if its ratio of mismatches to *mapped*" + echo " length is less than or equal to this value." + echo "" + echo " --outFilterMismatchNoverReadLmax" + echo " type: double" + echo " example: 1.0" + echo " alignment will be output only if its ratio of mismatches to *read*" + echo " length is less than or equal to this value." + echo "" + echo " --outFilterScoreMin" + echo " type: integer" + echo " example: 0" + echo " alignment will be output only if its score is higher than or equal to" + echo " this value." + echo "" + echo " --outFilterScoreMinOverLread" + echo " type: double" + echo " example: 0.66" + echo " same as outFilterScoreMin, but normalized to read length (sum of mates'" + echo " lengths for paired-end reads)" + echo "" + echo " --outFilterMatchNmin" + echo " type: integer" + echo " example: 0" + echo " alignment will be output only if the number of matched bases is higher" + echo " than or equal to this value." + echo "" + echo " --outFilterMatchNminOverLread" + echo " type: double" + echo " example: 0.66" + echo " sam as outFilterMatchNmin, but normalized to the read length (sum of" + echo " mates' lengths for paired-end reads)." + echo "" + echo " --outFilterIntronMotifs" + echo " type: string" + echo " filter alignment using their motifs" + echo " - None ... no filtering" + echo " - RemoveNoncanonical ... 
filter out alignments that contain" + echo " non-canonical junctions" + echo " - RemoveNoncanonicalUnannotated ... filter out alignments that contain" + echo " non-canonical unannotated junctions when using annotated splice" + echo " junctions database. The annotated non-canonical junctions will be kept." + echo "" + echo " --outFilterIntronStrands" + echo " type: string" + echo " example: RemoveInconsistentStrands" + echo " filter alignments" + echo " - RemoveInconsistentStrands ... remove alignments that have" + echo " junctions with inconsistent strands" + echo " - None ... no filtering" + echo "" + echo "Output splice junctions (SJ.out.tab):" + echo " --outSJtype" + echo " type: string" + echo " example: Standard" + echo " type of splice junction output" + echo " - Standard ... standard SJ.out.tab output" + echo " - None ... no splice junction output" + echo "" + echo "Output Filtering: Splice Junctions:" + echo " --outSJfilterReads" + echo " type: string" + echo " example: All" + echo " which reads to consider for collapsed splice junctions output" + echo " - All ... all reads, unique- and multi-mappers" + echo " - Unique ... uniquely mapping reads only" + echo "" + echo " --outSJfilterOverhangMin" + echo " type: integer, multiple values allowed" + echo " example: 30;12;12;12" + echo " minimum overhang length for splice junctions on both sides for: (1)" + echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterCountUniqueMin" + echo " type: integer, multiple values allowed" + echo " example: 3;1;1;1" + echo " minimum uniquely mapping read count per junction for: (1) non-canonical" + echo " motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC" + echo " and GT/AT motif. 
-1 means no output for that motif" + echo " Junctions are output if one of outSJfilterCountUniqueMin OR" + echo " outSJfilterCountTotalMin conditions are satisfied" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterCountTotalMin" + echo " type: integer, multiple values allowed" + echo " example: 3;1;1;1" + echo " minimum total (multi-mapping+unique) read count per junction for: (1)" + echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" + echo " Junctions are output if one of outSJfilterCountUniqueMin OR" + echo " outSJfilterCountTotalMin conditions are satisfied" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterDistToOtherSJmin" + echo " type: integer, multiple values allowed" + echo " example: 10;0;5;10" + echo " minimum allowed distance to other junctions' donor/acceptor" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterIntronMaxVsReadN" + echo " type: integer, multiple values allowed" + echo " example: 50000;100000;200000" + echo " maximum gap allowed for junctions supported by 1,2,3,,,N reads" + echo " i.e. by default junctions supported by 1 read can have gaps <=50000b, by" + echo " 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap" + echo " <=alignIntronMax" + echo " does not apply to annotated junctions" + echo "" + echo "Scoring:" + echo " --scoreGap" + echo " type: integer" + echo " example: 0" + echo " splice junction penalty (independent on intron motif)" + echo "" + echo " --scoreGapNoncan" + echo " type: integer" + echo " example: -8" + echo " non-canonical junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGapGCAG" + echo " type: integer" + echo " example: -4" + echo " GC/AG and CT/GC junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGapATAC" + echo " type: integer" + echo " example: -8" + echo " AT/AC and GT/AT junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGenomicLengthLog2scale" + echo " type: integer" + echo " example: 0" + echo " extra score logarithmically scaled with genomic length of the alignment:" + echo " scoreGenomicLengthLog2scale*log2(genomicLength)" + echo "" + echo " --scoreDelOpen" + echo " type: integer" + echo " example: -2" + echo " deletion open penalty" + echo "" + echo " --scoreDelBase" + echo " type: integer" + echo " example: -2" + echo " deletion extension penalty per base (in addition to scoreDelOpen)" + echo "" + echo " --scoreInsOpen" + echo " type: integer" + echo " example: -2" + echo " insertion open penalty" + echo "" + echo " --scoreInsBase" + echo " type: integer" + echo " example: -2" + echo " insertion extension penalty per base (in addition to scoreInsOpen)" + echo "" + echo " --scoreStitchSJshift" + echo " type: integer" + echo " example: 1" + echo " maximum score reduction while searching for SJ boundaries in the" + echo " stitching step" + echo "" + echo "Alignments and Seeding:" + echo " --seedSearchStartLmax" + echo " type: integer" + echo " example: 50" + echo " defines the search start point through the read - the read is split into" + echo " pieces no longer than this value" + echo "" + echo " --seedSearchStartLmaxOverLread" + echo " type: double" + echo " 
example: 1.0" + echo " seedSearchStartLmax normalized to read length (sum of mates' lengths for" + echo " paired-end reads)" + echo "" + echo " --seedSearchLmax" + echo " type: integer" + echo " example: 0" + echo " defines the maximum length of the seeds, if =0 seed length is not" + echo " limited" + echo "" + echo " --seedMultimapNmax" + echo " type: integer" + echo " example: 10000" + echo " only pieces that map fewer than this value are utilized in the stitching" + echo " procedure" + echo "" + echo " --seedPerReadNmax" + echo " type: integer" + echo " example: 1000" + echo " max number of seeds per read" + echo "" + echo " --seedPerWindowNmax" + echo " type: integer" + echo " example: 50" + echo " max number of seeds per window" + echo "" + echo " --seedNoneLociPerWindow" + echo " type: integer" + echo " example: 10" + echo " max number of one seed loci per window" + echo "" + echo " --seedSplitMin" + echo " type: integer" + echo " example: 12" + echo " min length of the seed sequences split by Ns or mate gap" + echo "" + echo " --seedMapMin" + echo " type: integer" + echo " example: 5" + echo " min length of seeds to be mapped" + echo "" + echo " --alignIntronMin" + echo " type: integer" + echo " example: 21" + echo " minimum intron size, genomic gap is considered intron if its" + echo " length>=alignIntronMin, otherwise it is considered Deletion" + echo "" + echo " --alignIntronMax" + echo " type: integer" + echo " example: 0" + echo " maximum intron size, if 0, max intron size will be determined by" + echo " (2^winBinNbits)*winAnchorDistNbins" + echo "" + echo " --alignMatesGapMax" + echo " type: integer" + echo " example: 0" + echo " maximum gap between two mates, if 0, max intron gap will be determined" + echo " by (2^winBinNbits)*winAnchorDistNbins" + echo "" + echo " --alignSJoverhangMin" + echo " type: integer" + echo " example: 5" + echo " minimum overhang (i.e. 
block size) for spliced alignments" + echo "" + echo " --alignSJstitchMismatchNmax" + echo " type: integer, multiple values allowed" + echo " example: 0;-1;0;0" + echo " maximum number of mismatches for stitching of the splice junctions (-1:" + echo " no limit)." + echo " (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif." + echo "" + echo " --alignSJDBoverhangMin" + echo " type: integer" + echo " example: 3" + echo " minimum overhang (i.e. block size) for annotated (sjdb) spliced" + echo " alignments" + echo "" + echo " --alignSplicedMateMapLmin" + echo " type: integer" + echo " example: 0" + echo " minimum mapped length for a read mate that is spliced" + echo "" + echo " --alignSplicedMateMapLminOverLmate" + echo " type: double" + echo " example: 0.66" + echo " alignSplicedMateMapLmin normalized to mate length" + echo "" + echo " --alignWindowsPerReadNmax" + echo " type: integer" + echo " example: 10000" + echo " max number of windows per read" + echo "" + echo " --alignTranscriptsPerWindowNmax" + echo " type: integer" + echo " example: 100" + echo " max number of transcripts per window" + echo "" + echo " --alignTranscriptsPerReadNmax" + echo " type: integer" + echo " example: 10000" + echo " max number of different alignments per read to consider" + echo "" + echo " --alignEndsType" + echo " type: string" + echo " example: Local" + echo " type of read ends alignment" + echo " - Local ... standard local alignment with soft-clipping" + echo " allowed" + echo " - EndToEnd ... force end-to-end read alignment, do not" + echo " soft-clip" + echo " - Extend5pOfRead1 ... fully extend only the 5p of the read1, all other" + echo " ends: local alignment" + echo " - Extend5pOfReads12 ... 
fully extend only the 5p of the both read1 and" + echo " read2, all other ends: local alignment" + echo "" + echo " --alignEndsProtrude" + echo " type: string" + echo " example: 0 ConcordantPair" + echo " allow protrusion of alignment ends, i.e. start (end) of the +strand mate" + echo " downstream of the start (end) of the -strand mate" + echo " 1st word: int: maximum number of protrusion bases allowed" + echo " 2nd word: string:" + echo " - ConcordantPair ... report alignments with non-zero" + echo " protrusion as concordant pairs" + echo " - DiscordantPair ... report alignments with non-zero" + echo " protrusion as discordant pairs" + echo "" + echo " --alignSoftClipAtReferenceEnds" + echo " type: string" + echo " example: Yes" + echo " allow the soft-clipping of the alignments past the end of the" + echo " chromosomes" + echo " - Yes ... allow" + echo " - No ... prohibit, useful for compatibility with Cufflinks" + echo "" + echo " --alignInsertionFlush" + echo " type: string" + echo " how to flush ambiguous insertion positions" + echo " - None ... insertions are not flushed" + echo " - Right ... insertions are flushed to the right" + echo "" + echo "Paired-End reads:" + echo " --peOverlapNbasesMin" + echo " type: integer" + echo " example: 0" + echo " minimum number of overlapping bases to trigger mates merging and" + echo " realignment. Specify >0 value to switch on the \"merginf of overlapping" + echo " mates\" algorithm." 
+ echo "" + echo " --peOverlapMMp" + echo " type: double" + echo " example: 0.01" + echo " maximum proportion of mismatched bases in the overlap area" + echo "" + echo "Windows, Anchors, Binning:" + echo " --winAnchorMultimapNmax" + echo " type: integer" + echo " example: 50" + echo " max number of loci anchors are allowed to map to" + echo "" + echo " --winBinNbits" + echo " type: integer" + echo " example: 16" + echo " =log2(winBin), where winBin is the size of the bin for the" + echo " windows/clustering, each window will occupy an integer number of bins." + echo "" + echo " --winAnchorDistNbins" + echo " type: integer" + echo " example: 9" + echo " max number of bins between two anchors that allows aggregation of" + echo " anchors into one window" + echo "" + echo " --winFlankNbins" + echo " type: integer" + echo " example: 4" + echo " log2(winFlank), where win Flank is the size of the left and right" + echo " flanking regions for each window" + echo "" + echo " --winReadCoverageRelativeMin" + echo " type: double" + echo " example: 0.5" + echo " minimum relative coverage of the read sequence by the seeds in a window," + echo " for STARlong algorithm only." + echo "" + echo " --winReadCoverageBasesMin" + echo " type: integer" + echo " example: 0" + echo " minimum number of bases covered by the seeds in a window , for STARlong" + echo " algorithm only." + echo "" + echo "Chimeric Alignments:" + echo " --chimOutType" + echo " type: string, multiple values allowed" + echo " example: Junctions" + echo " type of chimeric output" + echo " - Junctions ... Chimeric.out.junction" + echo " - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file" + echo " - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)" + echo " - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for" + echo " supplemental chimeric alignments (default if no 2nd word is present)" + echo " - WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental" + echo " chimeric alignments" + echo "" + echo " --chimSegmentMin" + echo " type: integer" + echo " example: 0" + echo " minimum length of chimeric segment length, if ==0, no chimeric output" + echo "" + echo " --chimScoreMin" + echo " type: integer" + echo " example: 0" + echo " minimum total (summed) score of the chimeric segments" + echo "" + echo " --chimScoreDropMax" + echo " type: integer" + echo " example: 20" + echo " max drop (difference) of chimeric score (the sum of scores of all" + echo " chimeric segments) from the read length" + echo "" + echo " --chimScoreSeparation" + echo " type: integer" + echo " example: 10" + echo " minimum difference (separation) between the best chimeric score and the" + echo " next one" + echo "" + echo " --chimScoreJunctionNonGTAG" + echo " type: integer" + echo " example: -1" + echo " penalty for a non-GT/AG chimeric junction" + echo "" + echo " --chimJunctionOverhangMin" + echo " type: integer" + echo " example: 20" + echo " minimum overhang for a chimeric junction" + echo "" + echo " --chimSegmentReadGapMax" + echo " type: integer" + echo " example: 0" + echo " maximum gap in the read sequence between chimeric segments" + echo "" + echo " --chimFilter" + echo " type: string, multiple values allowed" + echo " example: banGenomicN" + echo " different filters for chimeric alignments" + echo " - None ... no filtering" + echo " - banGenomicN ... Ns are not allowed in the genome sequence around the" + echo " chimeric junction" + echo "" + echo " --chimMainSegmentMultNmax" + echo " type: integer" + echo " example: 10" + echo " maximum number of multi-alignments for the main chimeric segment. =1" + echo " will prohibit multimapping main segments." + echo "" + echo " --chimMultimapNmax" + echo " type: integer" + echo " example: 0" + echo " maximum number of chimeric multi-alignments" + echo " - 0 ... 
use the old scheme for chimeric detection which only considered" + echo " unique alignments" + echo "" + echo " --chimMultimapScoreRange" + echo " type: integer" + echo " example: 1" + echo " the score range for multi-mapping chimeras below the best chimeric" + echo " score. Only works with --chimMultimapNmax > 1" + echo "" + echo " --chimNonchimScoreDropMin" + echo " type: integer" + echo " example: 20" + echo " to trigger chimeric detection, the drop in the best non-chimeric" + echo " alignment score with respect to the read length has to be greater than" + echo " this value" + echo "" + echo " --chimOutJunctionFormat" + echo " type: integer" + echo " example: 0" + echo " formatting type for the Chimeric.out.junction file" + echo " - 0 ... no comment lines/headers" + echo " - 1 ... comment lines at the end of the file: command line and Nreads:" + echo " total, unique/multi-mapping" + echo "" + echo "Quantification of Annotations:" + echo " --quantMode" + echo " type: string, multiple values allowed" + echo " types of quantification requested" + echo " - - ... none" + echo " - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a" + echo " separate file" + echo " - GeneCounts ... count reads per gene" + echo "" + echo " --quantTranscriptomeBAMcompression" + echo " type: integer" + echo " example: 1" + echo " -2 to 10 transcriptome BAM compression level" + echo " - -2 ... no BAM output" + echo " - -1 ... default compression (6?)" + echo " - 0 ... no compression" + echo " - 10 ... maximum compression" + echo "" + echo " --quantTranscriptomeBan" + echo " type: string" + echo " example: IndelSoftclipSingleend" + echo " prohibit various alignment type" + echo " - IndelSoftclipSingleend ... prohibit indels, soft clipping and" + echo " single-end alignments - compatible with RSEM" + echo " - Singleend ... prohibit single-end alignments" + echo "" + echo "2-pass Mapping:" + echo " --twopassMode" + echo " type: string" + echo " 2-pass mapping mode." 
+ echo " - None ... 1-pass mapping" + echo " - Basic ... basic 2-pass mapping, with all 1st pass junctions" + echo " inserted into the genome indices on the fly" + echo "" + echo " --twopass1readsN" + echo " type: integer" + echo " example: -1" + echo " number of reads to process for the 1st step. Use very large number (or" + echo " default -1) to map all reads in the first step." + echo "" + echo "WASP parameters:" + echo " --waspOutputMode" + echo " type: string" + echo " WASP allele-specific output type. This is re-implementation of the" + echo " original WASP mappability filtering by Bryce van de Geijn, Graham" + echo " McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original" + echo " WASP paper: Nature Methods 12, 1061-1063 (2015)," + echo " https://www.nature.com/articles/nmeth.3582 ." + echo " - SAMtag ... add WASP tags to the alignments that pass WASP" + echo " filtering" + echo "" + echo "STARsolo (single cell RNA-seq) parameters:" + echo " --soloType" + echo " type: string, multiple values allowed" + echo " type of single-cell RNA-seq" + echo " - CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of" + echo " fixed length in read2, e.g. Drop-seq and 10X Chromium." + echo " - CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI" + echo " of fixed length and one adapter sequence of fixed length are allowed in" + echo " read2 only (e.g. inDrop, ddSeq)." + echo " - CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No" + echo " UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end]" + echo " CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or" + echo " SortedByCoordinate]" + echo " - SmartSeq ... 
Smart-seq: each cell in a separate FASTQ (paired-" + echo " or single-end), barcodes are corresponding read-groups, no UMI" + echo " sequences, alignments deduplicated according to alignment start and end" + echo " (after extending soft-clipped bases)" + echo "" + echo " --soloCBwhitelist" + echo " type: string, multiple values allowed" + echo " file(s) with whitelist(s) of cell barcodes. Only --soloType" + echo " CB_UMI_Complex allows more than one whitelist file." + echo " - None ... no whitelist: all cell barcodes are allowed" + echo "" + echo " --soloCBstart" + echo " type: integer" + echo " example: 1" + echo " cell barcode start base" + echo "" + echo " --soloCBlen" + echo " type: integer" + echo " example: 16" + echo " cell barcode length" + echo "" + echo " --soloUMIstart" + echo " type: integer" + echo " example: 17" + echo " UMI start base" + echo "" + echo " --soloUMIlen" + echo " type: integer" + echo " example: 10" + echo " UMI length" + echo "" + echo " --soloBarcodeReadLength" + echo " type: integer" + echo " example: 1" + echo " length of the barcode read" + echo " - 1 ... equal to sum of soloCBlen+soloUMIlen" + echo " - 0 ... not defined, do not check" + echo "" + echo " --soloBarcodeMate" + echo " type: integer" + echo " example: 0" + echo " identifies which read mate contains the barcode (CB+UMI) sequence" + echo " - 0 ... barcode sequence is on separate read, which should always be" + echo " the last file in the --readFilesIn listed" + echo " - 1 ... barcode sequence is a part of mate 1" + echo " - 2 ... barcode sequence is a part of mate 2" + echo "" + echo " --soloCBposition" + echo " type: string, multiple values allowed" + echo " position of Cell Barcode(s) on the barcode read." + echo " Presently only works with --soloType CB_UMI_Complex, and barcodes are" + echo " assumed to be on Read2." 
+ echo " Format for each barcode: startAnchor_startPosition_endAnchor_endPosition" + echo " start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1:" + echo " read end; 2: adapter start; 3: adapter end" + echo " start(end)Position is the 0-based position with of the CB start(end)" + echo " with respect to the Anchor Base" + echo " String for different barcodes are separated by space." + echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" + echo " --soloCBposition 0_0_2_-1 3_1_3_8" + echo "" + echo " --soloUMIposition" + echo " type: string" + echo " position of the UMI on the barcode read, same as soloCBposition" + echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" + echo " --soloCBposition 3_9_3_14" + echo "" + echo " --soloAdapterSequence" + echo " type: string" + echo " adapter sequence to anchor barcodes. Only one adapter sequence is" + echo " allowed." + echo "" + echo " --soloAdapterMismatchesNmax" + echo " type: integer" + echo " example: 1" + echo " maximum number of mismatches allowed in adapter sequence." + echo "" + echo " --soloCBmatchWLtype" + echo " type: string" + echo " example: 1MM_multi" + echo " matching the Cell Barcodes to the WhiteList" + echo " - Exact ... only exact matches allowed" + echo " - 1MM ... only one match in whitelist with 1" + echo " mismatched base allowed. Allowed CBs have to have at least one read with" + echo " exact match." + echo " - 1MM_multi ... multiple matches in whitelist with" + echo " 1 mismatched base allowed, posterior probability calculation is used" + echo " choose one of the matches." + echo " Allowed CBs have to have at least one read with exact match. This option" + echo " matches best with CellRanger 2.2.0" + echo " - 1MM_multi_pseudocounts ... same as 1MM_Multi, but" + echo " pseudocounts of 1 are added to all whitelist barcodes." + echo " - 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts," + echo " multimatching to WL is allowed for CBs with N-bases. 
This option matches" + echo " best with CellRanger >= 3.0.0" + echo " - EditDist_2 ... allow up to edit distance of 3 fpr" + echo " each of the barcodes. May include one deletion + one insertion. Only" + echo " works with --soloType CB_UMI_Complex. Matches to multiple passlist" + echo " barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + echo "" + echo " --soloInputSAMattrBarcodeSeq" + echo " type: string, multiple values allowed" + echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" + echo " SAM attributes mark the barcode sequence (in proper order)." + echo " For instance, for 10X CellRanger or STARsolo BAMs, use" + echo " --soloInputSAMattrBarcodeSeq CR UR ." + echo " This parameter is required when running STARsolo with input from SAM." + echo "" + echo " --soloInputSAMattrBarcodeQual" + echo " type: string, multiple values allowed" + echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" + echo " SAM attributes mark the barcode qualities (in proper order)." + echo " For instance, for 10X CellRanger or STARsolo BAMs, use" + echo " --soloInputSAMattrBarcodeQual CY UY ." + echo " If this parameter is '-' (default), the quality 'H' will be assigned to" + echo " all bases." + echo "" + echo " --soloStrand" + echo " type: string" + echo " example: Forward" + echo " strandedness of the solo libraries:" + echo " - Unstranded ... no strand information" + echo " - Forward ... read strand same as the original RNA molecule" + echo " - Reverse ... read strand opposite to the original RNA molecule" + echo "" + echo " --soloFeatures" + echo " type: string, multiple values allowed" + echo " example: Gene" + echo " genomic features for which the UMI counts per Cell Barcode are collected" + echo " - Gene ... genes: reads match the gene transcript" + echo " - SJ ... splice junctions: reported in SJ.out.tab" + echo " - GeneFull ... 
full gene (pre-mRNA): count all reads overlapping" + echo " genes' exons and introns" + echo " - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads" + echo " overlapping genes' exons and introns: prioritize 100% overlap with exons" + echo " - GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads" + echo " overlapping genes' exons and introns: prioritize >50% overlap with" + echo " exons. Do not count reads with 100% exonic overlap in the antisense" + echo " direction." + echo "" + echo " --soloMultiMappers" + echo " type: string, multiple values allowed" + echo " example: Unique" + echo " counting method for reads mapping to multiple genes" + echo " - Unique ... count only reads that map to unique genes" + echo " - Uniform ... uniformly distribute multi-genic UMIs to all genes" + echo " - Rescue ... distribute UMIs proportionally to unique+uniform counts" + echo " (~ first iteration of EM)" + echo " - PropUnique ... distribute UMIs proportionally to unique mappers, if" + echo " present, and uniformly if not." + echo " - EM ... multi-gene UMIs are distributed using Expectation" + echo " Maximization algorithm" + echo "" + echo " --soloUMIdedup" + echo " type: string, multiple values allowed" + echo " example: 1MM_All" + echo " type of UMI deduplication (collapsing) algorithm" + echo " - 1MM_All ... all UMIs with 1 mismatch distance to" + echo " each other are collapsed (i.e. counted once)." + echo " - 1MM_Directional_UMItools ... follows the \"directional\" method from" + echo " the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017)." + echo " - 1MM_Directional ... same as 1MM_Directional_UMItools, but" + echo " with more stringent criteria for duplicate UMIs" + echo " - Exact ... only exactly matching UMIs are" + echo " collapsed." + echo " - NoDedup ... no deduplication of UMIs, count all" + echo " reads." + echo " - 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI" + echo " collapsing." 
+ echo "" + echo " --soloUMIfiltering" + echo " type: string, multiple values allowed" + echo " type of UMI filtering (for reads uniquely mapping to genes)" + echo " - - ... basic filtering: remove UMIs with N and" + echo " homopolymers (similar to CellRanger 2.2.0)." + echo " - MultiGeneUMI ... basic + remove lower-count UMIs that map to" + echo " more than one gene." + echo " - MultiGeneUMI_All ... basic + remove all UMIs that map to more than" + echo " one gene." + echo " - MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to" + echo " more than one gene, matching CellRanger > 3.0.0 ." + echo " Only works with --soloUMIdedup 1MM_CR" + echo "" + echo " --soloOutFileNames" + echo " type: string, multiple values allowed" + echo " example: Solo.out/;features.tsv;barcodes.tsv;matrix.mtx" + echo " file names for STARsolo output:" + echo " file_name_prefix gene_names barcode_sequences" + echo " cell_feature_count_matrix" + echo "" + echo " --soloCellFilter" + echo " type: string, multiple values allowed" + echo " example: CellRanger2.2;3000;0.99;10" + echo " cell filtering type and parameters" + echo " - None ... do not output filtered cells" + echo " - TopCells ... only report top cells by UMI count, followed by" + echo " the exact number of cells" + echo " - CellRanger2.2 ... simple filtering of CellRanger 2.2." + echo " Can be followed by numbers: number of expected cells, robust maximum" + echo " percentile for UMI count, maximum to minimum ratio for UMI count" + echo " The harcoded values are from CellRanger: nExpectedCells=3000;" + echo " maxPercentile=0.99; maxMinRatio=10" + echo " - EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please" + echo " cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20," + echo " 63 (2019):" + echo " " + echo "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y" + echo " Can be followed by 10 numeric parameters: nExpectedCells" + echo " maxPercentile maxMinRatio indMin indMax umiMin" + echo " umiMinFracMedian candMaxN FDR simN" + echo " The harcoded values are from CellRanger: 3000" + echo " 0.99 10 45000 90000 500 0.01" + echo " 20000 0.01 10000" + echo "" + echo " --soloOutFormatFeaturesGeneField3" + echo " type: string, multiple values allowed" + echo " example: Gene Expression" + echo " field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is" + echo " output." + echo "" + echo " --soloCellReadStats" + echo " type: string" + echo " Output reads statistics for each CB" + echo " - Standard ... standard output" + echo "" + echo "HTSeq arguments:" + echo " -s, --stranded" + echo " type: string" + echo " default: yes" + echo " choices: [ yes, no, reverse ]" + echo " Whether the data is from a strand-specific assay. 'reverse' means 'yes'" + echo " with reversed strand interpretation." + echo "" + echo " -a, --minaqual, --minimum_alignment_quality" + echo " type: integer" + echo " default: 10" + echo " Skip all reads with MAPQ alignment quality lower than the given minimum" + echo " value." + echo " MAPQ is the 5th column of a SAM/BAM file and its usage depends on the" + echo " software" + echo " used to map the reads." + echo "" + echo " -t, --type" + echo " type: string" + echo " example: exon" + echo " Feature type (3rd column in GTF file) to be used, all features of other" + echo " type are ignored (default, suitable for Ensembl GTF files: exon)" + echo "" + echo " -i, --id_attribute" + echo " type: string, multiple values allowed" + echo " example: gene_id" + echo " GTF attribute to be used as feature ID (default, suitable for Ensembl" + echo " GTF files: gene_id)." 
+ echo " All feature of the right type (see -t option) within the same GTF" + echo " attribute will be added" + echo " together. The typical way of using this option is to count all exonic" + echo " reads from each gene" + echo " and add the exons but other uses are possible as well. You can call this" + echo " option multiple" + echo " times: in that case, the combination of all attributes separated by" + echo " colons (:) will be used" + echo " as a unique identifier, e.g. for exons you might use -i gene_id -i" + echo " exon_number." + echo "" + echo " --additional_attributes" + echo " type: string, multiple values allowed" + echo " example: gene_name" + echo " Additional feature attributes (suitable for Ensembl GTF files:" + echo " gene_name). Use multiple times" + echo " for more than one additional attribute. These attributes are only used" + echo " as annotations in the" + echo " output, while the determination of how the counts are added together is" + echo " done based on option -i." + echo "" + echo " --add_chromosome_info" + echo " type: boolean_true" + echo " Store information about the chromosome of each feature as an additional" + echo " attribute" + echo " (e.g. colunm in the TSV output file)." + echo "" + echo " -m, --mode" + echo " type: string" + echo " default: union" + echo " choices: [ union, intersection-strict, intersection-nonempty ]" + echo " Mode to handle reads overlapping more than one feature." + echo "" + echo " --non_unique" + echo " type: string" + echo " default: none" + echo " choices: [ none, all, fraction, random ]" + echo " Whether and how to score reads that are not uniquely aligned or" + echo " ambiguously assigned to features." + echo "" + echo " --secondary_alignments" + echo " type: string" + echo " choices: [ score, ignore ]" + echo " Whether to score secondary alignments (0x100 flag)." 
+ echo "" + echo " --supplementary_alignments" + echo " type: string" + echo " choices: [ score, ignore ]" + echo " Whether to score supplementary alignments (0x800 flag)." + echo "" + echo " --counts_output_sparse" + echo " type: boolean_true" + echo " Store the counts as a sparse matrix (mtx, h5ad, loom)." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." 
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $@ : all arguments are forwarded to ViashDockerBuild when the pull fails
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  # Disable errexit around the pull so a failure falls through to the build.
+  save=$-; set +e
+  ViashDockerPull "$1"
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild "$@"
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# Sets the globals VSHD_ID and VSHD_STRAT. Logs an error and exits with
+# status 1 when the strategy is not recognised.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  # A `case` statement replaces the former `[ ... -o ... ]` chains: the -o
+  # operator of test is obsolescent and ambiguous for unusual operand values.
+  case "$VSHD_STRAT" in
+    alwaysbuild|build|b)
+      ViashDockerBuild "$VSHD_ID" --no-cache
+      ;;
+    alwayspull|pull|p)
+      ViashDockerPull "$VSHD_ID"
+      ;;
+    alwayspullelsebuild|pullelsebuild)
+      ViashDockerPullElseBuild "$VSHD_ID" --no-cache
+      ;;
+    alwayspullelsecachedbuild|pullelsecachedbuild)
+      ViashDockerPullElseBuild "$VSHD_ID"
+      ;;
+    alwayscachedbuild|cachedbuild|cb)
+      ViashDockerBuild "$VSHD_ID"
+      ;;
+    ifneedbe*)
+      # Only act when the image is not yet available locally.
+      save=$-; set +e
+      ViashDockerLocalTagCheck "$VSHD_ID"
+      outCheck=$?
+      [[ $save =~ e ]] && set -e
+      if [ $outCheck -eq 0 ]; then
+        ViashInfo "Image $VSHD_ID already exists"
+      else
+        case "$VSHD_STRAT" in
+          ifneedbebuild)
+            ViashDockerBuild "$VSHD_ID" --no-cache
+            ;;
+          ifneedbecachedbuild)
+            ViashDockerBuild "$VSHD_ID"
+            ;;
+          ifneedbepull)
+            ViashDockerPull "$VSHD_ID"
+            ;;
+          ifneedbepullelsebuild)
+            ViashDockerPullElseBuild "$VSHD_ID" --no-cache
+            ;;
+          ifneedbepullelsecachedbuild)
+            ViashDockerPullElseBuild "$VSHD_ID"
+            ;;
+          *)
+            ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+            exit 1
+            ;;
+        esac
+      fi
+      ;;
+    push|forcepush|alwayspush)
+      ViashDockerPush "$VSHD_ID"
+      ;;
+    pushifnotpresent|gentlepush|maybepush)
+      # Only push when the image cannot be found on the remote.
+      save=$-; set +e
+      ViashDockerRemoteTagCheck "$VSHD_ID"
+      outCheck=$?
+      [[ $save =~ e ]] && set -e
+      if [ $outCheck -eq 0 ]; then
+        ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+      else
+        ViashNotice "Container '$VSHD_ID' does not yet exist."
+        ViashDockerPush "$VSHD_ID"
+      fi
+      ;;
+    donothing|meh)
+      ViashNotice "Skipping setup."
+      ;;
+    *)
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+      ;;
+  esac
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# Runs `sh` inside the image and looks up every command with `command -v`.
+# Logs an error and exits with status 1 when the container run returns a
+# non-zero exit code.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $@ : commands to verify being present
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag="$1"
+  shift 1
+  # Join the remaining arguments into one space-separated string; it is
+  # interpolated into the `for` loop executed inside the container.
+  commands="$*"
+  save=$-; set +e
+  missing=$(docker run --rm --entrypoint=sh "$tag" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM python:3.10-slim
+
+ENTRYPOINT []
+
+
+ENV STAR_VERSION 2.7.10b
+ENV PACKAGES gcc g++ make wget zlib1g-dev unzip
+RUN apt-get update && \
+  apt-get install -y --no-install-recommends ${PACKAGES} && \
+  cd /tmp && \
+  wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \
+  unzip ${STAR_VERSION}.zip && \
+  cd STAR-${STAR_VERSION}/source && \
+  make STARstatic CXXFLAGS_SIMD=-std=c++11 && \
+  cp STAR /usr/local/bin && \
+  cd / && \
+  rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \
+  apt-get --purge autoremove -y ${PACKAGES} && \
+  apt-get clean
+
+RUN apt-get update && \
+  DEBIAN_FRONTEND=noninteractive apt-get install -y samtools procps && \
+  rm -rf /var/lib/apt/lists/*
+
+RUN pip install --upgrade pip && \
+  pip install --upgrade --no-cache-dir "pyyaml" "HTSeq" "multiprocess" "gtfparse<2.0" "pandas" "multiqc~=1.15.0"
+
+LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt"
+LABEL org.opencontainers.image.description="Companion container for running component mapping multi_star"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:31Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-multi_star-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+        len=${#outp[@]}
+        if ((len==0)); then
+          continue
+        else
+          unset outp[$((len-1))]
+        fi
+        ;;
+      *)
+        len=${#outp[@]}
+        outp[$len]="$i"
+        ;;
+      esac
+    done
+    echo /"${outp[*]}"
+  )
+}
+# ViashAutodetectMount: auto configuring docker mounts from parameters
+# $1 : The parameter value
+# returns : New parameter, i.e. the path prefixed with /viash_automount
+# $VIASH_EXTRA_MOUNTS : not modified by these functions.
+#   NOTE(review): presumably the caller appends the output of
+#   ViashAutodetectMountArg to this array - confirm against the call sites.
+# examples:
+#   ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar'
+#   ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"'
+function ViashAutodetectMount {
+  abs_path=$(ViashAbsolutePath "$1")
+  # a directory is mounted as-is; for anything else its parent directory is mounted
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=$(dirname "$abs_path")
+    base_name=$(basename "$abs_path")
+  fi
+  mount_target="/viash_automount$mount_source"
+  echo "$mount_target/$base_name"
+}
+# ViashAutodetectMountArg: print the `--volume` argument that mounts the
+# directory selected for $1 under /viash_automount
+# $1 : The parameter value
+function ViashAutodetectMountArg {
+  abs_path=$(ViashAbsolutePath "$1")
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=$(dirname "$abs_path")
+    base_name=$(basename "$abs_path")
+  fi
+  mount_target="/viash_automount$mount_source"
+  ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target"
+  echo "--volume=\"$mount_source:$mount_target\""
+}
+# ViashStripAutomount: remove a leading /viash_automount from a path
+# $1 : path, made absolute before stripping
+function ViashStripAutomount {
+  abs_path=$(ViashAbsolutePath "$1")
+  echo "${abs_path#/viash_automount}"
+}
+# ViashExtractFlags: Retain leading flag
+# $1 : string with a possible leading flag
+# return : leading flag
+# examples:
+#   ViashExtractFlags --foo=bar # returns --foo
+function ViashExtractFlags {
+  # printf with a quoted argument: an unquoted `echo $1` is subject to word
+  # splitting and globbing, and echo would swallow flags such as -n or -e.
+  printf '%s\n' "$1" | sed 's/=.*//'
+}
+# initialise variables
+VIASH_EXTRA_MOUNTS=()
+
+# initialise the positional-argument accumulator (a string) and the run mode
+VIASH_POSITIONAL_ARGS=''
+VIASH_MODE='run'
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -h|--help)
+      ViashHelp
+      exit
+      ;;
+    ---v|---verbose)
+      let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
+      shift 1
+      ;;
+    ---verbosity)
+      VIASH_VERBOSITY="$2"
+      shift 2
+      ;;
+    ---verbosity=*)
+
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "multi_star 0.12.4" + exit + ;; + --input_id) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID="$2" + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id=*) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --input_r1) + if [ -z "$VIASH_PAR_INPUT_R1" ]; then + VIASH_PAR_INPUT_R1="$2" + else + VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r1. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_r1=*) + if [ -z "$VIASH_PAR_INPUT_R1" ]; then + VIASH_PAR_INPUT_R1=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --input_r2) + if [ -z "$VIASH_PAR_INPUT_R2" ]; then + VIASH_PAR_INPUT_R2="$2" + else + VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r2. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_r2=*) + if [ -z "$VIASH_PAR_INPUT_R2" ]; then + VIASH_PAR_INPUT_R2=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --reference_index) + [ -n "$VIASH_PAR_REFERENCE_INDEX" ] && ViashError Bad arguments for option \'--reference_index\': \'$VIASH_PAR_REFERENCE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_index. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --reference_index=*) + [ -n "$VIASH_PAR_REFERENCE_INDEX" ] && ViashError Bad arguments for option \'--reference_index=*\': \'$VIASH_PAR_REFERENCE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_INDEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeDir) + [ -n "$VIASH_PAR_REFERENCE_INDEX" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_REFERENCE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_gtf) + [ -n "$VIASH_PAR_REFERENCE_GTF" ] && ViashError Bad arguments for option \'--reference_gtf\': \'$VIASH_PAR_REFERENCE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_gtf=*) + [ -n "$VIASH_PAR_REFERENCE_GTF" ] && ViashError Bad arguments for option \'--reference_gtf=*\': \'$VIASH_PAR_REFERENCE_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFileNamePrefix) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--outFileNamePrefix\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFileNamePrefix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --run_htseq_count) + [ -n "$VIASH_PAR_RUN_HTSEQ_COUNT" ] && ViashError Bad arguments for option \'--run_htseq_count\': \'$VIASH_PAR_RUN_HTSEQ_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUN_HTSEQ_COUNT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --run_htseq_count. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --run_htseq_count=*) + [ -n "$VIASH_PAR_RUN_HTSEQ_COUNT" ] && ViashError Bad arguments for option \'--run_htseq_count=*\': \'$VIASH_PAR_RUN_HTSEQ_COUNT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUN_HTSEQ_COUNT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --run_multiqc) + [ -n "$VIASH_PAR_RUN_MULTIQC" ] && ViashError Bad arguments for option \'--run_multiqc\': \'$VIASH_PAR_RUN_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUN_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --run_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --run_multiqc=*) + [ -n "$VIASH_PAR_RUN_MULTIQC" ] && ViashError Bad arguments for option \'--run_multiqc=*\': \'$VIASH_PAR_RUN_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_RUN_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_success_rate) + [ -n "$VIASH_PAR_MIN_SUCCESS_RATE" ] && ViashError Bad arguments for option \'--min_success_rate\': \'$VIASH_PAR_MIN_SUCCESS_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SUCCESS_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_success_rate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_success_rate=*) + [ -n "$VIASH_PAR_MIN_SUCCESS_RATE" ] && ViashError Bad arguments for option \'--min_success_rate=*\': \'$VIASH_PAR_MIN_SUCCESS_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SUCCESS_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --runRNGseed) + [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUNRNGSEED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --runRNGseed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --runRNGseed=*) + [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed=*\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUNRNGSEED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeFastaFiles) + if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_PAR_GENOMEFASTAFILES="$2" + else + VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --genomeFastaFiles=*) + if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_PAR_GENOMEFASTAFILES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbFileChrStartEnd) + if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then + VIASH_PAR_SJDBFILECHRSTARTEND="$2" + else + VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbFileChrStartEnd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbFileChrStartEnd=*) + if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then + VIASH_PAR_SJDBFILECHRSTARTEND=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbGTFfile) + [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFfile=*) + [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile=*\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFchrPrefix) + [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFCHRPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFchrPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --sjdbGTFchrPrefix=*) + [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix=*\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFCHRPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFfeatureExon) + [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFEATUREEXON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfeatureExon. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFfeatureExon=*) + [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon=*\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFEATUREEXON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentTranscript) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentTranscript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentTranscript=*) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentGene) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTGENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGene=*) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTGENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentGeneName) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$2" + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneName. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGeneName=*) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbGTFtagExonParentGeneType) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$2" + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneType. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGeneType=*) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbOverhang) + [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBOVERHANG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbOverhang. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbOverhang=*) + [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang=*\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBOVERHANG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbScore) + [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBSCORE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbScore. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbScore=*) + [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore=*\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBSCORE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbInsertSave) + [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SJDBINSERTSAVE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbInsertSave. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbInsertSave=*) + [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave=*\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBINSERTSAVE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --varVCFfile) + [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARVCFFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --varVCFfile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --varVCFfile=*) + [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile=*\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARVCFFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesType) + [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesType=*) + [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType=*\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_READFILESTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesSAMattrKeep) + if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then + VIASH_PAR_READFILESSAMATTRKEEP="$2" + else + VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesSAMattrKeep. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesSAMattrKeep=*) + if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then + VIASH_PAR_READFILESSAMATTRKEEP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readFilesManifest) + [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESMANIFEST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesManifest. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesManifest=*) + [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest=*\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESMANIFEST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesPrefix) + [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --readFilesPrefix=*) + [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix=*\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesCommand) + if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then + VIASH_PAR_READFILESCOMMAND="$2" + else + VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesCommand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesCommand=*) + if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then + VIASH_PAR_READFILESCOMMAND=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readMapNumber) + [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMAPNUMBER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMapNumber. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readMapNumber=*) + [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber=*\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMAPNUMBER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readMatesLengthsIn) + [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMATESLENGTHSIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMatesLengthsIn. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readMatesLengthsIn=*) + [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn=*\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMATESLENGTHSIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readNameSeparator) + if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then + VIASH_PAR_READNAMESEPARATOR="$2" + else + VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readNameSeparator. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readNameSeparator=*) + if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then + VIASH_PAR_READNAMESEPARATOR=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readQualityScoreBase) + [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READQUALITYSCOREBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readQualityScoreBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readQualityScoreBase=*) + [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase=*\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READQUALITYSCOREBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --clipAdapterType) + [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CLIPADAPTERTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clipAdapterType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clipAdapterType=*) + [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType=*\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CLIPADAPTERTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --clip3pNbases) + if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then + VIASH_PAR_CLIP3PNBASES="$2" + else + VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pNbases=*) + if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then + VIASH_PAR_CLIP3PNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAdapterSeq) + if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then + VIASH_PAR_CLIP3PADAPTERSEQ="$2" + else + VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterSeq. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pAdapterSeq=*) + if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then + VIASH_PAR_CLIP3PADAPTERSEQ=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAdapterMMp) + if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + VIASH_PAR_CLIP3PADAPTERMMP="$2" + else + VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterMMp. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --clip3pAdapterMMp=*) + if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + VIASH_PAR_CLIP3PADAPTERMMP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAfterAdapterNbases) + if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$2" + else + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAfterAdapterNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pAfterAdapterNbases=*) + if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + VIASH_PAR_CLIP3PAFTERADAPTERNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip5pNbases) + if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then + VIASH_PAR_CLIP5PNBASES="$2" + else + VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip5pNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip5pNbases=*) + if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then + VIASH_PAR_CLIP5PNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --limitGenomeGenerateRAM) + [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITGENOMEGENERATERAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitGenomeGenerateRAM. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --limitGenomeGenerateRAM=*) + [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM=*\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITGENOMEGENERATERAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitIObufferSize) + if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + VIASH_PAR_LIMITIOBUFFERSIZE="$2" + else + VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitIObufferSize. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitIObufferSize=*) + if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + VIASH_PAR_LIMITIOBUFFERSIZE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --limitOutSAMoneReadBytes) + [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSAMONEREADBYTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSAMoneReadBytes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSAMoneReadBytes=*) + [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes=*\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSAMONEREADBYTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitOutSJoneRead) + [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LIMITOUTSJONEREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJoneRead. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSJoneRead=*) + [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead=*\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJONEREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitOutSJcollapsed) + [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJCOLLAPSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJcollapsed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSJcollapsed=*) + [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed=*\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJCOLLAPSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitBAMsortRAM) + [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITBAMSORTRAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitBAMsortRAM. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitBAMsortRAM=*) + [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM=*\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LIMITBAMSORTRAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitSjdbInsertNsj) + [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITSJDBINSERTNSJ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitSjdbInsertNsj. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitSjdbInsertNsj=*) + [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj=*\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITSJDBINSERTNSJ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitNreadsSoft) + [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITNREADSSOFT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitNreadsSoft. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitNreadsSoft=*) + [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft=*\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITNREADSSOFT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outTmpKeep) + [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTTMPKEEP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outTmpKeep. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outTmpKeep=*) + [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep=*\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTTMPKEEP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outStd) + [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSTD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outStd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outStd=*) + [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd=*\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSTD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outReadsUnmapped) + [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTREADSUNMAPPED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outReadsUnmapped. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outReadsUnmapped=*) + [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped=*\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTREADSUNMAPPED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outQSconversionAdd) + [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTQSCONVERSIONADD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outQSconversionAdd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outQSconversionAdd=*) + [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd=*\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTQSCONVERSIONADD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outMultimapperOrder) + [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTMULTIMAPPERORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outMultimapperOrder. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outMultimapperOrder=*) + [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder=*\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTMULTIMAPPERORDER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMmode) + [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMmode=*) + [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode=*\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSAMMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMstrandField) + [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMSTRANDFIELD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMstrandField. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMstrandField=*) + [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField=*\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMSTRANDFIELD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMattributes) + if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then + VIASH_PAR_OUTSAMATTRIBUTES="$2" + else + VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattributes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMattributes=*) + if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then + VIASH_PAR_OUTSAMATTRIBUTES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMattrIHstart) + [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMATTRIHSTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrIHstart. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMattrIHstart=*) + [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart=*\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMATTRIHSTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMunmapped) + if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then + VIASH_PAR_OUTSAMUNMAPPED="$2" + else + VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMunmapped. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMunmapped=*) + if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then + VIASH_PAR_OUTSAMUNMAPPED=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMorder) + [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMorder. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMorder=*) + [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder=*\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMORDER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMprimaryFlag) + [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMPRIMARYFLAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMprimaryFlag. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMprimaryFlag=*) + [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag=*\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMPRIMARYFLAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMreadID) + [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMREADID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMreadID. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMreadID=*) + [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID=*\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMREADID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMmapqUnique) + [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMAPQUNIQUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmapqUnique. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMmapqUnique=*) + [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique=*\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMAPQUNIQUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMflagOR) + [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSAMFLAGOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagOR. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMflagOR=*) + [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR=*\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMflagAND) + [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagAND. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMflagAND=*) + [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND=*\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMattrRGline) + if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then + VIASH_PAR_OUTSAMATTRRGLINE="$2" + else + VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrRGline. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMattrRGline=*) + if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then + VIASH_PAR_OUTSAMATTRRGLINE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderHD) + if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then + VIASH_PAR_OUTSAMHEADERHD="$2" + else + VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderHD. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMheaderHD=*) + if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then + VIASH_PAR_OUTSAMHEADERHD=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderPG) + if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then + VIASH_PAR_OUTSAMHEADERPG="$2" + else + VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderPG. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMheaderPG=*) + if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then + VIASH_PAR_OUTSAMHEADERPG=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderCommentFile) + [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMHEADERCOMMENTFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderCommentFile. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMheaderCommentFile=*) + [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile=*\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMHEADERCOMMENTFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMfilter) + if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then + VIASH_PAR_OUTSAMFILTER="$2" + else + VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMfilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMfilter=*) + if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then + VIASH_PAR_OUTSAMFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMmultNmax) + [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMULTNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmultNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMmultNmax=*) + [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax=*\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMULTNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMtlen) + [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMTLEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtlen. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMtlen=*) + [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen=*\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMTLEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMcompression) + [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMCOMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMcompression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMcompression=*) + [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression=*\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMCOMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMsortingThreadN) + [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGTHREADN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingThreadN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMsortingThreadN=*) + [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN=*\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTBAMSORTINGTHREADN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMsortingBinsN) + [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGBINSN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingBinsN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMsortingBinsN=*) + [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN=*\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGBINSN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamRemoveDuplicatesType) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bamRemoveDuplicatesType=*) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamRemoveDuplicatesMate2basesN) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesMate2basesN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bamRemoveDuplicatesMate2basesN=*) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigType) + if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then + VIASH_PAR_OUTWIGTYPE="$2" + else + VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigType=*) + if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then + VIASH_PAR_OUTWIGTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outWigStrand) + [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGSTRAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigStrand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigStrand=*) + [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand=*\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTWIGSTRAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigReferencesPrefix) + [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGREFERENCESPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigReferencesPrefix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigReferencesPrefix=*) + [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix=*\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGREFERENCESPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigNorm) + [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGNORM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigNorm. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigNorm=*) + [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm=*\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGNORM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterType) + [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterType. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outFilterType=*) + [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType=*\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMultimapScoreRange) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMultimapScoreRange=*) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMultimapNmax) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMultimapNmax=*) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMismatchNmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNoverLmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMismatchNoverLmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNoverReadLmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverReadLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMismatchNoverReadLmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterScoreMin) + [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterScoreMin=*) + [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin=*\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterScoreMinOverLread) + [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERSCOREMINOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMinOverLread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterScoreMinOverLread=*) + [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread=*\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMINOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMatchNmin) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMatchNmin=*) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin=*\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMatchNminOverLread) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNminOverLread. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outFilterMatchNminOverLread=*) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread=*\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterIntronMotifs) + [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONMOTIFS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronMotifs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterIntronMotifs=*) + [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs=*\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONMOTIFS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterIntronStrands) + [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONSTRANDS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronStrands. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterIntronStrands=*) + [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands=*\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERINTRONSTRANDS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJtype) + [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJtype=*) + [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype=*\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJfilterReads) + [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJFILTERREADS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterReads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterReads=*) + [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads=*\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJFILTERREADS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJfilterOverhangMin) + if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$2" + else + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterOverhangMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSJfilterOverhangMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + VIASH_PAR_OUTSJFILTEROVERHANGMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterCountUniqueMin) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$2" + else + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountUniqueMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterCountUniqueMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterCountTotalMin) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$2" + else + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountTotalMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterCountTotalMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterDistToOtherSJmin) + if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$2" + else + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterDistToOtherSJmin. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterDistToOtherSJmin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterIntronMaxVsReadN) + if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$2" + else + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterIntronMaxVsReadN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterIntronMaxVsReadN=*) + if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --scoreGap) + [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGap. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGap=*) + [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap=*\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapNoncan) + [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREGAPNONCAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapNoncan. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapNoncan=*) + [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan=*\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPNONCAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapGCAG) + [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPGCAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapGCAG. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapGCAG=*) + [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG=*\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPGCAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapATAC) + [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPATAC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapATAC. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapATAC=*) + [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC=*\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREGAPATAC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGenomicLengthLog2scale) + [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGenomicLengthLog2scale. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGenomicLengthLog2scale=*) + [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale=*\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreDelOpen) + [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELOPEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelOpen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreDelOpen=*) + [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen=*\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELOPEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreDelBase) + [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelBase. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreDelBase=*) + [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase=*\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreInsOpen) + [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSOPEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsOpen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreInsOpen=*) + [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen=*\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSOPEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreInsBase) + [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreInsBase=*) + [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase=*\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreStitchSJshift) + [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCORESTITCHSJSHIFT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreStitchSJshift. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreStitchSJshift=*) + [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift=*\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCORESTITCHSJSHIFT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchStartLmax) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchStartLmax=*) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchStartLmaxOverLread) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmaxOverLread. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --seedSearchStartLmaxOverLread=*) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchLmax) + [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchLmax=*) + [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax=*\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedMultimapNmax) + [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedMultimapNmax=*) + [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax=*\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SEEDMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedPerReadNmax) + [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedPerReadNmax=*) + [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax=*\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedPerWindowNmax) + [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERWINDOWNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedPerWindowNmax=*) + [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax=*\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERWINDOWNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedNoneLociPerWindow) + [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDNONELOCIPERWINDOW="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedNoneLociPerWindow. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --seedNoneLociPerWindow=*) + [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow=*\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDNONELOCIPERWINDOW=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSplitMin) + [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSPLITMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSplitMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSplitMin=*) + [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin=*\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSPLITMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedMapMin) + [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMAPMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMapMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedMapMin=*) + [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin=*\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMAPMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignIntronMin) + [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNINTRONMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignIntronMin=*) + [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin=*\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignIntronMax) + [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignIntronMax=*) + [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax=*\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignMatesGapMax) + [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNMATESGAPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignMatesGapMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignMatesGapMax=*) + [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax=*\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNMATESGAPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSJoverhangMin) + [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJoverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSJoverhangMin=*) + [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin=*\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSJstitchMismatchNmax) + if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$2" + else + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJstitchMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSJstitchMismatchNmax=*) + if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --alignSJDBoverhangMin) + [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJDBOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJDBoverhangMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --alignSJDBoverhangMin=*) + [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin=*\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJDBOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSplicedMateMapLmin) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSplicedMateMapLmin=*) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSplicedMateMapLminOverLmate) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLminOverLmate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSplicedMateMapLminOverLmate=*) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignWindowsPerReadNmax) + [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNWINDOWSPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignWindowsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignWindowsPerReadNmax=*) + [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax=*\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNWINDOWSPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignTranscriptsPerWindowNmax) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignTranscriptsPerWindowNmax=*) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignTranscriptsPerReadNmax) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignTranscriptsPerReadNmax=*) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignEndsType) + [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignEndsType=*) + [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType=*\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignEndsProtrude) + [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNENDSPROTRUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsProtrude. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignEndsProtrude=*) + [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude=*\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSPROTRUDE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSoftClipAtReferenceEnds) + [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSoftClipAtReferenceEnds. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSoftClipAtReferenceEnds=*) + [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds=*\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignInsertionFlush) + [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINSERTIONFLUSH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignInsertionFlush. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --alignInsertionFlush=*) + [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush=*\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINSERTIONFLUSH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --peOverlapNbasesMin) + [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPNBASESMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapNbasesMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --peOverlapNbasesMin=*) + [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin=*\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPNBASESMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --peOverlapMMp) + [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPMMP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapMMp. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --peOverlapMMp=*) + [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp=*\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_PEOVERLAPMMP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winAnchorMultimapNmax) + [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winAnchorMultimapNmax=*) + [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax=*\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winBinNbits) + [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINBINNBITS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winBinNbits. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winBinNbits=*) + [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits=*\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINBINNBITS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winAnchorDistNbins) + [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORDISTNBINS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorDistNbins. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --winAnchorDistNbins=*) + [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins=*\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORDISTNBINS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winFlankNbins) + [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINFLANKNBINS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winFlankNbins. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winFlankNbins=*) + [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins=*\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINFLANKNBINS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winReadCoverageRelativeMin) + [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGERELATIVEMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageRelativeMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winReadCoverageRelativeMin=*) + [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin=*\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_WINREADCOVERAGERELATIVEMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winReadCoverageBasesMin) + [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGEBASESMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageBasesMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winReadCoverageBasesMin=*) + [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin=*\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGEBASESMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimOutType) + if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then + VIASH_PAR_CHIMOUTTYPE="$2" + else + VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimOutType=*) + if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then + VIASH_PAR_CHIMOUTTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --chimSegmentMin) + [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --chimSegmentMin=*) + [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin=*\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreMin) + [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreMin=*) + [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin=*\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreDropMax) + [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREDROPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreDropMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreDropMax=*) + [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax=*\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREDROPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreSeparation) + [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMSCORESEPARATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreSeparation. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreSeparation=*) + [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation=*\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCORESEPARATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreJunctionNonGTAG) + [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreJunctionNonGTAG. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreJunctionNonGTAG=*) + [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG=*\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimJunctionOverhangMin) + [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMJUNCTIONOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimJunctionOverhangMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --chimJunctionOverhangMin=*) + [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin=*\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMJUNCTIONOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimSegmentReadGapMax) + [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTREADGAPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentReadGapMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimSegmentReadGapMax=*) + [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax=*\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTREADGAPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimFilter) + if [ -z "$VIASH_PAR_CHIMFILTER" ]; then + VIASH_PAR_CHIMFILTER="$2" + else + VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimFilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimFilter=*) + if [ -z "$VIASH_PAR_CHIMFILTER" ]; then + VIASH_PAR_CHIMFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --chimMainSegmentMultNmax) + [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMMAINSEGMENTMULTNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMainSegmentMultNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimMainSegmentMultNmax=*) + [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax=*\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMAINSEGMENTMULTNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimMultimapNmax) + [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimMultimapNmax=*) + [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax=*\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimMultimapScoreRange) + [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPSCORERANGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapScoreRange. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --chimMultimapScoreRange=*) + [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange=*\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimNonchimScoreDropMin) + [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMNONCHIMSCOREDROPMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimNonchimScoreDropMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimNonchimScoreDropMin=*) + [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin=*\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMNONCHIMSCOREDROPMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimOutJunctionFormat) + [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMOUTJUNCTIONFORMAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutJunctionFormat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimOutJunctionFormat=*) + [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat=*\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMOUTJUNCTIONFORMAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quantMode) + if [ -z "$VIASH_PAR_QUANTMODE" ]; then + VIASH_PAR_QUANTMODE="$2" + else + VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quantMode=*) + if [ -z "$VIASH_PAR_QUANTMODE" ]; then + VIASH_PAR_QUANTMODE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --quantTranscriptomeBAMcompression) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBAMcompression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quantTranscriptomeBAMcompression=*) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quantTranscriptomeBan) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBan. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --quantTranscriptomeBan=*) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --twopassMode) + [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASSMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopassMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --twopassMode=*) + [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode=*\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASSMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --twopass1readsN) + [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASS1READSN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopass1readsN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --twopass1readsN=*) + [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN=*\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASS1READSN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --waspOutputMode) + [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_WASPOUTPUTMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --waspOutputMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --waspOutputMode=*) + [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode=*\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WASPOUTPUTMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloType) + if [ -z "$VIASH_PAR_SOLOTYPE" ]; then + VIASH_PAR_SOLOTYPE="$2" + else + VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloType=*) + if [ -z "$VIASH_PAR_SOLOTYPE" ]; then + VIASH_PAR_SOLOTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCBwhitelist) + if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then + VIASH_PAR_SOLOCBWHITELIST="$2" + else + VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBwhitelist. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBwhitelist=*) + if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then + VIASH_PAR_SOLOCBWHITELIST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCBstart) + [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBSTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBstart. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloCBstart=*) + [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart=*\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBSTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBlen) + [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBLEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBlen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBlen=*) + [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen=*\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBLEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloUMIstart) + [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMISTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIstart. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIstart=*) + [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart=*\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMISTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloUMIlen) + [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMILEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIlen. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIlen=*) + [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen=*\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMILEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloBarcodeReadLength) + [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEREADLENGTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeReadLength. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloBarcodeReadLength=*) + [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength=*\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEREADLENGTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloBarcodeMate) + [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEMATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeMate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloBarcodeMate=*) + [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate=*\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SOLOBARCODEMATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBposition) + if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then + VIASH_PAR_SOLOCBPOSITION="$2" + else + VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBposition. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBposition=*) + if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then + VIASH_PAR_SOLOCBPOSITION=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIposition) + [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMIPOSITION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIposition. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIposition=*) + [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition=*\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMIPOSITION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloAdapterSequence) + [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERSEQUENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterSequence. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloAdapterSequence=*) + [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence=*\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERSEQUENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloAdapterMismatchesNmax) + [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERMISMATCHESNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterMismatchesNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloAdapterMismatchesNmax=*) + [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax=*\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERMISMATCHESNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBmatchWLtype) + [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBMATCHWLTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBmatchWLtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBmatchWLtype=*) + [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype=*\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SOLOCBMATCHWLTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloInputSAMattrBarcodeSeq) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$2" + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeSeq. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloInputSAMattrBarcodeSeq=*) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloInputSAMattrBarcodeQual) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$2" + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeQual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloInputSAMattrBarcodeQual=*) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloStrand) + [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOSTRAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloStrand. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloStrand=*) + [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand=*\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOSTRAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloFeatures) + if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then + VIASH_PAR_SOLOFEATURES="$2" + else + VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloFeatures. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloFeatures=*) + if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then + VIASH_PAR_SOLOFEATURES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloMultiMappers) + if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then + VIASH_PAR_SOLOMULTIMAPPERS="$2" + else + VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloMultiMappers. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloMultiMappers=*) + if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then + VIASH_PAR_SOLOMULTIMAPPERS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIdedup) + if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then + VIASH_PAR_SOLOUMIDEDUP="$2" + else + VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIdedup. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloUMIdedup=*) + if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then + VIASH_PAR_SOLOUMIDEDUP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIfiltering) + if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then + VIASH_PAR_SOLOUMIFILTERING="$2" + else + VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIfiltering. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIfiltering=*) + if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then + VIASH_PAR_SOLOUMIFILTERING=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloOutFileNames) + if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then + VIASH_PAR_SOLOOUTFILENAMES="$2" + else + VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFileNames. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloOutFileNames=*) + if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then + VIASH_PAR_SOLOOUTFILENAMES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCellFilter) + if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then + VIASH_PAR_SOLOCELLFILTER="$2" + else + VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellFilter. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloCellFilter=*) + if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then + VIASH_PAR_SOLOCELLFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloOutFormatFeaturesGeneField3) + if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$2" + else + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFormatFeaturesGeneField3. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloOutFormatFeaturesGeneField3=*) + if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCellReadStats) + [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCELLREADSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellReadStats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCellReadStats=*) + [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats=*\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCELLREADSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --stranded) + [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_STRANDED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --stranded. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --stranded=*) + [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'--stranded=*\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDED=$(ViashRemoveFlags "$1") + shift 1 + ;; + -s) + [ -n "$VIASH_PAR_STRANDED" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRANDED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRANDED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minimum_alignment_quality) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimum_alignment_quality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minimum_alignment_quality=*) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minimum_alignment_quality=*\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + -a) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'-a\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -a. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minaqual) + [ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ] && ViashError Bad arguments for option \'--minaqual\': \'$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --minaqual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --type) + [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --type. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --type=*) + [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'--type=*\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -t) + [ -n "$VIASH_PAR_TYPE" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --id_attribute) + if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then + VIASH_PAR_ID_ATTRIBUTE="$2" + else + VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --id_attribute. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --id_attribute=*) + if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then + VIASH_PAR_ID_ATTRIBUTE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_ID_ATTRIBUTE" ]; then + VIASH_PAR_ID_ATTRIBUTE="$2" + else + VIASH_PAR_ID_ATTRIBUTE="$VIASH_PAR_ID_ATTRIBUTE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --additional_attributes) + if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then + VIASH_PAR_ADDITIONAL_ATTRIBUTES="$2" + else + VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --additional_attributes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --additional_attributes=*) + if [ -z "$VIASH_PAR_ADDITIONAL_ATTRIBUTES" ]; then + VIASH_PAR_ADDITIONAL_ATTRIBUTES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ADDITIONAL_ATTRIBUTES="$VIASH_PAR_ADDITIONAL_ATTRIBUTES:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --add_chromosome_info) + [ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ] && ViashError Bad arguments for option \'--add_chromosome_info\': \'$VIASH_PAR_ADD_CHROMOSOME_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ADD_CHROMOSOME_INFO=true + shift 1 + ;; + --mode) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --mode=*) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -m) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'-m\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -m. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --non_unique) + [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NON_UNIQUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --non_unique. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --non_unique=*) + [ -n "$VIASH_PAR_NON_UNIQUE" ] && ViashError Bad arguments for option \'--non_unique=*\': \'$VIASH_PAR_NON_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NON_UNIQUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --secondary_alignments) + [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SECONDARY_ALIGNMENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --secondary_alignments. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --secondary_alignments=*) + [ -n "$VIASH_PAR_SECONDARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--secondary_alignments=*\': \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SECONDARY_ALIGNMENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --supplementary_alignments) + [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --supplementary_alignments. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --supplementary_alignments=*) + [ -n "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ] && ViashError Bad arguments for option \'--supplementary_alignments=*\': \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --counts_output_sparse) + [ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ] && ViashError Bad arguments for option \'--counts_output_sparse\': \'$VIASH_PAR_COUNTS_OUTPUT_SPARSE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COUNTS_OUTPUT_SPARSE=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -ge 2 ] && shift 2 || shift 1 # also consume the strategy value read from $2; fall back to one shift when no value was given + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # flag and value are a single argument + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the value is embedded in $1, not in the next argument + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus.
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings: prints the size in bytes for inputs such as "2gb" or "512 M", prints nothing when the string does not match +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=$(( 10#${memory/[^0-9]*/} )) # force base 10 so a leading zero is not parsed as octal + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"` + # do not define other variables if memory_b is an empty string + if [ !
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty (unset takes the variable name, not its expansion) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty (unset takes the variable name, not its expansion) +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then + ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_R1+x} ]; then + ViashError '--input_r1' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE_INDEX+x} ]; then + ViashError '--reference_index' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE_GTF+x} ]; then + ViashError '--reference_gtf' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_RUN_HTSEQ_COUNT+x} ]; then + VIASH_PAR_RUN_HTSEQ_COUNT="true" +fi +if [ -z ${VIASH_PAR_RUN_MULTIQC+x} ]; then + VIASH_PAR_RUN_MULTIQC="true" +fi +if [ -z ${VIASH_PAR_MIN_SUCCESS_RATE+x} ]; then + VIASH_PAR_MIN_SUCCESS_RATE="0.5" +fi +if [ -z ${VIASH_PAR_STRANDED+x} ]; then + VIASH_PAR_STRANDED="yes" +fi +if [ -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then + VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY="10" +fi +if [ -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then + VIASH_PAR_ADD_CHROMOSOME_INFO="false" +fi +if [ -z ${VIASH_PAR_MODE+x} ]; then + VIASH_PAR_MODE="union" +fi +if [ -z ${VIASH_PAR_NON_UNIQUE+x} ]; then + VIASH_PAR_NON_UNIQUE="none" +fi +if [ -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then + VIASH_PAR_COUNTS_OUTPUT_SPARSE="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT_R1; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT_R2; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_REFERENCE_INDEX" ] && [ ! -e "$VIASH_PAR_REFERENCE_INDEX" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE_INDEX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE_GTF" ] && [ ! -e "$VIASH_PAR_REFERENCE_GTF" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE_GTF' does not exist." + exit 1 +fi +if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ] && [ ! -e "$VIASH_PAR_SJDBGTFFILE" ]; then + ViashError "Input file '$VIASH_PAR_SJDBGTFFILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ] && [ ! -e "$VIASH_PAR_READFILESMANIFEST" ]; then + ViashError "Input file '$VIASH_PAR_READFILESMANIFEST' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_RUN_HTSEQ_COUNT" ]]; then + if ! [[ "$VIASH_PAR_RUN_HTSEQ_COUNT" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--run_htseq_count' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_RUN_MULTIQC" ]]; then + if ! [[ "$VIASH_PAR_RUN_MULTIQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--run_multiqc' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SUCCESS_RATE" ]]; then + if ! [[ "$VIASH_PAR_MIN_SUCCESS_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_success_rate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_RUNRNGSEED" ]]; then + if ! [[ "$VIASH_PAR_RUNRNGSEED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--runRNGseed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SJDBOVERHANG" ]]; then + if ! [[ "$VIASH_PAR_SJDBOVERHANG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sjdbOverhang' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SJDBSCORE" ]]; then + if ! 
[[ "$VIASH_PAR_SJDBSCORE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sjdbScore' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_READMAPNUMBER" ]]; then + if ! [[ "$VIASH_PAR_READMAPNUMBER" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--readMapNumber' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_READQUALITYSCOREBASE" ]]; then + if ! [[ "$VIASH_PAR_READQUALITYSCOREBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--readQualityScoreBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_CLIP3PNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip3pNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PADAPTERMMP; do + if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--clip3pAdapterMMp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PAFTERADAPTERNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip3pAfterAdapterNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP5PNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP5PNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip5pNbases' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ]]; then + if ! [[ "$VIASH_PAR_LIMITGENOMEGENERATERAM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitGenomeGenerateRAM' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_LIMITIOBUFFERSIZE; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitIObufferSize' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSAMoneReadBytes' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSJONEREAD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSJoneRead' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSJCOLLAPSED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSJcollapsed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITBAMSORTRAM" ]]; then + if ! [[ "$VIASH_PAR_LIMITBAMSORTRAM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitBAMsortRAM' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ]]; then + if ! [[ "$VIASH_PAR_LIMITSJDBINSERTNSJ" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitSjdbInsertNsj' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITNREADSSOFT" ]]; then + if ! 
[[ "$VIASH_PAR_LIMITNREADSSOFT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitNreadsSoft' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ]]; then + if ! [[ "$VIASH_PAR_OUTQSCONVERSIONADD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outQSconversionAdd' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMATTRIHSTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMattrIHstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMMAPQUNIQUE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMmapqUnique' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMFLAGOR" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMFLAGOR" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMflagOR' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMFLAGAND" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMFLAGAND" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMflagAND' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMMULTNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMmultNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMTLEN" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMTLEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMtlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ]]; then + if ! 
[[ "$VIASH_PAR_OUTBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ]]; then + if ! [[ "$VIASH_PAR_OUTBAMSORTINGTHREADN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMsortingThreadN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ]]; then + if ! [[ "$VIASH_PAR_OUTBAMSORTINGBINSN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMsortingBinsN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ]]; then + if ! [[ "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--bamRemoveDuplicatesMate2basesN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ]]; then + if ! 
[[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMismatchNoverLmax' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMismatchNoverReadLmax' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterScoreMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterScoreMinOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMatchNmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMatchNminOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTEROVERHANGMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterCountUniqueMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterCountTotalMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterDistToOtherSJmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterIntronMaxVsReadN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_SCOREGAP" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAP" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGap' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPNONCAN" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPNONCAN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapNoncan' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPGCAG" ]]; then + if ! 
[[ "$VIASH_PAR_SCOREGAPGCAG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapGCAG' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPATAC" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPATAC" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapATAC' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ]]; then + if ! [[ "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGenomicLengthLog2scale' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREDELOPEN" ]]; then + if ! [[ "$VIASH_PAR_SCOREDELOPEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreDelOpen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREDELBASE" ]]; then + if ! [[ "$VIASH_PAR_SCOREDELBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreDelBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREINSOPEN" ]]; then + if ! [[ "$VIASH_PAR_SCOREINSOPEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreInsOpen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREINSBASE" ]]; then + if ! [[ "$VIASH_PAR_SCOREINSBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreInsBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ]]; then + if ! [[ "$VIASH_PAR_SCORESTITCHSJSHIFT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreStitchSJshift' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ]]; then + if ! 
[[ "$VIASH_PAR_SEEDSEARCHSTARTLMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSearchStartLmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ]]; then + if ! [[ "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--seedSearchStartLmaxOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHLMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDSEARCHLMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSearchLmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ]]; then + if ! [[ "$VIASH_PAR_SEEDNONELOCIPERWINDOW" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedNoneLociPerWindow' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSPLITMIN" ]]; then + if ! [[ "$VIASH_PAR_SEEDSPLITMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSplitMin' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDMAPMIN" ]]; then + if ! [[ "$VIASH_PAR_SEEDMAPMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedMapMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNINTRONMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNINTRONMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignIntronMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNINTRONMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNINTRONMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignIntronMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNMATESGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignMatesGapMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSJOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJstitchMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJDBoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ]]; then + if ! 
[[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSplicedMateMapLmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--alignSplicedMateMapLminOverLmate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignWindowsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignTranscriptsPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignTranscriptsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ]]; then + if ! [[ "$VIASH_PAR_PEOVERLAPNBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--peOverlapNbasesMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PEOVERLAPMMP" ]]; then + if ! [[ "$VIASH_PAR_PEOVERLAPMMP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--peOverlapMMp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ]]; then + if ! 
[[ "$VIASH_PAR_WINANCHORMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winAnchorMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINBINNBITS" ]]; then + if ! [[ "$VIASH_PAR_WINBINNBITS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winBinNbits' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINANCHORDISTNBINS" ]]; then + if ! [[ "$VIASH_PAR_WINANCHORDISTNBINS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winAnchorDistNbins' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINFLANKNBINS" ]]; then + if ! [[ "$VIASH_PAR_WINFLANKNBINS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winFlankNbins' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ]]; then + if ! [[ "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--winReadCoverageRelativeMin' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ]]; then + if ! [[ "$VIASH_PAR_WINREADCOVERAGEBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winReadCoverageBasesMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSEGMENTMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMSEGMENTMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimSegmentMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreMin' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREDROPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreDropMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCORESEPARATION" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCORESEPARATION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreSeparation' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreJunctionNonGTAG' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimJunctionOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimSegmentReadGapMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMainSegmentMultNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ]]; then + if ! 
[[ "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimNonchimScoreDropMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ]]; then + if ! [[ "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimOutJunctionFormat' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ]]; then + if ! [[ "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--quantTranscriptomeBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TWOPASS1READSN" ]]; then + if ! [[ "$VIASH_PAR_TWOPASS1READSN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--twopass1readsN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOCBSTART" ]]; then + if ! [[ "$VIASH_PAR_SOLOCBSTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloCBstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOCBLEN" ]]; then + if ! [[ "$VIASH_PAR_SOLOCBLEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloCBlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOUMISTART" ]]; then + if ! [[ "$VIASH_PAR_SOLOUMISTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloUMIstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOUMILEN" ]]; then + if ! 
[[ "$VIASH_PAR_SOLOUMILEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloUMIlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ]]; then + if ! [[ "$VIASH_PAR_SOLOBARCODEREADLENGTH" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloBarcodeReadLength' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOBARCODEMATE" ]]; then + if ! [[ "$VIASH_PAR_SOLOBARCODEMATE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloBarcodeMate' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ]]; then + if ! [[ "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloAdapterMismatchesNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" ]]; then + if ! [[ "$VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--minimum_alignment_quality' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ADD_CHROMOSOME_INFO" ]]; then + if ! [[ "$VIASH_PAR_ADD_CHROMOSOME_INFO" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--add_chromosome_info' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" ]]; then + if ! [[ "$VIASH_PAR_COUNTS_OUTPUT_SPARSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--counts_output_sparse' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_STRANDED" ]; then + VIASH_PAR_STRANDED_CHOICES=("yes:no:reverse") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_STRANDED_CHOICES[*]}:" =~ ":$VIASH_PAR_STRANDED:" ]]; then + ViashError '--stranded' specified value of \'$VIASH_PAR_STRANDED\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_MODE" ]; then + VIASH_PAR_MODE_CHOICES=("union:intersection-strict:intersection-nonempty") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then + ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_NON_UNIQUE" ]; then + VIASH_PAR_NON_UNIQUE_CHOICES=("none:all:fraction:random") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_NON_UNIQUE_CHOICES[*]}:" =~ ":$VIASH_PAR_NON_UNIQUE:" ]]; then + ViashError '--non_unique' specified value of \'$VIASH_PAR_NON_UNIQUE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_SECONDARY_ALIGNMENTS" ]; then + VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES=("score:ignore") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_SECONDARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SECONDARY_ALIGNMENTS:" ]]; then + ViashError '--secondary_alignments' specified value of \'$VIASH_PAR_SECONDARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS" ]; then + VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES=("score:ignore") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS_CHOICES[*]}:" =~ ":$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS:" ]]; then + ViashError '--supplementary_alignments' specified value of \'$VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! 
-z "$VIASH_PAR_INPUT_R1" ]; then + VIASH_TEST_INPUT_R1=() + IFS=';' + for var in $VIASH_PAR_INPUT_R1; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT_R1+=( "$var" ) + done + VIASH_PAR_INPUT_R1=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R1[*]}") +fi +if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then + VIASH_TEST_INPUT_R2=() + IFS=';' + for var in $VIASH_PAR_INPUT_R2; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT_R2+=( "$var" ) + done + VIASH_PAR_INPUT_R2=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R2[*]}") +fi +if [ ! -z "$VIASH_PAR_REFERENCE_INDEX" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE_INDEX")" ) + VIASH_PAR_REFERENCE_INDEX=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE_INDEX") +fi +if [ ! -z "$VIASH_PAR_REFERENCE_GTF" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE_GTF")" ) + VIASH_PAR_REFERENCE_GTF=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_TEST_GENOMEFASTAFILES=() + IFS=';' + for var in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_GENOMEFASTAFILES+=( "$var" ) + done + VIASH_PAR_GENOMEFASTAFILES=$(IFS=';' ; echo "${VIASH_TEST_GENOMEFASTAFILES[*]}") +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SJDBGTFFILE")" ) + VIASH_PAR_SJDBGTFFILE=$(ViashAutodetectMount "$VIASH_PAR_SJDBGTFFILE") +fi +if [ ! 
-z "$VIASH_PAR_READFILESMANIFEST" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_READFILESMANIFEST")" ) + VIASH_PAR_READFILESMANIFEST=$(ViashAutodetectMount "$VIASH_PAR_READFILESMANIFEST") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0)" +# Pipe the script below into bash running inside the container. The VIASHEOF delimiter is +# unquoted, so this wrapper expands variables and command substitutions before sending the +# text; the backslash-escaped ones are left for the shell inside the container. +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_multi_star:0.12.0 +set -e +# NOTE(review): mktemp creates a file without the .py suffix, while tempscript holds that +# name with .py appended; clean_up removes only the suffixed path, so the file created by +# mktemp itself appears to be left behind in the temp dir -- confirm. +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-multi_star-XXXXXX").py +# remove the temporary script file; registered below for the EXIT trap +function clean_up { + rm "\$tempscript" +} +# print a notice and exit with status 1; registered below for INT +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from typing import Any, Dict, List, Tuple +import math +import tempfile +import subprocess +import tarfile +import gzip +import shutil +from pathlib import Path +import yaml +import pandas as pd +from multiprocess import Pool +import gtfparse + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'input_r1': $( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "r'${VIASH_PAR_INPUT_R1//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'input_r2': $( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "r'${VIASH_PAR_INPUT_R2//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'reference_index': $( if [ ! -z ${VIASH_PAR_REFERENCE_INDEX+x} ]; then echo "r'${VIASH_PAR_REFERENCE_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_gtf': $( if [ ! -z ${VIASH_PAR_REFERENCE_GTF+x} ]; then echo "r'${VIASH_PAR_REFERENCE_GTF//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'run_htseq_count': $( if [ !
-z ${VIASH_PAR_RUN_HTSEQ_COUNT+x} ]; then echo "r'${VIASH_PAR_RUN_HTSEQ_COUNT//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'run_multiqc': $( if [ ! -z ${VIASH_PAR_RUN_MULTIQC+x} ]; then echo "r'${VIASH_PAR_RUN_MULTIQC//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'min_success_rate': $( if [ ! -z ${VIASH_PAR_MIN_SUCCESS_RATE+x} ]; then echo "float(r'${VIASH_PAR_MIN_SUCCESS_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneName': $( if [ ! 
-z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'readMatesLengthsIn': $( if [ ! 
-z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'limitOutSAMoneReadBytes': $( if [ ! 
-z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMstrandField': $( if [ ! 
-z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderPG': $( if [ ! 
-z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outWigReferencesPrefix': $( if [ ! 
-z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMatchNminOverLread': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterIntronStrands': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapNoncan': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapATAC': $( if [ ! -z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSplicedMateMapLmin': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winAnchorMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimJunctionOverhangMin': $( if [ ! 
-z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'twopassMode': $( if [ ! 
-z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloAdapterSequence': $( if [ ! 
-z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCellFilter': $( if [ ! 
-z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'add_chromosome_info': $( if [ ! -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'non_unique': $( if [ ! -z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'supplementary_alignments': $( if [ ! 
-z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + + +def fetch_arguments_info(config: Dict[str, Any]) -> Dict[str, Any]: + """Fetch arguments from config""" + arguments = { + arg["name"].removeprefix("-").removeprefix("-"): arg + for group in config["functionality"]["argument_groups"] + for arg in group["arguments"] + } + return arguments + +def process_par( + par: Dict[str, Any], + arguments_info: Dict[str, Any], + gz_args: List[str], + temp_dir: Path +) -> Dict[str, Any]: + """ + Process the Viash par dictionary + + This turns file strings into Path objects and extracting gzipped files if need be. + + Parameters + ---------- + par: The par dictionary created by Viash + arguments_info: The arguments info Dictionary created by \`fetch_arguments_info\` + gz_args: A list of argument keys which could be gzip files which need to be decompressed. 
+ temp_dir: A temporary directory in which to ungzip files + """ + new_par = {} + for key, value in par.items(): + arg_info = arguments_info[key] + # turn file arguments into paths + if value and arg_info["type"] == "file": + is_multiple = isinstance(value, list) + + if is_multiple: + value = [Path(val) for val in value] + else: + value = Path(value) + + if key in gz_args: + print(f">> Checking compression of --{key}", flush=True) + # turn value into list if need be + if not is_multiple: + value = [value] + + # extract + value = [extract_if_need_be(path, temp_dir) for path in value] + + # unlist if need be + if not is_multiple: + value = value[0] + + new_par[key] = value + return new_par + +def generate_cmd_arguments(par, arguments_info, step_filter=None, flatten=False): + """ + Generate command-line arguments by fetching the relevant args + + Parameters + ---------- + par: The par dictionary created by Viash + arguments_info: The arguments info Dictionary created by \`fetch_arguments_info\` + step_filter: If provided,\`par\` will be filtered to only contain arguments for which + argument.info.step == step_filter. + flatten: If \`False\`, the command for an argument with multiple values will be + \`["--key", "value1", "--key", "value2"]\`, otherwise \`["--key", "value1", "value2"]\`. 
+ """ + cmd_args = [] + + for key, arg in arguments_info.items(): + arg_val = par.get(key) + # The info key is always present (changed in viash 0.7.4) + # in the parsed config (None if not specified in source config) + info = arg["info"] or {} + orig_arg = info.get("orig_arg") + step = info.get("step") + if arg_val and orig_arg and (not step_filter or step == step_filter): + if not arg.get("multiple", False): + arg_val = [arg_val] + + if arg["type"] in ["boolean_true", "boolean_false"]: + # if argument is a boolean_true or boolean_false, simply add the flag + arg_val = [orig_arg] + elif orig_arg.startswith("-"): + # if the orig arg flag is not a positional, + # add the flag in front of each element and flatten + if flatten: + arg_val = [str(x) for x in [orig_arg] + arg_val] + else: + arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] + + cmd_args.extend(arg_val) + + return cmd_args + +def is_gz_file(path: Path) -> bool: + """Check whether something is a gzip""" + with open(path, "rb") as file: + return file.read(2) == b"\\x1f\\x8b" + +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + """if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path""" + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f" Tar detected; extracting {par_value} to {unpacked_path}", flush=True) + + with tarfile.open(par_value, "r") as open_tar: + members = open_tar.getmembers() + root_dirs = [ + member + for member in members + if member.isdir() and member.name != "." 
and "/" not in member.name + ] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path(".")] + open_tar.extractall(unpacked_path, members=members_to_move) + return unpacked_path + + elif par_value.is_file() and is_gz_file(par_value): + # Remove extension (if it exists) + extaction_file_name = Path(par_value.stem) + unpacked_path = temp_dir_path / extaction_file_name + print(f" Gzip detected; extracting {par_value} to {unpacked_path}", flush=True) + + with gzip.open(par_value, "rb") as f_in: + with open(unpacked_path, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + return unpacked_path + + else: + return par_value + +def load_star_reference(reference_index: str) -> None: + """Load star reference index into memory.""" + subprocess.run( + [ + "STAR", + "--genomeLoad", "LoadAndExit", + "--genomeDir", str(reference_index), + ], + check=True + ) + +def unload_star_reference(reference_index: str) -> None: + """Remove star reference index from memory.""" + subprocess.run( + [ + "STAR", + "--genomeLoad", "Remove", + "--genomeDir", str(reference_index), + ], + check=True + ) + +def star_and_htseq( + group_id: str, + r1_files: List[Path], + r2_files: List[Path], + temp_dir: Path, + par: Dict[str, Any], + arguments_info: Dict[str, Any], + num_threads: int +) -> Tuple[int, str] : + star_output = par["output"] / "per" / group_id + temp_dir_group = temp_dir / f"star_tmp_{group_id}" + unsorted_bam = star_output / "Aligned.out.bam" + sorted_bam = star_output / "Aligned.sorted.out.bam" + counts_file = star_output / "htseq-count.txt" + multiqc_path = star_output / "multiqc_data" + + print(f">> Running STAR for group '{group_id}' with command:", flush=True) + star_output.mkdir(parents=True, exist_ok=True) + temp_dir_group.parent.mkdir(parents=True, 
exist_ok=True) + run_star( + r1_files=r1_files, + r2_files=r2_files, + output_dir=star_output, + temp_dir=temp_dir / f"star_tmp_{group_id}", + par=par, + arguments_info=arguments_info, + num_threads=num_threads + ) + if not unsorted_bam.exists(): + return (1, f"Could not find unsorted bam at '{unsorted_bam}'") + + if par["run_htseq_count"]: + print(f">> Running samtools sort for group '{group_id}' with command:", flush=True) + run_samtools_sort(unsorted_bam, sorted_bam) + if not sorted_bam.exists(): + return (1, f"Could not find sorted bam at '{unsorted_bam}'") + + print(f">> Running htseq-count for group '{group_id}' with command:", flush=True) + run_htseq_count(sorted_bam, counts_file, par, arguments_info) + if not counts_file.exists(): + return (1, f"Could not find counts at '{counts_file}'") + + if par["run_multiqc"]: + run_multiqc(star_output) + if not multiqc_path.exists(): + return (1, f"Could not find MultiQC output at '{multiqc_path}'") + + return (0, "") + +def run_star( + r1_files: List[Path], + r2_files: List[Path], + output_dir: Path, + temp_dir: Path, + par: Dict[str, Any], + arguments_info: Dict[str, Any], + num_threads: int +) -> None: + """Run star""" + # process manual arguments + r1_pasted = [",".join([str(r1) for r1 in r1_files])] + r2_pasted = [",".join([str(r2) for r2 in r2_files])] if r2_files else [] + manual_par = { + "--genomeDir": [par["reference_index"]], + "--genomeLoad": ["LoadAndRemove"], + "--runThreadN": [str(num_threads)], + "--runMode": ["alignReads"], + "--readFilesIn": r1_pasted + r2_pasted, + # create a tempdir per group + "--outTmpDir": [temp_dir], + # make sure there is a trailing / + "--outFileNamePrefix": [f"{output_dir}/"], + # fix the outSAMtype to return unsorted BAM files + "--outSAMtype": ["BAM", "Unsorted"] + } + manual_cmd = [str(x) + for key, values in manual_par.items() + for x in [key] + values + ] + + # process all passthrough star arguments + par_cmd = generate_cmd_arguments(par, arguments_info, "star", 
flatten=True) + + # combine into one command and turn into strings + cmd_args = [str(val) for val in ["STAR"] + manual_cmd + par_cmd] + + # run star + subprocess.run(cmd_args, check=True) + +def run_samtools_sort( + unsorted_bam: Path, + sorted_bam: Path +) -> None: + "Run samtools sort" + cmd_args = [ + "samtools", + "sort", + "-o", + sorted_bam, + unsorted_bam, + ] + subprocess.run(cmd_args, check=True) + +def run_htseq_count( + sorted_bam: Path, + counts_file: Path, + par: Dict[str, Any], + arguments_info: Dict[str, Any] +) -> None: + """Run HTSeq count""" + # process manual arguments + manual_cmd = [ + sorted_bam, + par["reference_gtf"] + ] + + # process all passthrough htseq arguments + par_cmd = generate_cmd_arguments(par, arguments_info, "htseq") + + # combine into one command and turn into strings + cmd_args = [str(val) for val in ["htseq-count"] + manual_cmd + par_cmd] + + # run htseq + with open(counts_file, "w", encoding="utf-8") as file: + subprocess.run(cmd_args, check=True, stdout=file) + +def get_feature_info(reference_gtf) -> pd.DataFrame: + ref = gtfparse.read_gtf(reference_gtf) + ref_genes = ref.loc[(ref["feature"] == "gene") | (ref["source"] == "ERCC")] + return pd.DataFrame( + { + "feature_id": ref_genes["gene_id"], + "feature_type": "Gene Expression", + "feature_name": ref_genes["gene_name"] + } + ) + +def run_multiqc(input_dir: Path) -> None: + cmd_args = ["multiqc", str(input_dir), "--outdir", str(input_dir), "--no-report", "--force"] + + # run multiqc + subprocess.run(cmd_args, check=True) + + +######################## +### Main code ### +######################## + +def main(par, meta): + """Main function""" + + # check input arguments + assert len(par["input_id"]) == len(par["input_r1"]), "--input_r1 should have same length as --input_id" + if par["input_r2"]: + assert len(par["input_id"]) == len(par["input_r2"]), "--input_r2 should have same length as --input_id" + + # read config arguments + with open(meta["config"], "r", 
encoding="utf-8") as file: + config = yaml.safe_load(file) + + # fetch all arguments from the config and turn it into a Dict[str, Argument] + arguments_info = fetch_arguments_info(config) + + # temp_dir = "tmp/" + with tempfile.TemporaryDirectory( + prefix=f"{meta['functionality_name']}-", + dir=meta["temp_dir"], + ignore_cleanup_errors=True + ) as temp_dir: + temp_dir = Path(temp_dir) + temp_dir.mkdir(parents=True, exist_ok=True) + + # turn file strings into Paths and decompress gzip if need be + gz_args = ["input_r1", "input_r2", "reference_index", "reference_gtf"] + par = process_par(par, arguments_info, gz_args, temp_dir) + + # make sure input_r2 has same length as input_r1 + if not par["input_r2"]: + par["input_r2"] = [None for _ in par["input_r1"]] + + # group input_files by input_id + print(">> Group by --input_id", flush=True) + grouped_inputs = {} + for group_id, file_r1, file_r2 in zip(par["input_id"], par["input_r1"], par["input_r2"]): + if group_id not in grouped_inputs: + grouped_inputs[group_id] = ([], []) + grouped_inputs[group_id][0].append(file_r1) + if file_r2: + grouped_inputs[group_id][1].append(file_r2) + + # create output dir if need be + par["output"].mkdir(parents=True, exist_ok=True) + + # store features metadata + feature_info = get_feature_info(str(par["reference_gtf"])) + with open(par["output"] / "feature_info.tsv", "w", encoding="utf-8") as file: + feature_info.to_csv(file, sep="\\t", index=False) + + # try: + # print(">> Loading genome in memory", flush=True) + # load_star_reference(par["reference_index"]) + + cpus = meta.get("cpus", 1) + num_items = len(grouped_inputs) + pool_size = min(cpus, num_items) + num_threads_per_task = math.ceil(cpus / pool_size) + + with Pool(pool_size) as pool: + outs = pool.starmap( + lambda group_id, files: star_and_htseq( + group_id=group_id, + r1_files=files[0], + r2_files=files[1], + temp_dir=temp_dir, + par=par, + arguments_info=arguments_info, + num_threads=num_threads_per_task + ), + 
grouped_inputs.items() + ) + + num_errored = 0 + for exit, msg in outs: + if exit != 0: + print(f"Error: {msg}") + num_errored += 1 + + pct_succeeded = 1.0 - num_errored / len(outs) + print("------------------") + print(f"Success rate: {math.ceil(pct_succeeded * 100)}%") + + assert pct_succeeded >= par["min_success_rate"], f"Success rate should be at least {math.ceil(par['min_success_rate'] * 100)}%" + +if __name__ == "__main__": + main(par, meta) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then + unset VIASH_TEST_INPUT_R1 + IFS=';' + for var in $VIASH_PAR_INPUT_R1; do + unset IFS + if [ -z "$VIASH_TEST_INPUT_R1" ]; then + VIASH_TEST_INPUT_R1="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT_R1="$VIASH_TEST_INPUT_R1;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT_R1="$VIASH_TEST_INPUT_R1" +fi +if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then + unset VIASH_TEST_INPUT_R2 + IFS=';' + for var in $VIASH_PAR_INPUT_R2; do + unset IFS + if [ -z "$VIASH_TEST_INPUT_R2" ]; then + VIASH_TEST_INPUT_R2="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT_R2="$VIASH_TEST_INPUT_R2;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT_R2="$VIASH_TEST_INPUT_R2" +fi +if [ ! -z "$VIASH_PAR_REFERENCE_INDEX" ]; then + VIASH_PAR_REFERENCE_INDEX=$(ViashStripAutomount "$VIASH_PAR_REFERENCE_INDEX") +fi +if [ ! -z "$VIASH_PAR_REFERENCE_GTF" ]; then + VIASH_PAR_REFERENCE_GTF=$(ViashStripAutomount "$VIASH_PAR_REFERENCE_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + unset VIASH_TEST_GENOMEFASTAFILES + IFS=';' + for var in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + if [ -z "$VIASH_TEST_GENOMEFASTAFILES" ]; then + VIASH_TEST_GENOMEFASTAFILES="$(ViashStripAutomount "$var")" + else + VIASH_TEST_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES" +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then + VIASH_PAR_SJDBGTFFILE=$(ViashStripAutomount "$VIASH_PAR_SJDBGTFFILE") +fi +if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ]; then + VIASH_PAR_READFILESMANIFEST=$(ViashStripAutomount "$VIASH_PAR_READFILESMANIFEST") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml b/target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..650943f4a64 --- /dev/null +++ b/target/docker/mapping/multi_star_to_h5mu/.config.vsh.yaml @@ -0,0 +1,179 @@ +functionality: + name: "multi_star_to_h5mu" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "The directory created by `multi_star`" + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Convert the output of `multi_star` to a h5mu.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq/multi_star" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + 
cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu b/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu new file mode 100755 index 00000000000..cc20dfe2277 --- /dev/null +++ b/target/docker/mapping/multi_star_to_h5mu/multi_star_to_h5mu @@ -0,0 +1,1017 @@ +#!/usr/bin/env bash + +# multi_star_to_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (author, maintainer) +# * Angela Oliveira Pisco (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events 
depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="multi_star_to_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "multi_star_to_h5mu 0.12.4" + echo "" + echo "Convert the output of \`multi_star\` to a h5mu." + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: /path/to/foo" + echo " The directory created by \`multi_star\`" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
+ exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# Exits with status 1 when the strategy is not recognised.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  VSHD_ID="$1"
  VSHD_STRAT="$2"
  case "$VSHD_STRAT" in
    alwaysbuild|build|b)
      ViashDockerBuild $VSHD_ID --no-cache
      ;;
    alwayspull|pull|p)
      ViashDockerPull $VSHD_ID
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild $VSHD_ID --no-cache
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild $VSHD_ID
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild $VSHD_ID
      ;;
    ifneedbe*)
      # the "ifneedbe" strategies only act when the image is not available locally
      save=$-; set +e
      ViashDockerLocalTagCheck $VSHD_ID
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $VSHD_ID already exists"
      else
        case "$VSHD_STRAT" in
          ifneedbebuild)
            ViashDockerBuild $VSHD_ID --no-cache
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild $VSHD_ID
            ;;
          ifneedbepull)
            ViashDockerPull $VSHD_ID
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild $VSHD_ID --no-cache
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild $VSHD_ID
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$VSHD_ID"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      # only push when the tag cannot be found on the remote
      save=$-; set +e
      ViashDockerRemoteTagCheck $VSHD_ID
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$VSHD_ID' exists, doing nothing."
      else
        ViashNotice "Container '$VSHD_ID' does not yet exist."
        ViashDockerPush "$VSHD_ID"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
      ;;
  esac
}
# ViashDockerBuild: build a docker container
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : optional extra arguments for `docker build` (e.g. --no-cache)
# exit code $?        : whether or not the image was built
# Exits the script with status 1 when the build fails.
function ViashDockerBuild {
  # create temporary directory to store dockerfile & optional resources in
  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-multi_star_to_h5mu-XXXXXX")
  dockerfile="$tmpdir/Dockerfile"
  function clean_up {
    rm -rf "$tmpdir"
  }
  trap clean_up EXIT

  # store dockerfile and resources
  # (paths are quoted throughout: the temp and resources directories may contain whitespace)
  ViashDockerfile > "$dockerfile"

  # Build the container
  ViashNotice "Building container '$1' with Dockerfile"
  ViashInfo "Running 'docker build -t $* $VIASH_META_RESOURCES_DIR -f $dockerfile'"
  save=$-; set +e
  # NOTE(review): assumes VIASH_META_RESOURCES_DIR is already set when a build is requested - confirm
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile"
  else
    # below info verbosity the build output is kept in a log that is only shown on failure
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log"
  fi
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashError "Error occurred while building container '$1'"
    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
      ViashError "Transcript: --------------------------------"
      cat "$tmpdir/docker_build.log"
      ViashError "End of transcript --------------------------"
    fi
    exit 1
  fi
  # verify that the commands required by the wrapper are present in the image
  ViashDockerCheckCommands "$1" 'ps' 'bash'
}
# Parse the command line.
#   --name / --name=value    : component parameters, stored in VIASH_PAR_*
#   ---name / ---name=value  : viash / docker options (verbosity, setup, volumes, cpus, memory)
#   anything else            : collected in VIASH_POSITIONAL_ARGS
# Two-word options consume two arguments (shift 2), `=`-style options one (shift 1).
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            ViashHelp
            exit
            ;;
        ---v|---verbose)
            let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
            shift 1
            ;;
        ---verbosity)
            VIASH_VERBOSITY="$2"
            shift 2
            ;;
        ---verbosity=*)
            VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
            shift 1
            ;;
        --version)
            echo "multi_star_to_h5mu 0.12.4"
            exit
            ;;
        --input)
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --input=*)
            # the new value is embedded in $1 here, so report that rather than $2
            [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$(ViashRemoveFlags "$1")\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        --output)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output=*)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$(ViashRemoveFlags "$1")\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        -o)
            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output_compression)
            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT_COMPRESSION="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        --output_compression=*)
            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$(ViashRemoveFlags "$1")\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---setup)
            # the strategy is a separate word: require it and consume both words,
            # otherwise the strategy is parsed a second time as a positional argument
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$2"
            shift 2
            ;;
        ---setup=*)
            # the strategy is part of $1, so only one word may be consumed
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
            shift 1
            ;;
        ---dockerfile)
            ViashDockerfile
            exit 0
            ;;
        ---volume)
            # '---v' is not listed here: it is already taken by ---verbose above
            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
            shift 2
            ;;
        ---volume=*)
            # the mount specification is part of $1, not of the next argument
            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
            shift 1
            ;;
        ---debug)
            VIASH_MODE='docker_debug'
            shift 1
            ;;
        ---cpus)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---cpus=*)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$(ViashRemoveFlags "$1")\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---memory)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---memory=*)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$(ViashRemoveFlags "$1")\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        *)  # positional arg or unknown option
            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
            shift # past argument
            ;;
    esac
done
# compute memory in different units
# Each larger unit is derived from the previous one and rounded up.
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  # quoted so that a value with embedded whitespace reaches the helper as one argument
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # (unset takes the variable NAME; `unset $VAR` expands the empty value and
    # unsets nothing, leaving a set-but-empty variable behind)
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
# check whether the value belongs to the set of allowed choices
# An exact match is required: the former substring test against the joined
# list ":gzip:lzf:" also accepted the combined value 'gzip:lzf'.
if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then
  case "$VIASH_PAR_OUTPUT_COMPRESSION" in
    gzip|lzf)
      ;;
    *)
      ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters.
      exit 1
      ;;
  esac
fi
# change file ownership
# ViashPerformChown: hand the paths listed in VIASH_CHOWN_VARS to the calling user
# Runs `chown` as the entrypoint of the component image, with the detected
# mounts attached, and sets owner and group to the current `id -u`:`id -g`.
# Does nothing when VIASH_CHOWN_VARS is empty.
function ViashPerformChown {
  if (( ${#VIASH_CHOWN_VARS[@]} )); then
    # errexit is switched off around the call, so a failing chown does not abort the script
    set +e
    # eval is required: the mount arguments carry their own embedded double quotes
    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_multi_star_to_h5mu:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
    set -e
  fi
}
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
# NOTE: this script is embedded in an unquoted shell here-document, so the
# shell still processes backslashes and dollar signs before Python sees the
# text. That is why the tab separator below is written with a doubled backslash.

# convert to path
input_dir = Path(par["input"])
per_dir = input_dir / "per"

# read counts information
print("> Read counts data", flush=True)
per_obs_data = []

# Path.glob yields matches in arbitrary (filesystem) order; sort them so the
# observation order of the output is reproducible between runs.
for input_counts in sorted(per_dir.glob("**/htseq-count.txt")):
    per_obs_dir = input_counts.parent
    input_id = per_obs_dir.name
    input_multiqc = per_obs_dir / "multiqc_data" / "multiqc_data.json"

    data = pd.read_table(
        input_counts,
        index_col=0,
        names=["cell_id", input_id],
        dtype={"cell_id": "U", input_id: "i"}
    )
    # drop the rows whose id starts with a double underscore
    data2 = data[~data.index.str.startswith("__")]

    with open(input_multiqc, "r") as file:
        qc = json.load(file)

    raw_qc = qc.get("report_saved_raw_data", {})
    obs_qc_star = raw_qc.get("multiqc_star", {}).get(input_id)
    obs_qc_htseq = raw_qc.get("multiqc_htseq", {}).get("htseq-count")

    per_obs_data.append({
        "counts": data2.transpose(),
        "qc_star": pd.DataFrame(obs_qc_star, index=[input_id]),
        "qc_htseq": pd.DataFrame(obs_qc_htseq, index=[input_id])
    })

# fail with an actionable message instead of the generic concat error
if not per_obs_data:
    raise ValueError(f"No 'htseq-count.txt' files found below '{per_dir}'.")

# combine all counts
counts = pd.concat([x["counts"] for x in per_obs_data], axis=0)
qc_star = pd.concat([x["qc_star"] for x in per_obs_data], axis=0)
qc_htseq = pd.concat([x["qc_htseq"] for x in per_obs_data], axis=0)

# read feature info
feature_info = pd.read_csv(input_dir / "feature_info.tsv", sep="\\t", index_col=0)
# reorder the feature table to match the columns of the count matrix
feature_info_ord = feature_info.loc[counts.columns]

var = pd.DataFrame(
    data={
        "gene_ids": feature_info_ord.index,
        "feature_types": "Gene Expression",
        "gene_name": feature_info_ord["feature_name"],
    }
).set_index("gene_ids")

print("> construct anndata", flush=True)
adata = ad.AnnData(
    X=counts,
    obsm={"qc_star": qc_star, "qc_htseq": qc_htseq},
    var=var,
    dtype=np.int32
)

print("> convert to mudata", flush=True)
mdata = md.MuData(adata)

print("> write to file", flush=True)
mdata.write_h5mu(par["output"], compression=par["output_compression"])
+VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/samtools_sort/.config.vsh.yaml b/target/docker/mapping/samtools_sort/.config.vsh.yaml new file mode 100644 index 00000000000..6d9b998df68 --- /dev/null +++ b/target/docker/mapping/samtools_sort/.config.vsh.yaml @@ -0,0 +1,270 @@ +functionality: + name: "samtools_sort" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: 
"https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Path to the SAM/BAM/CRAM files containing the mapped reads." + info: + orig_arg: "in_sam" + example: + - "input.bam" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output" + arguments: + - type: "file" + name: "--output_bam" + description: "Filename to output the counts to." + info: + orig_arg: "-o" + example: + - "output.bam" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_bai" + description: "BAI-format index for BAM file." + info: null + example: + - "output.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_format" + description: "The output format. By default, samtools tries to select a format\ + \ based on the -o filename extension; if output is to standard output or no\ + \ format can be deduced, bam is selected." 
+ info: + orig_arg: "-O" + example: + - "bam" + required: false + choices: + - "sam" + - "bam" + - "cram" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--compression" + description: "Compression level, from 0 (uncompressed) to 9 (best" + info: + orig_arg: "-l" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "boolean_true" + name: "--minimizer_cluster" + description: "Sort unmapped reads (those in chromosome \"*\") by their sequence\ + \ minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse\ + \ complementing as appropriate. This has the effect of collating some similar\ + \ data together, improving the \ncompressibility of the unmapped sequence.\ + \ The minimiser kmer size is adjusted using the -K option. Note data compressed\ + \ \nin this manner may need to be name collated prior to conversion back to\ + \ fastq.\n\nMapped sequences are sorted by chromosome and position. \n" + info: + orig_arg: "-M" + direction: "input" + dest: "par" + - type: "integer" + name: "--minimizer_kmer" + description: "Sets the kmer size to be used in the -M option." + info: + orig_arg: "-K" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--sort_by_read_names" + description: "Sort by read names (i.e., the QNAME field) rather than by chromosomal\ + \ coordinates." + info: + orig_arg: "-n" + direction: "input" + dest: "par" + - type: "string" + name: "--sort_by" + description: "Sort first by this value in the alignment tag, then by position\ + \ or name (if also using -n)." + info: + orig_arg: "-t" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--no_pg" + description: "Do not add a @PG line to the header of the output file." 
+ info: + orig_arg: "--no-PG" + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost\ + \ coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate\ + \ `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\ + \nNote that to generate an index file (by specifying `--output_bai`), the default\ + \ coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by\ + \ ` options are incompatible with `--output_bai`. \n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "samtools" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "pyyaml" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: 
"memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/samtools_sort" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/samtools_sort/samtools_sort" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/samtools_sort/samtools_sort b/target/docker/mapping/samtools_sort/samtools_sort new file mode 100755 index 00000000000..72d60fdc9b4 --- /dev/null +++ b/target/docker/mapping/samtools_sort/samtools_sort @@ -0,0 +1,1185 @@ +#!/usr/bin/env bash + +# samtools_sort 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (author, maintainer) +# * Angela Oliveira Pisco (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events 
depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="samtools_sort" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "samtools_sort 0.12.4" + echo "" + echo "Sort and (optionally) index alignments." + echo "" + echo "Reads are sorted by leftmost coordinates, or by read name when" + echo "\`--sort_by_read_names\` is used." + echo "" + echo "An appropriate \`@HD-SO\` sort order header tag will be added or an existing one" + echo "updated if necessary." + echo "" + echo "Note that to generate an index file (by specifying \`--output_bai\`), the default" + echo "coordinate sort must be used." + echo "Thus the \`--sort_by_read_names\` and \`--sort_by \` options are incompatible" + echo "with \`--output_bai\`." + echo "" + echo "Input:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.bam" + echo " Path to the SAM/BAM/CRAM files containing the mapped reads." + echo "" + echo "Output:" + echo " --output_bam" + echo " type: file, required parameter, output, file must exist" + echo " example: output.bam" + echo " Filename to output the counts to." + echo "" + echo " --output_bai" + echo " type: file, output, file must exist" + echo " example: output.bam.bai" + echo " BAI-format index for BAM file." 
+ echo "" + echo " --output_format" + echo " type: string" + echo " example: bam" + echo " choices: [ sam, bam, cram ]" + echo " The output format. By default, samtools tries to select a format based" + echo " on the -o filename extension; if output is to standard output or no" + echo " format can be deduced, bam is selected." + echo "" + echo " --compression" + echo " type: integer" + echo " example: 5" + echo " Compression level, from 0 (uncompressed) to 9 (best" + echo "" + echo "Arguments:" + echo " --minimizer_cluster" + echo " type: boolean_true" + echo " Sort unmapped reads (those in chromosome \"*\") by their sequence" + echo " minimiser (Schleimer et al., 2003; Roberts et al., 2004)," + echo " also reverse complementing as appropriate. This has the effect of" + echo " collating some similar data together, improving the" + echo " compressibility of the unmapped sequence. The minimiser kmer size is" + echo " adjusted using the -K option. Note data compressed" + echo " in this manner may need to be name collated prior to conversion back to" + echo " fastq." + echo " Mapped sequences are sorted by chromosome and position." + echo "" + echo " --minimizer_kmer" + echo " type: integer" + echo " example: 20" + echo " Sets the kmer size to be used in the -M option." + echo "" + echo " --sort_by_read_names" + echo " type: boolean_true" + echo " Sort by read names (i.e., the QNAME field) rather than by chromosomal" + echo " coordinates." + echo "" + echo " --sort_by" + echo " type: string" + echo " Sort first by this value in the alignment tag, then by position or name" + echo " (if also using -n)." + echo "" + echo " --no_pg" + echo " type: boolean_true" + echo " Do not add a @PG line to the header of the output file." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y samtools procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "pyyaml" + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Angela Oliveira Pisco" +LABEL org.opencontainers.image.description="Companion container for running component mapping samtools_sort" +LABEL org.opencontainers.image.created="2024-01-31T09:08:31Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-samtools_sort-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "samtools_sort 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_bam) + [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bam. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_bam=*) + [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam=*\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_bai) + [ -n "$VIASH_PAR_OUTPUT_BAI" ] && ViashError Bad arguments for option \'--output_bai\': \'$VIASH_PAR_OUTPUT_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BAI="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bai. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_bai=*) + [ -n "$VIASH_PAR_OUTPUT_BAI" ] && ViashError Bad arguments for option \'--output_bai=*\': \'$VIASH_PAR_OUTPUT_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_BAI=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_format) + [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FORMAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_format. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_format=*) + [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format=*\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FORMAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --compression) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --compression=*) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --minimizer_cluster) + [ -n "$VIASH_PAR_MINIMIZER_CLUSTER" ] && ViashError Bad arguments for option \'--minimizer_cluster\': \'$VIASH_PAR_MINIMIZER_CLUSTER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MINIMIZER_CLUSTER=true + shift 1 + ;; + --minimizer_kmer) + [ -n "$VIASH_PAR_MINIMIZER_KMER" ] && ViashError Bad arguments for option \'--minimizer_kmer\': \'$VIASH_PAR_MINIMIZER_KMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMIZER_KMER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --minimizer_kmer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --minimizer_kmer=*) + [ -n "$VIASH_PAR_MINIMIZER_KMER" ] && ViashError Bad arguments for option \'--minimizer_kmer=*\': \'$VIASH_PAR_MINIMIZER_KMER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MINIMIZER_KMER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sort_by_read_names) + [ -n "$VIASH_PAR_SORT_BY_READ_NAMES" ] && ViashError Bad arguments for option \'--sort_by_read_names\': \'$VIASH_PAR_SORT_BY_READ_NAMES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORT_BY_READ_NAMES=true + shift 1 + ;; + --sort_by) + [ -n "$VIASH_PAR_SORT_BY" ] && ViashError Bad arguments for option \'--sort_by\': \'$VIASH_PAR_SORT_BY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORT_BY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sort_by. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sort_by=*) + [ -n "$VIASH_PAR_SORT_BY" ] && ViashError Bad arguments for option \'--sort_by=*\': \'$VIASH_PAR_SORT_BY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SORT_BY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --no_pg) + [ -n "$VIASH_PAR_NO_PG" ] && ViashError Bad arguments for option \'--no_pg\': \'$VIASH_PAR_NO_PG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_NO_PG=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then + ViashError '--output_bam' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MINIMIZER_CLUSTER+x} ]; then + VIASH_PAR_MINIMIZER_CLUSTER="false" +fi +if [ -z ${VIASH_PAR_SORT_BY_READ_NAMES+x} ]; then + VIASH_PAR_SORT_BY_READ_NAMES="false" +fi +if [ -z ${VIASH_PAR_NO_PG+x} ]; then + VIASH_PAR_NO_PG="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_COMPRESSION" ]]; then + if ! [[ "$VIASH_PAR_COMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--compression' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MINIMIZER_CLUSTER" ]]; then + if ! [[ "$VIASH_PAR_MINIMIZER_CLUSTER" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--minimizer_cluster' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MINIMIZER_KMER" ]]; then + if ! [[ "$VIASH_PAR_MINIMIZER_KMER" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--minimizer_kmer' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SORT_BY_READ_NAMES" ]]; then + if ! [[ "$VIASH_PAR_SORT_BY_READ_NAMES" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--sort_by_read_names' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NO_PG" ]]; then + if ! [[ "$VIASH_PAR_NO_PG" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--no_pg' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_FORMAT" ]; then + VIASH_PAR_OUTPUT_FORMAT_CHOICES=("sam:bam:cram") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_FORMAT_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_FORMAT:" ]]; then + ViashError '--output_format' specified value of \'$VIASH_PAR_OUTPUT_FORMAT\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT_BAM")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BAM")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_BAI")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BAI")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_BAM")" ) + VIASH_PAR_OUTPUT_BAM=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_BAM") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_BAM" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_BAI")" ) + VIASH_PAR_OUTPUT_BAI=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_BAI") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_BAI" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_samtools_sort:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-samtools_sort-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import tempfile +import subprocess +from pathlib import Path +import yaml + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_bam': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAM//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_bai': $( if [ ! -z ${VIASH_PAR_OUTPUT_BAI+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAI//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FORMAT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'minimizer_cluster': $( if [ ! -z ${VIASH_PAR_MINIMIZER_CLUSTER+x} ]; then echo "r'${VIASH_PAR_MINIMIZER_CLUSTER//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'minimizer_kmer': $( if [ ! -z ${VIASH_PAR_MINIMIZER_KMER+x} ]; then echo "int(r'${VIASH_PAR_MINIMIZER_KMER//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sort_by_read_names': $( if [ ! -z ${VIASH_PAR_SORT_BY_READ_NAMES+x} ]; then echo "r'${VIASH_PAR_SORT_BY_READ_NAMES//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'sort_by': $( if [ ! -z ${VIASH_PAR_SORT_BY+x} ]; then echo "r'${VIASH_PAR_SORT_BY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'no_pg': $( if [ ! -z ${VIASH_PAR_NO_PG+x} ]; then echo "r'${VIASH_PAR_NO_PG//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +def generate_args(par, config): + # fetch arguments from config + arguments = [ + arg + for group in config["functionality"]["argument_groups"] + for arg in group["arguments"] + ] + + cmd_args = [] + + for arg in arguments: + arg_val = par.get(arg["name"].removeprefix("--")) + # The info key is always present (changed in viash 0.7.4) + # in the parsed config (None if not specified in source config) + info = arg["info"] or {} + orig_arg = info.get("orig_arg") + if arg_val and orig_arg: + if not arg.get("multiple", False): + arg_val = [arg_val] + + if arg["type"] in ["boolean_true", "boolean_false"]: + # if argument is a boolean_true or boolean_false, simply add the flag + arg_val = [orig_arg] + elif orig_arg.startswith("-"): + # if the orig arg flag is not a positional, + # add the flag in front of each element and flatten + arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] + + 
cmd_args.extend(arg_val) + + return cmd_args + +# read config arguments +config = yaml.safe_load(Path(meta["config"]).read_text()) + +print(">> Constructing command", flush=True) +cmd_args = [ "samtools", "sort" ] + generate_args(par, config) + +# manually process cpus parameter +if 'cpus' in meta and meta['cpus']: + cmd_args.extend(["--threads", str(meta["cpus"])]) +# add memory +if 'memory_mb' in meta and meta['memory_mb']: + import math + mem_per_thread = math.ceil(meta['memory_mb'] * .8 / meta['cpus']) + cmd_args.extend(["-m", f"{mem_per_thread}M"]) + +with tempfile.TemporaryDirectory(prefix="samtools-", dir=meta["temp_dir"]) as temp_dir: + # add tempdir + cmd_args.extend(["-T", str(temp_dir + "/")]) + + # run command + print(">> Running samtools sort with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + subprocess.run(cmd_args, check=True) + +if par.get("output_bai"): + print(">> Running samtools index with command:", flush=True) + cmd_index_args = ["samtools", "index", "-b", par["output_bam"], par["output_bai"]] + print("+ " + ' '.join([str(x) for x in cmd_index_args]), flush=True) + subprocess.run(cmd_index_args, check=True) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then + VIASH_PAR_OUTPUT_BAM=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_BAM") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ]; then + VIASH_PAR_OUTPUT_BAI=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_BAI") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! -e "$VIASH_PAR_OUTPUT_BAM" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_BAM' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_BAI" ] && [ ! -e "$VIASH_PAR_OUTPUT_BAI" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_BAI' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/star_align/.config.vsh.yaml b/target/docker/mapping/star_align/.config.vsh.yaml new file mode 100644 index 00000000000..b88e343adcd --- /dev/null +++ b/target/docker/mapping/star_align/.config.vsh.yaml @@ -0,0 +1,2535 @@ +functionality: + name: "star_align" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input/Output" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--readFilesIn" + description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ + \ argument in the STAR command." 
+ info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "--genomeDir" + description: "Path to the reference built by star_build_reference. Corresponds\ + \ to the --genomeDir argument in the STAR command." + info: null + example: + - "/path/to/reference" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--outFileNamePrefix" + description: "Path to output directory. Corresponds to the --outFileNamePrefix\ + \ argument in the STAR command." + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Run Parameters" + arguments: + - type: "integer" + name: "--runRNGseed" + description: "random number generator seed." + info: null + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome Parameters" + arguments: + - type: "string" + name: "--genomeLoad" + description: "mode of shared memory usage for the genome files. Only used with\ + \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ + \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ + \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ + \ and exit, keeping the genome in memory for future runs\n- Remove \ + \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ + \ ... 
do not use shared memory, each job will have its own private copy of\ + \ the genome" + info: null + example: + - "NoSharedMemory" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--genomeFastaFiles" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--genomeFileSizes" + description: "genome files exact sizes in bytes. Typically, this should not\ + \ be defined by the user." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeTransformOutput" + description: "which output to transform back to original genome\n\n- SAM \ + \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ + \ None ... no transformation of the output" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeChrSetMitochondrial" + description: "names of the mitochondrial chromosomes. Presently only used for\ + \ STARsolo statistics output/" + info: null + example: + - "chrM" + - "M" + - "MT" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdbFileChrStartEnd" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. 
Multiple files can be\ + \ supplied and will be concatenated." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--sjdbGTFfile" + description: "path to the GTF file with annotations" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFchrPrefix" + description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSMEBL annotations with UCSC genomes)" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFfeatureExon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: null + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentTranscript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: null + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: null + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneName" + description: "GTF attribute name for parent gene name" + info: null + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneType" + description: "GTF attribute name for parent gene type" + info: null + example: + - "gene_type" + - "gene_biotype" + required: false + direction: 
"input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--sjdbOverhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--sjdbScore" + description: "extra alignment score for alignments that cross database junctions" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbInsertSave" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: null + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variation parameters" + arguments: + - type: "string" + name: "--varVCFfile" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Parameters" + arguments: + - type: "string" + name: "--readFilesType" + description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ + - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ + \ samtools view\n- SAM PE ... 
SAM or BAM paired-end reads; for BAM use\ + \ --readFilesCommand samtools view" + info: null + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesSAMattrKeep" + description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ + - None ... do not keep any tags" + info: null + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--readFilesManifest" + description: "path to the \"manifest\" file with the names of read files. The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ + \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ + \ but not tabs are allowed in file names.\nIf read_group_line does not start\ + \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ + If read_group_line starts with ID:, it can contain several fields separated\ + \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ + \ line." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesPrefix" + description: "prefix for the read files names, i.e. it will be added in front\ + \ of the strings in --readFilesIn" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesCommand" + description: "command line to execute for each of the input file. This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." 
+ info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readMapNumber" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readMatesLengthsIn" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ + \ mates are the same / not the same. NotEqual is safe in all situations." + info: null + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readNameSeparator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: null + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readQualityScoreBase" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: null + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Clipping" + arguments: + - type: "string" + name: "--clipAdapterType" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ + \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ + \ ... 
no adapter clipping, all other clip* parameters are disregarded" + info: null + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--clip3pNbases" + description: "number(s) of bases to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--clip3pAdapterSeq" + description: "adapter sequences to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ + \ sequence with the length equal to read length" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "double" + name: "--clip3pAdapterMMp" + description: "max proportion of mismatches for 3p adapter clipping for each\ + \ mate. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip3pAfterAdapterNbases" + description: "number of bases to clip from 3p of each mate after the adapter\ + \ clipping. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip5pNbases" + description: "number(s) of bases to clip from 5p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." 
+ info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Limits" + arguments: + - type: "long" + name: "--limitGenomeGenerateRAM" + description: "maximum available RAM (bytes) for genome generation" + info: null + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitIObufferSize" + description: "max available buffers size (bytes) for input/output, per thread" + info: null + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "long" + name: "--limitOutSAMoneReadBytes" + description: "max size of the SAM record (bytes) for one read. Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: null + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJoneRead" + description: "max number of junctions for one read (including all multi-mappers)" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJcollapsed" + description: "max number of collapsed junctions" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitBAMsortRAM" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ + \ be set to the genome index size. 0 value can only be used with --genomeLoad\ + \ NoSharedMemory option." 
+ info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitSjdbInsertNsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitNreadsSoft" + description: "soft limit on the number of reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: general" + arguments: + - type: "string" + name: "--outTmpKeep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... keep all files" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outStd" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" + info: null + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outReadsUnmapped" + description: "output of unmapped and partially mapped (i.e. 
mapped only one\ + \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ + \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outQSconversionAdd" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outMultimapperOrder" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." + info: null + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--outSAMtype" + description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ + \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ + 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ + \ ... sorted by coordinate. This option will allocate extra memory for sorting\ + \ which can be specified by --limitBAMsortRAM." + info: null + example: + - "SAM" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMmode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... 
full SAM but without quality scores" + info: null + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMstrandField" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ + \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ + \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ + \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ + \ local alignment score, +1/-1 for matches/mismateches, score* penalties for\ + \ indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n\ + - nM ... number of mismatches. For PE reads, sum over two mates.\n\ + - NM ... edit distance to the reference (number of mismatched + inserted\ + \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ + \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ + \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ + \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ + \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ + \ annotated, 20 is added to its motif value.\n- jI ... start and\ + \ end of introns for all junctions (1-based).\n- XS ... 
alignment\ + \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ + \ string. Standard SAM tag.\n- ch ... marks all segment of all chimeric\ + \ alingments for --chimOutType WithinBAM output.\n- cN ... number\ + \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ + \ ... variant allele\n- vG ... genomic coordinate of the variant\ + \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ + \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ + \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ + \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ + \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ + \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ + \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ + \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ + \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ + \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ + \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ + \ genome generated with --genomeTransformType Diploid .\n- rB ...\ + \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ + \ of the variant." + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMattrIHstart" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMunmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ... 
output unmapped reads within the main SAM\ + \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ + \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ + \ to its mapped mate. Only affects multi-mapping reads." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMorder" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: null + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMprimaryFlag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ + \ the best score is primary\n- AllBestScore ... all alignments with the best\ + \ score are primary" + info: null + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMreadID" + description: "read ID record type\n\n- Standard ... 
first word (until space)\ + \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ + \ read number (index) in the FASTx file" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMmapqUnique" + description: "0 to 255: the MAPQ value for unique mappers" + info: null + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagOR" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ + \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ + \ are not set otherwise." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagAND" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set\ + \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ + \ are not set otherwise." + info: null + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattrRGline" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ + \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ + \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ + \ correspons to different (comma separated) input files in --readFilesIn.\ + \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ + \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderHD" + description: "@HD (header) line of the SAM header" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderPG" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderCommentFile" + description: "path to the file with @CO (comment) lines of the SAM header" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMfilter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ + \ at the mapping stage." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMmultNmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ... 
all alignments (up to\ + \ --outFilterMultimapNmax) will be output" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMtlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ + - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMcompression" + description: "-1 to 10 BAM compression level, -1=default compression (6?),\ + \ 0=no compression, 10=maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingThreadN" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingBinsN" + description: ">0: number of genome bins for coordinate-sorting" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "BAM processing" + arguments: + - type: "string" + name: "--bamRemoveDuplicatesType" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ + \ - ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--bamRemoveDuplicatesMate2basesN" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Wiggle" + arguments: + - type: "string" + name: "--outWigType" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ + \ only 2nd read" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outWigStrand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: null + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigReferencesPrefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigNorm" + description: "type of normalization for the signal\n\n- RPM ... reads per\ + \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" + info: null + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering" + arguments: + - type: "string" + name: "--outFilterType" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: null + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapScoreRange" + description: "the score range below the maximum score for multimapping alignments" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapNmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than\ + \ this value.\n\nOtherwise no alignments will be output, and the read will\ + \ be counted as \"mapped to too many loci\" in the Log.final.out ." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMismatchNmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverLmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." 
+ info: null + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverReadLmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterScoreMin" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterScoreMinOverLread" + description: "same as outFilterScoreMin, but normalized to read length (sum\ + \ of mates' lengths for paired-end reads)" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMatchNmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMatchNminOverLread" + description: "same as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronMotifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter\ + \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ... 
filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. The annotated non-canonical\ + \ junctions will be kept." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronStrands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: null + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--outSJtype" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--outSJfilterReads" + description: "which reads to consider for collapsed splice junctions output\n\ + \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ + \ mapping reads only" + info: null + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSJfilterOverhangMin" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + does not apply to annotated junctions" + info: null + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountUniqueMin" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ + \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ + \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ + \ are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountTotalMin" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterDistToOtherSJmin" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: null + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterIntronMaxVsReadN" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ + \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: null + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Scoring" + arguments: + - type: "integer" + name: "--scoreGap" + description: "splice junction penalty (independent on intron motif)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapNoncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapGCAG" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: null + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapATAC" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGenomicLengthLog2scale" + description: "extra score logarithmically scaled with genomic length of the\ + \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelOpen" + description: "deletion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelBase" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: 
"par" + - type: "integer" + name: "--scoreInsOpen" + description: "insertion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsBase" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreStitchSJshift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seedSearchStartLmax" + description: "defines the search start point through the read - the read is\ + \ split into pieces no longer than this value" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--seedSearchStartLmaxOverLread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSearchLmax" + description: "defines the maximum length of the seeds, if =0 seed length is\ + \ not limited" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMultimapNmax" + description: "only pieces that map fewer than this value are utilized in the\ + \ stitching procedure" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerReadNmax" + description: "max 
number of seeds per read" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerWindowNmax" + description: "max number of seeds per window" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedNoneLociPerWindow" + description: "max number of one seed loci per window" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSplitMin" + description: "min length of the seed sequences split by Ns or mate gap" + info: null + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMapMin" + description: "min length of seeds to be mapped" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMin" + description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: null + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMax" + description: "maximum intron size, if 0, max intron size will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignMatesGapMax" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: 
"--alignSJoverhangMin" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJstitchMismatchNmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ + \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: null + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--alignSJDBoverhangMin" + description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ + \ alignments" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSplicedMateMapLmin" + description: "minimum mapped length for a read mate that is spliced" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alignSplicedMateMapLminOverLmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignWindowsPerReadNmax" + description: "max number of windows per read" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerWindowNmax" + description: "max number of transcripts per window" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerReadNmax" + description: "max number of different alignments per read 
to consider" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsType" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: null + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsProtrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ + \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion\ + \ as concordant pairs\n- DiscordantPair ... report alignments\ + \ with non-zero protrusion as discordant pairs" + info: null + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignSoftClipAtReferenceEnds" + description: "allow the soft-clipping of the alignments past the end of the\ + \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ + \ with Cufflinks" + info: null + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignInsertionFlush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... 
insertions are flushed to the right" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Paired-End reads" + arguments: + - type: "integer" + name: "--peOverlapNbasesMin" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. Specify >0 value to switch on the \"merging of overlapping\ + \ mates\" algorithm." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--peOverlapMMp" + description: "maximum proportion of mismatched bases in the overlap area" + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--winAnchorMultimapNmax" + description: "max number of loci anchors are allowed to map to" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winBinNbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins." 
+ info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winAnchorDistNbins" + description: "max number of bins between two anchors that allows aggregation\ + \ of anchors into one window" + info: null + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winFlankNbins" + description: "log2(winFlank), where win Flank is the size of the left and right\ + \ flanking regions for each window" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--winReadCoverageRelativeMin" + description: "minimum relative coverage of the read sequence by the seeds in\ + \ a window, for STARlong algorithm only." + info: null + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winReadCoverageBasesMin" + description: "minimum number of bases covered by the seeds in a window , for\ + \ STARlong algorithm only." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chimOutType" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ + - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ + - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" + info: null + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimSegmentMin" + description: "minimum length of chimeric segment length, if ==0, no chimeric\ + \ output" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreMin" + description: "minimum total (summed) score of the chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreDropMax" + description: "max drop (difference) of chimeric score (the sum of scores of\ + \ all chimeric segments) from the read length" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreSeparation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreJunctionNonGTAG" + description: "penalty for a non-GT/AG chimeric junction" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimJunctionOverhangMin" + description: "minimum overhang for a chimeric junction" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimSegmentReadGapMax" + description: "maximum gap in the read sequence between chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - 
type: "string" + name: "--chimFilter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: null + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimMainSegmentMultNmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapNmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ + \ old scheme for chimeric detection which only considered unique alignments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapScoreRange" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. Only works with --chimMultimapNmax > 1" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimNonchimScoreDropMin" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimOutJunctionFormat" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ + \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ + \ command line and Nreads: total, unique/multi-mapping" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quantMode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--quantTranscriptomeBAMcompression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ + \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ + - 10 ... maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--quantTranscriptomeBan" + description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ + \ prohibit indels, soft clipping and single-end alignments - compatible with\ + \ RSEM\n- Singleend ... prohibit single-end alignments" + info: null + example: + - "IndelSoftclipSingleend" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopassMode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--twopass1readsN" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." 
+ info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "WASP parameters" + arguments: + - type: "string" + name: "--waspOutputMode" + description: "WASP allele-specific output type. This is re-implementation of\ + \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ + \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ + \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "STARsolo (single cell RNA-seq) parameters" + arguments: + - type: "string" + name: "--soloType" + description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ + \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ + \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ + \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ + \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ + \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ + \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ + \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ + \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ + \ UMI sequences, alignments deduplicated according to alignment start and\ + \ end (after extending soft-clipped bases)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCBwhitelist" + description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ + \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ + \ all cell barcodes are allowed" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--soloCBstart" + description: "cell barcode start base" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloCBlen" + description: "cell barcode length" + info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIstart" + description: "UMI start base" + info: null + example: + - 17 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIlen" + description: "UMI length" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeReadLength" + description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ + - 0 ... not defined, do not check" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeMate" + description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ + \n- 0 ... barcode sequence is on separate read, which should always be the\ + \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ + \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBposition" + description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ + \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ + \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ + \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ + \ position with of the CB start(end) with respect to the Anchor Base\nString\ + \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ + \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIposition" + description: "position of the UMI on the barcode read, same as soloCBposition\n\ + \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ + \ 3_9_3_14" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloAdapterSequence" + description: "adapter sequence to anchor barcodes. Only one adapter sequence\ + \ is allowed." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloAdapterMismatchesNmax" + description: "maximum number of mismatches allowed in adapter sequence." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBmatchWLtype" + description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ + \ ... only exact matches allowed\n- 1MM \ + \ ... 
only one match in whitelist with 1 mismatched base allowed.\ + \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ + \ ... multiple matches in whitelist with 1 mismatched\ + \ base allowed, posterior probability calculation is used choose one of the\ + \ matches.\nAllowed CBs have to have at least one read with exact match. This\ + \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ + \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ + \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ + \ multimatching to WL is allowed for CBs with N-bases. This option matches\ + \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ + \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ + \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ + \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + info: null + example: + - "1MM_multi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeSeq" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ + \ CR UR .\nThis parameter is required when running STARsolo with input from\ + \ SAM." 
+ info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeQual" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ + \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ + \ to all bases." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloStrand" + description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ + \ information\n- Forward ... read strand same as the original RNA molecule\n\ + - Reverse ... read strand opposite to the original RNA molecule" + info: null + example: + - "Forward" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloFeatures" + description: "genomic features for which the UMI counts per Cell Barcode are\ + \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ + - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ + \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ + \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ + \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ + \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ + \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ + \ Do not count reads with 100% exonic overlap in the antisense direction." + info: null + example: + - "Gene" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloMultiMappers" + description: "counting method for reads mapping to multiple genes\n\n- Unique\ + \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ + \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ + \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ + \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ + \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ + \ Maximization algorithm" + info: null + example: + - "Unique" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIdedup" + description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ + \ ... all UMIs with 1 mismatch distance to each other\ + \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ + \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ + \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ + \ but with more stringent criteria for duplicate UMIs\n- Exact \ + \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ + \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ + \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." + info: null + example: + - "1MM_All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIfiltering" + description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ + - - ... basic filtering: remove UMIs with N and homopolymers\ + \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ + \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ + \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ + \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ + \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFileNames" + description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ + \ barcode_sequences cell_feature_count_matrix" + info: null + example: + - "Solo.out/" + - "features.tsv" + - "barcodes.tsv" + - "matrix.mtx" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellFilter" + description: "cell filtering type and parameters\n\n- None ... do\ + \ not output filtered cells\n- TopCells ... only report top cells by\ + \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ + \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ + \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ + \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ + \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ + \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ + \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ + \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ + \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ + \ 0.99 10 45000 90000 500 0.01\ + \ 20000 0.01 10000" + info: null + example: + - "CellRanger2.2" + - "3000" + - "0.99" + - "10" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFormatFeaturesGeneField3" + description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ + \ is output." 
+ info: null + example: + - "Gene Expression" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellReadStats" + description: "Output reads statistics for each CB\n\n- Standard ... standard\ + \ output" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Align fastq files using STAR." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "docker" + env: + - "STAR_VERSION 2.7.10b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + 
def setup_logger():
    """Configure the root logger to emit INFO-level records to stdout.

    The root logger's level is set to ``logging.INFO`` and a stream handler
    writing to ``sys.stdout`` is attached. Records are formatted as
    ``"%(asctime)s %(levelname)-8s %(message)s"``.

    The function is safe to call repeatedly: when the root logger already
    carries a stream handler bound to the current ``sys.stdout``, no second
    handler is added, so records are never printed twice.

    Returns:
        logging.Logger: The configured root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Handlers accumulate on the (process-wide) root logger, so only attach a
    # new one when no existing handler already writes to this stdout stream.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        log_formatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(log_formatter)
        logger.addHandler(console_handler)

    return logger
# ViashSourceDir: return the directory containing a bash file, following symlinks
# usage   : ViashSourceDir "${BASH_SOURCE[0]}"
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute path of the directory that holds the (resolved) bash file
function ViashSourceDir {
  # Declared separately from the assignments so that a failing command
  # substitution still propagates its exit status (the script runs with set -e).
  local source_path dir
  source_path="$1"
  # Resolve the chain of symlinks until a regular file is reached.
  while [ -h "$source_path" ]; do
    dir="$( cd -P "$( dirname "$source_path" )" >/dev/null 2>&1 && pwd )"
    source_path="$(readlink "$source_path")"
    # A relative link target is relative to the directory of the link itself.
    [[ $source_path != /* ]] && source_path="$dir/$source_path"
  done
  cd -P "$( dirname "$source_path" )" >/dev/null 2>&1 && pwd
}
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE

# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] ', when VIASH_VERBOSITY >= $1.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  if [ "$VIASH_VERBOSITY" -ge "$required_level" ]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog "$VIASH_LOGCODE_EMERGENCY" emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog "$VIASH_LOGCODE_ALERT" alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog "$VIASH_LOGCODE_CRITICAL" critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog "$VIASH_LOGCODE_ERROR" error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog "$VIASH_LOGCODE_WARNING" warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog "$VIASH_LOGCODE_NOTICE" notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  ViashLog "$VIASH_LOGCODE_INFO" info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog "$VIASH_LOGCODE_DEBUG" debug "$@"
}

# find source folder of this component
# (quoted so that a script path containing whitespace is passed as one argument)
VIASH_META_RESOURCES_DIR="$(ViashSourceDir "${BASH_SOURCE[0]}")"

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="star_align"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
Corresponds to the --readFilesIn" + echo " argument in the STAR command." + echo "" + echo " --genomeDir, --reference" + echo " type: file, required parameter, file must exist" + echo " example: /path/to/reference" + echo " Path to the reference built by star_build_reference. Corresponds to the" + echo " --genomeDir argument in the STAR command." + echo "" + echo " --outFileNamePrefix, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: /path/to/foo" + echo " Path to output directory. Corresponds to the --outFileNamePrefix" + echo " argument in the STAR command." + echo "" + echo "Run Parameters:" + echo " --runRNGseed" + echo " type: integer" + echo " example: 777" + echo " random number generator seed." + echo "" + echo "Genome Parameters:" + echo " --genomeLoad" + echo " type: string" + echo " example: NoSharedMemory" + echo " mode of shared memory usage for the genome files. Only used with" + echo " --runMode alignReads." + echo " - LoadAndKeep ... load genome into shared and keep it in memory" + echo " after run" + echo " - LoadAndRemove ... load genome into shared but remove it after run" + echo " - LoadAndExit ... load genome into shared memory and exit, keeping" + echo " the genome in memory for future runs" + echo " - Remove ... do not map anything, just remove loaded genome" + echo " from memory" + echo " - NoSharedMemory ... do not use shared memory, each job will have its" + echo " own private copy of the genome" + echo "" + echo " --genomeFastaFiles" + echo " type: file, multiple values allowed, file must exist" + echo " path(s) to the fasta files with the genome sequences, separated by" + echo " spaces. These files should be plain text FASTA files, they *cannot* be" + echo " zipped." + echo " Required for the genome generation (--runMode genomeGenerate). Can also" + echo " be used in the mapping (--runMode alignReads) to add extra (new)" + echo " sequences to the genome (e.g. spike-ins)." 
+ echo "" + echo " --genomeFileSizes" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " genome files exact sizes in bytes. Typically, this should not be defined" + echo " by the user." + echo "" + echo " --genomeTransformOutput" + echo " type: string, multiple values allowed" + echo " which output to transform back to original genome" + echo " - SAM ... SAM/BAM alignments" + echo " - SJ ... splice junctions (SJ.out.tab)" + echo " - None ... no transformation of the output" + echo "" + echo " --genomeChrSetMitochondrial" + echo " type: string, multiple values allowed" + echo " example: chrM;M;MT" + echo " names of the mitochondrial chromosomes. Presently only used for STARsolo" + echo " statistics output/" + echo "" + echo "Splice Junctions Database:" + echo " --sjdbFileChrStartEnd" + echo " type: string, multiple values allowed" + echo " path to the files with genomic coordinates (chr start end" + echo " strand) for the splice junction introns. Multiple files can be" + echo " supplied and will be concatenated." + echo "" + echo " --sjdbGTFfile" + echo " type: file, file must exist" + echo " path to the GTF file with annotations" + echo "" + echo " --sjdbGTFchrPrefix" + echo " type: string" + echo " prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL" + echo " annotations with UCSC genomes)" + echo "" + echo " --sjdbGTFfeatureExon" + echo " type: string" + echo " example: exon" + echo " feature type in GTF file to be used as exons for building transcripts" + echo "" + echo " --sjdbGTFtagExonParentTranscript" + echo " type: string" + echo " example: transcript_id" + echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" + echo " works for GTF files)" + echo "" + echo " --sjdbGTFtagExonParentGene" + echo " type: string" + echo " example: gene_id" + echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" + echo " files)" + echo "" + echo " --sjdbGTFtagExonParentGeneName" + echo " type: string, multiple values allowed" + echo " example: gene_name" + echo " GTF attribute name for parent gene name" + echo "" + echo " --sjdbGTFtagExonParentGeneType" + echo " type: string, multiple values allowed" + echo " example: gene_type;gene_biotype" + echo " GTF attribute name for parent gene type" + echo "" + echo " --sjdbOverhang" + echo " type: integer" + echo " example: 100" + echo " length of the donor/acceptor sequence on each side of the junctions," + echo " ideally = (mate_length - 1)" + echo "" + echo " --sjdbScore" + echo " type: integer" + echo " example: 2" + echo " extra alignment score for alignments that cross database junctions" + echo "" + echo " --sjdbInsertSave" + echo " type: string" + echo " example: Basic" + echo " which files to save when sjdb junctions are inserted on the fly at the" + echo " mapping step" + echo " - Basic ... only small junction / transcript files" + echo " - All ... all files including big Genome, SA and SAindex - this will" + echo " create a complete genome directory" + echo "" + echo "Variation parameters:" + echo " --varVCFfile" + echo " type: string" + echo " path to the VCF file that contains variation data. The 10th column" + echo " should contain the genotype information, e.g. 
0/1" + echo "" + echo "Read Parameters:" + echo " --readFilesType" + echo " type: string" + echo " example: Fastx" + echo " format of input read files" + echo " - Fastx ... FASTA or FASTQ" + echo " - SAM SE ... SAM or BAM single-end reads; for BAM use" + echo " --readFilesCommand samtools view" + echo " - SAM PE ... SAM or BAM paired-end reads; for BAM use" + echo " --readFilesCommand samtools view" + echo "" + echo " --readFilesSAMattrKeep" + echo " type: string, multiple values allowed" + echo " example: All" + echo " for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM," + echo " e.g.: --readFilesSAMtagsKeep RG PL" + echo " - All ... keep all tags" + echo " - None ... do not keep any tags" + echo "" + echo " --readFilesManifest" + echo " type: file, file must exist" + echo " path to the \"manifest\" file with the names of read files. The manifest" + echo " file should contain 3 tab-separated columns:" + echo " paired-end reads: read1_file_name \$tab\$ read2_file_name \$tab\$" + echo " read_group_line." + echo " single-end reads: read1_file_name \$tab\$ - \$tab\$" + echo " read_group_line." + echo " Spaces, but not tabs are allowed in file names." + echo " If read_group_line does not start with ID:, it can only contain one ID" + echo " field, and ID: will be added to it." + echo " If read_group_line starts with ID:, it can contain several fields" + echo " separated by \$tab\$, and all fields will be be copied verbatim into SAM" + echo " @RG header line." + echo "" + echo " --readFilesPrefix" + echo " type: string" + echo " prefix for the read files names, i.e. it will be added in front of the" + echo " strings in --readFilesIn" + echo "" + echo " --readFilesCommand" + echo " type: string, multiple values allowed" + echo " command line to execute for each of the input file. 
This command should" + echo " generate FASTA or FASTQ text and send it to stdout" + echo " For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2" + echo " files, etc." + echo "" + echo " --readMapNumber" + echo " type: integer" + echo " example: -1" + echo " number of reads to map from the beginning of the file" + echo " -1: map all reads" + echo "" + echo " --readMatesLengthsIn" + echo " type: string" + echo " example: NotEqual" + echo " Equal/NotEqual - lengths of names,sequences,qualities for both mates are" + echo " the same / not the same. NotEqual is safe in all situations." + echo "" + echo " --readNameSeparator" + echo " type: string, multiple values allowed" + echo " example: /" + echo " character(s) separating the part of the read names that will be trimmed" + echo " in output (read name after space is always trimmed)" + echo "" + echo " --readQualityScoreBase" + echo " type: integer" + echo " example: 33" + echo " number to be subtracted from the ASCII code to get Phred quality score" + echo "" + echo "Read Clipping:" + echo " --clipAdapterType" + echo " type: string" + echo " example: Hamming" + echo " adapter clipping type" + echo " - Hamming ... adapter clipping based on Hamming distance, with the" + echo " number of mismatches controlled by --clip5pAdapterMMp" + echo " - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4." + echo " Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal" + echo " - None ... no adapter clipping, all other clip* parameters are" + echo " disregarded" + echo "" + echo " --clip3pNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number(s) of bases to clip from 3p of each mate. If one value is given," + echo " it will be assumed the same for both mates." + echo "" + echo " --clip3pAdapterSeq" + echo " type: string, multiple values allowed" + echo " adapter sequences to clip from 3p of each mate. 
If one value is given," + echo " it will be assumed the same for both mates." + echo " - polyA ... polyA sequence with the length equal to read length" + echo "" + echo " --clip3pAdapterMMp" + echo " type: double, multiple values allowed" + echo " example: 0.1" + echo " max proportion of mismatches for 3p adapter clipping for each mate. If" + echo " one value is given, it will be assumed the same for both mates." + echo "" + echo " --clip3pAfterAdapterNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number of bases to clip from 3p of each mate after the adapter clipping." + echo " If one value is given, it will be assumed the same for both mates." + echo "" + echo " --clip5pNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number(s) of bases to clip from 5p of each mate. If one value is given," + echo " it will be assumed the same for both mates." + echo "" + echo "Limits:" + echo " --limitGenomeGenerateRAM" + echo " type: long" + echo " example: 31000000000" + echo " maximum available RAM (bytes) for genome generation" + echo "" + echo " --limitIObufferSize" + echo " type: long, multiple values allowed" + echo " example: 30000000;50000000" + echo " max available buffers size (bytes) for input/output, per thread" + echo "" + echo " --limitOutSAMoneReadBytes" + echo " type: long" + echo " example: 100000" + echo " max size of the SAM record (bytes) for one read. Recommended value:" + echo " >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + echo "" + echo " --limitOutSJoneRead" + echo " type: integer" + echo " example: 1000" + echo " max number of junctions for one read (including all multi-mappers)" + echo "" + echo " --limitOutSJcollapsed" + echo " type: integer" + echo " example: 1000000" + echo " max number of collapsed junctions" + echo "" + echo " --limitBAMsortRAM" + echo " type: long" + echo " example: 0" + echo " maximum available RAM (bytes) for sorting BAM. 
If =0, it will be set to" + echo " the genome index size. 0 value can only be used with --genomeLoad" + echo " NoSharedMemory option." + echo "" + echo " --limitSjdbInsertNsj" + echo " type: integer" + echo " example: 1000000" + echo " maximum number of junctions to be inserted to the genome on the fly at" + echo " the mapping stage, including those from annotations and those detected" + echo " in the 1st step of the 2-pass run" + echo "" + echo " --limitNreadsSoft" + echo " type: integer" + echo " example: -1" + echo " soft limit on the number of reads" + echo "" + echo "Output: general:" + echo " --outTmpKeep" + echo " type: string" + echo " whether to keep the temporary files after STAR runs is finished" + echo " - None ... remove all temporary files" + echo " - All ... keep all files" + echo "" + echo " --outStd" + echo " type: string" + echo " example: Log" + echo " which output will be directed to stdout (standard out)" + echo " - Log ... log messages" + echo " - SAM ... alignments in SAM format (which normally" + echo " are output to Aligned.out.sam file), normal standard output will go into" + echo " Log.std.out" + echo " - BAM_Unsorted ... alignments in BAM format, unsorted." + echo " Requires --outSAMtype BAM Unsorted" + echo " - BAM_SortedByCoordinate ... alignments in BAM format, sorted by" + echo " coordinate. Requires --outSAMtype BAM SortedByCoordinate" + echo " - BAM_Quant ... alignments to transcriptome in BAM format," + echo " unsorted. Requires --quantMode TranscriptomeSAM" + echo "" + echo " --outReadsUnmapped" + echo " type: string" + echo " output of unmapped and partially mapped (i.e. mapped only one mate of a" + echo " paired end read) reads in separate file(s)." + echo " - None ... no output" + echo " - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + echo "" + echo " --outQSconversionAdd" + echo " type: integer" + echo " example: 0" + echo " add this number to the quality score (e.g. 
to convert from Illumina to" + echo " Sanger, use -31)" + echo "" + echo " --outMultimapperOrder" + echo " type: string" + echo " example: Old_2.4" + echo " order of multimapping alignments in the output files" + echo " - Old_2.4 ... quasi-random order used before 2.5.0" + echo " - Random ... random order of alignments for each" + echo " multi-mapper. Read mates (pairs) are always adjacent, all alignment for" + echo " each read stay together. This option will become default in the future" + echo " releases." + echo "" + echo "Output: SAM and BAM:" + echo " --outSAMtype" + echo " type: string, multiple values allowed" + echo " example: SAM" + echo " type of SAM/BAM output" + echo " 1st word:" + echo " - BAM ... output BAM without sorting" + echo " - SAM ... output SAM without sorting" + echo " - None ... no SAM/BAM output" + echo " 2nd, 3rd:" + echo " - Unsorted ... standard unsorted" + echo " - SortedByCoordinate ... sorted by coordinate. This option will allocate" + echo " extra memory for sorting which can be specified by --limitBAMsortRAM." + echo "" + echo " --outSAMmode" + echo " type: string" + echo " example: Full" + echo " mode of SAM output" + echo " - None ... no SAM output" + echo " - Full ... full SAM output" + echo " - NoQS ... full SAM but without quality scores" + echo "" + echo " --outSAMstrandField" + echo " type: string" + echo " Cufflinks-like strand field flag" + echo " - None ... not used" + echo " - intronMotif ... strand derived from the intron motif. This option" + echo " changes the output alignments: reads with inconsistent and/or" + echo " non-canonical introns are filtered out." + echo "" + echo " --outSAMattributes" + echo " type: string, multiple values allowed" + echo " example: Standard" + echo " a string of desired SAM attributes, in the order desired for the output" + echo " SAM. Tags can be listed in any combination/order." + echo " ***Presets:" + echo " - None ... no attributes" + echo " - Standard ... 
NH HI AS nM" + echo " - All ... NH HI AS nM NM MD jM jI MC ch" + echo " ***Alignment:" + echo " - NH ... number of loci the reads maps to: =1 for unique" + echo " mappers, >1 for multimappers. Standard SAM tag." + echo " - HI ... multiple alignment index, starts with" + echo " --outSAMattrIHstart (=1 by default). Standard SAM tag." + echo " - AS ... local alignment score, +1/-1 for matches/mismateches," + echo " score* penalties for indels and gaps. For PE reads, total score for two" + echo " mates. Stadnard SAM tag." + echo " - nM ... number of mismatches. For PE reads, sum over two" + echo " mates." + echo " - NM ... edit distance to the reference (number of mismatched +" + echo " inserted + deleted bases) for each mate. Standard SAM tag." + echo " - MD ... string encoding mismatched and deleted reference bases" + echo " (see standard SAM specifications). Standard SAM tag." + echo " - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0:" + echo " non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6:" + echo " GT/AT. If splice junctions database is used, and a junction is" + echo " annotated, 20 is added to its motif value." + echo " - jI ... start and end of introns for all junctions (1-based)." + echo " - XS ... alignment strand according to --outSAMstrandField." + echo " - MC ... mate's CIGAR string. Standard SAM tag." + echo " - ch ... marks all segment of all chimeric alingments for" + echo " --chimOutType WithinBAM output." + echo " - cN ... number of bases clipped from the read ends: 5' and 3'" + echo " ***Variation:" + echo " - vA ... variant allele" + echo " - vG ... genomic coordinate of the variant overlapped by the" + echo " read." + echo " - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 -" + echo " alignment does not pass WASP filtering. Requires --waspOutputMode" + echo " SAMtag." + echo " ***STARsolo:" + echo " - CR CY UR UY ... 
sequences and quality scores of cell barcodes and UMIs" + echo " for the solo* demultiplexing." + echo " - GX GN ... gene ID and gene name for unique-gene reads." + echo " - gx gn ... gene IDs and gene names for unique- and multi-gene" + echo " reads." + echo " - CB UB ... error-corrected cell barcodes and UMIs for solo*" + echo " demultiplexing. Requires --outSAMtype BAM SortedByCoordinate." + echo " - sM ... assessment of CB and UMI." + echo " - sS ... sequence of the entire barcode (CB,UMI,adapter)." + echo " - sQ ... quality of the entire barcode." + echo " ***Unsupported/undocumented:" + echo " - ha ... haplotype (1/2) when mapping to the diploid genome." + echo " Requires genome generated with --genomeTransformType Diploid ." + echo " - rB ... alignment block read/genomic coordinates." + echo " - vR ... read coordinate of the variant." + echo "" + echo " --outSAMattrIHstart" + echo " type: integer" + echo " example: 1" + echo " start value for the IH attribute. 0 may be required by some downstream" + echo " software, such as Cufflinks or StringTie." + echo "" + echo " --outSAMunmapped" + echo " type: string, multiple values allowed" + echo " output of unmapped reads in the SAM format" + echo " 1st word:" + echo " - None ... no output" + echo " - Within ... output unmapped reads within the main SAM file (i.e." + echo " Aligned.out.sam)" + echo " 2nd word:" + echo " - KeepPairs ... record unmapped mate for each alignment, and, in case of" + echo " unsorted output, keep it adjacent to its mapped mate. Only affects" + echo " multi-mapping reads." 
+ echo "" + echo " --outSAMorder" + echo " type: string" + echo " example: Paired" + echo " type of sorting for the SAM output" + echo " Paired: one mate after the other for all paired alignments" + echo " PairedKeepInputOrder: one mate after the other for all paired" + echo " alignments, the order is kept the same as in the input FASTQ files" + echo "" + echo " --outSAMprimaryFlag" + echo " type: string" + echo " example: OneBestScore" + echo " which alignments are considered primary - all others will be marked with" + echo " 0x100 bit in the FLAG" + echo " - OneBestScore ... only one alignment with the best score is primary" + echo " - AllBestScore ... all alignments with the best score are primary" + echo "" + echo " --outSAMreadID" + echo " type: string" + echo " example: Standard" + echo " read ID record type" + echo " - Standard ... first word (until space) from the FASTx read ID line," + echo " removing /1,/2 from the end" + echo " - Number ... read number (index) in the FASTx file" + echo "" + echo " --outSAMmapqUnique" + echo " type: integer" + echo " example: 255" + echo " 0 to 255: the MAPQ value for unique mappers" + echo "" + echo " --outSAMflagOR" + echo " type: integer" + echo " example: 0" + echo " 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e." + echo " FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set" + echo " by STAR, and after outSAMflagAND. Can be used to set specific bits that" + echo " are not set otherwise." + echo "" + echo " --outSAMflagAND" + echo " type: integer" + echo " example: 65535" + echo " 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e." + echo " FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set" + echo " by STAR, but before outSAMflagOR. Can be used to unset specific bits" + echo " that are not set otherwise." + echo "" + echo " --outSAMattrRGline" + echo " type: string, multiple values allowed" + echo " SAM/BAM read group line. 
The first word contains the read group" + echo " identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx" + echo " CN:yy \"DS:z z z\"." + echo " xxx will be added as RG tag to each output alignment. Any spaces in the" + echo " tag values have to be double quoted." + echo " Comma separated RG lines correspons to different (comma separated) input" + echo " files in --readFilesIn. Commas have to be surrounded by spaces, e.g." + echo " --outSAMattrRGline ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + echo "" + echo " --outSAMheaderHD" + echo " type: string, multiple values allowed" + echo " @HD (header) line of the SAM header" + echo "" + echo " --outSAMheaderPG" + echo " type: string, multiple values allowed" + echo " extra @PG (software) line of the SAM header (in addition to STAR)" + echo "" + echo " --outSAMheaderCommentFile" + echo " type: string" + echo " path to the file with @CO (comment) lines of the SAM header" + echo "" + echo " --outSAMfilter" + echo " type: string, multiple values allowed" + echo " filter the output into main SAM/BAM files" + echo " - KeepOnlyAddedReferences ... only keep the reads for which all" + echo " alignments are to the extra reference sequences added with" + echo " --genomeFastaFiles at the mapping stage." + echo " - KeepAllAddedReferences ... keep all alignments to the extra reference" + echo " sequences added with --genomeFastaFiles at the mapping stage." + echo "" + echo " --outSAMmultNmax" + echo " type: integer" + echo " example: -1" + echo " max number of multiple alignments for a read that will be output to the" + echo " SAM/BAM files. Note that if this value is not equal to -1, the top" + echo " scoring alignment will be output first" + echo " - -1 ... all alignments (up to --outFilterMultimapNmax) will be output" + echo "" + echo " --outSAMtlen" + echo " type: integer" + echo " example: 1" + echo " calculation method for the TLEN field in the SAM/BAM files" + echo " - 1 ... 
leftmost base of the (+)strand mate to rightmost base of the" + echo " (-)mate. (+)sign for the (+)strand mate" + echo " - 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign" + echo " for the mate with the leftmost base. This is different from 1 for" + echo " overlapping mates with protruding ends" + echo "" + echo " --outBAMcompression" + echo " type: integer" + echo " example: 1" + echo " -1 to 10 BAM compression level, -1=default compression (6?), 0=no" + echo " compression, 10=maximum compression" + echo "" + echo " --outBAMsortingThreadN" + echo " type: integer" + echo " example: 0" + echo " >=0: number of threads for BAM sorting. 0 will default to" + echo " min(6,--runThreadN)." + echo "" + echo " --outBAMsortingBinsN" + echo " type: integer" + echo " example: 50" + echo " >0: number of genome bins for coordinate-sorting" + echo "" + echo "BAM processing:" + echo " --bamRemoveDuplicatesType" + echo " type: string" + echo " mark duplicates in the BAM file, for now only works with (i) sorted BAM" + echo " fed with inputBAMfile, and (ii) for paired-end alignments only" + echo " - - ... no duplicate removal/marking" + echo " - UniqueIdentical ... mark all multimappers, and duplicate" + echo " unique mappers. The coordinates, FLAG, CIGAR must be identical" + echo " - UniqueIdenticalNotMulti ... mark duplicate unique mappers but not" + echo " multimappers." + echo "" + echo " --bamRemoveDuplicatesMate2basesN" + echo " type: integer" + echo " example: 0" + echo " number of bases from the 5' of mate 2 to use in collapsing (e.g. for" + echo " RAMPAGE)" + echo "" + echo "Output Wiggle:" + echo " --outWigType" + echo " type: string, multiple values allowed" + echo " type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires" + echo " sorted BAM: --outSAMtype BAM SortedByCoordinate ." + echo " 1st word:" + echo " - None ... no signal output" + echo " - bedGraph ... bedGraph format" + echo " - wiggle ... 
wiggle format" + echo " 2nd word:" + echo " - read1_5p ... signal from only 5' of the 1st read, useful for" + echo " CAGE/RAMPAGE etc" + echo " - read2 ... signal from only 2nd read" + echo "" + echo " --outWigStrand" + echo " type: string" + echo " example: Stranded" + echo " strandedness of wiggle/bedGraph output" + echo " - Stranded ... separate strands, str1 and str2" + echo " - Unstranded ... collapsed strands" + echo "" + echo " --outWigReferencesPrefix" + echo " type: string" + echo " prefix matching reference names to include in the output wiggle file," + echo " e.g. \"chr\", default \"-\" - include all references" + echo "" + echo " --outWigNorm" + echo " type: string" + echo " example: RPM" + echo " type of normalization for the signal" + echo " - RPM ... reads per million of mapped reads" + echo " - None ... no normalization, \"raw\" counts" + echo "" + echo "Output Filtering:" + echo " --outFilterType" + echo " type: string" + echo " example: Normal" + echo " type of filtering" + echo " - Normal ... standard filtering using only current alignment" + echo " - BySJout ... keep only those reads that contain junctions that passed" + echo " filtering into SJ.out.tab" + echo "" + echo " --outFilterMultimapScoreRange" + echo " type: integer" + echo " example: 1" + echo " the score range below the maximum score for multimapping alignments" + echo "" + echo " --outFilterMultimapNmax" + echo " type: integer" + echo " example: 10" + echo " maximum number of loci the read is allowed to map to. Alignments (all of" + echo " them) will be output only if the read maps to no more loci than this" + echo " value." + echo " Otherwise no alignments will be output, and the read will be counted as" + echo " \"mapped to too many loci\" in the Log.final.out ." + echo "" + echo " --outFilterMismatchNmax" + echo " type: integer" + echo " example: 10" + echo " alignment will be output only if it has no more mismatches than this" + echo " value." 
+ echo "" + echo " --outFilterMismatchNoverLmax" + echo " type: double" + echo " example: 0.3" + echo " alignment will be output only if its ratio of mismatches to *mapped*" + echo " length is less than or equal to this value." + echo "" + echo " --outFilterMismatchNoverReadLmax" + echo " type: double" + echo " example: 1.0" + echo " alignment will be output only if its ratio of mismatches to *read*" + echo " length is less than or equal to this value." + echo "" + echo " --outFilterScoreMin" + echo " type: integer" + echo " example: 0" + echo " alignment will be output only if its score is higher than or equal to" + echo " this value." + echo "" + echo " --outFilterScoreMinOverLread" + echo " type: double" + echo " example: 0.66" + echo " same as outFilterScoreMin, but normalized to read length (sum of mates'" + echo " lengths for paired-end reads)" + echo "" + echo " --outFilterMatchNmin" + echo " type: integer" + echo " example: 0" + echo " alignment will be output only if the number of matched bases is higher" + echo " than or equal to this value." + echo "" + echo " --outFilterMatchNminOverLread" + echo " type: double" + echo " example: 0.66" + echo " sam as outFilterMatchNmin, but normalized to the read length (sum of" + echo " mates' lengths for paired-end reads)." + echo "" + echo " --outFilterIntronMotifs" + echo " type: string" + echo " filter alignment using their motifs" + echo " - None ... no filtering" + echo " - RemoveNoncanonical ... filter out alignments that contain" + echo " non-canonical junctions" + echo " - RemoveNoncanonicalUnannotated ... filter out alignments that contain" + echo " non-canonical unannotated junctions when using annotated splice" + echo " junctions database. The annotated non-canonical junctions will be kept." + echo "" + echo " --outFilterIntronStrands" + echo " type: string" + echo " example: RemoveInconsistentStrands" + echo " filter alignments" + echo " - RemoveInconsistentStrands ... 
remove alignments that have" + echo " junctions with inconsistent strands" + echo " - None ... no filtering" + echo "" + echo "Output splice junctions (SJ.out.tab):" + echo " --outSJtype" + echo " type: string" + echo " example: Standard" + echo " type of splice junction output" + echo " - Standard ... standard SJ.out.tab output" + echo " - None ... no splice junction output" + echo "" + echo "Output Filtering: Splice Junctions:" + echo " --outSJfilterReads" + echo " type: string" + echo " example: All" + echo " which reads to consider for collapsed splice junctions output" + echo " - All ... all reads, unique- and multi-mappers" + echo " - Unique ... uniquely mapping reads only" + echo "" + echo " --outSJfilterOverhangMin" + echo " type: integer, multiple values allowed" + echo " example: 30;12;12;12" + echo " minimum overhang length for splice junctions on both sides for: (1)" + echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterCountUniqueMin" + echo " type: integer, multiple values allowed" + echo " example: 3;1;1;1" + echo " minimum uniquely mapping read count per junction for: (1) non-canonical" + echo " motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC" + echo " and GT/AT motif. -1 means no output for that motif" + echo " Junctions are output if one of outSJfilterCountUniqueMin OR" + echo " outSJfilterCountTotalMin conditions are satisfied" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterCountTotalMin" + echo " type: integer, multiple values allowed" + echo " example: 3;1;1;1" + echo " minimum total (multi-mapping+unique) read count per junction for: (1)" + echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif" + echo " Junctions are output if one of outSJfilterCountUniqueMin OR" + echo " outSJfilterCountTotalMin conditions are satisfied" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterDistToOtherSJmin" + echo " type: integer, multiple values allowed" + echo " example: 10;0;5;10" + echo " minimum allowed distance to other junctions' donor/acceptor" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterIntronMaxVsReadN" + echo " type: integer, multiple values allowed" + echo " example: 50000;100000;200000" + echo " maximum gap allowed for junctions supported by 1,2,3,,,N reads" + echo " i.e. by default junctions supported by 1 read can have gaps <=50000b, by" + echo " 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap" + echo " <=alignIntronMax" + echo " does not apply to annotated junctions" + echo "" + echo "Scoring:" + echo " --scoreGap" + echo " type: integer" + echo " example: 0" + echo " splice junction penalty (independent on intron motif)" + echo "" + echo " --scoreGapNoncan" + echo " type: integer" + echo " example: -8" + echo " non-canonical junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGapGCAG" + echo " type: integer" + echo " example: -4" + echo " GC/AG and CT/GC junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGapATAC" + echo " type: integer" + echo " example: -8" + echo " AT/AC and GT/AT junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGenomicLengthLog2scale" + echo " type: integer" + echo " example: 0" + echo " extra score logarithmically scaled with genomic length of the alignment:" + echo " scoreGenomicLengthLog2scale*log2(genomicLength)" + echo "" + echo " --scoreDelOpen" + echo " type: integer" + echo " example: -2" + echo " deletion open penalty" + echo "" + echo " --scoreDelBase" + echo " type: integer" + echo " example: -2" + echo " deletion extension penalty per base (in addition to 
scoreDelOpen)" + echo "" + echo " --scoreInsOpen" + echo " type: integer" + echo " example: -2" + echo " insertion open penalty" + echo "" + echo " --scoreInsBase" + echo " type: integer" + echo " example: -2" + echo " insertion extension penalty per base (in addition to scoreInsOpen)" + echo "" + echo " --scoreStitchSJshift" + echo " type: integer" + echo " example: 1" + echo " maximum score reduction while searching for SJ boundaries in the" + echo " stitching step" + echo "" + echo "Alignments and Seeding:" + echo " --seedSearchStartLmax" + echo " type: integer" + echo " example: 50" + echo " defines the search start point through the read - the read is split into" + echo " pieces no longer than this value" + echo "" + echo " --seedSearchStartLmaxOverLread" + echo " type: double" + echo " example: 1.0" + echo " seedSearchStartLmax normalized to read length (sum of mates' lengths for" + echo " paired-end reads)" + echo "" + echo " --seedSearchLmax" + echo " type: integer" + echo " example: 0" + echo " defines the maximum length of the seeds, if =0 seed length is not" + echo " limited" + echo "" + echo " --seedMultimapNmax" + echo " type: integer" + echo " example: 10000" + echo " only pieces that map fewer than this value are utilized in the stitching" + echo " procedure" + echo "" + echo " --seedPerReadNmax" + echo " type: integer" + echo " example: 1000" + echo " max number of seeds per read" + echo "" + echo " --seedPerWindowNmax" + echo " type: integer" + echo " example: 50" + echo " max number of seeds per window" + echo "" + echo " --seedNoneLociPerWindow" + echo " type: integer" + echo " example: 10" + echo " max number of one seed loci per window" + echo "" + echo " --seedSplitMin" + echo " type: integer" + echo " example: 12" + echo " min length of the seed sequences split by Ns or mate gap" + echo "" + echo " --seedMapMin" + echo " type: integer" + echo " example: 5" + echo " min length of seeds to be mapped" + echo "" + echo " --alignIntronMin" + echo 
" type: integer" + echo " example: 21" + echo " minimum intron size, genomic gap is considered intron if its" + echo " length>=alignIntronMin, otherwise it is considered Deletion" + echo "" + echo " --alignIntronMax" + echo " type: integer" + echo " example: 0" + echo " maximum intron size, if 0, max intron size will be determined by" + echo " (2^winBinNbits)*winAnchorDistNbins" + echo "" + echo " --alignMatesGapMax" + echo " type: integer" + echo " example: 0" + echo " maximum gap between two mates, if 0, max intron gap will be determined" + echo " by (2^winBinNbits)*winAnchorDistNbins" + echo "" + echo " --alignSJoverhangMin" + echo " type: integer" + echo " example: 5" + echo " minimum overhang (i.e. block size) for spliced alignments" + echo "" + echo " --alignSJstitchMismatchNmax" + echo " type: integer, multiple values allowed" + echo " example: 0;-1;0;0" + echo " maximum number of mismatches for stitching of the splice junctions (-1:" + echo " no limit)." + echo " (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif." + echo "" + echo " --alignSJDBoverhangMin" + echo " type: integer" + echo " example: 3" + echo " minimum overhang (i.e. 
block size) for annotated (sjdb) spliced" + echo " alignments" + echo "" + echo " --alignSplicedMateMapLmin" + echo " type: integer" + echo " example: 0" + echo " minimum mapped length for a read mate that is spliced" + echo "" + echo " --alignSplicedMateMapLminOverLmate" + echo " type: double" + echo " example: 0.66" + echo " alignSplicedMateMapLmin normalized to mate length" + echo "" + echo " --alignWindowsPerReadNmax" + echo " type: integer" + echo " example: 10000" + echo " max number of windows per read" + echo "" + echo " --alignTranscriptsPerWindowNmax" + echo " type: integer" + echo " example: 100" + echo " max number of transcripts per window" + echo "" + echo " --alignTranscriptsPerReadNmax" + echo " type: integer" + echo " example: 10000" + echo " max number of different alignments per read to consider" + echo "" + echo " --alignEndsType" + echo " type: string" + echo " example: Local" + echo " type of read ends alignment" + echo " - Local ... standard local alignment with soft-clipping" + echo " allowed" + echo " - EndToEnd ... force end-to-end read alignment, do not" + echo " soft-clip" + echo " - Extend5pOfRead1 ... fully extend only the 5p of the read1, all other" + echo " ends: local alignment" + echo " - Extend5pOfReads12 ... fully extend only the 5p of the both read1 and" + echo " read2, all other ends: local alignment" + echo "" + echo " --alignEndsProtrude" + echo " type: string" + echo " example: 0 ConcordantPair" + echo " allow protrusion of alignment ends, i.e. start (end) of the +strand mate" + echo " downstream of the start (end) of the -strand mate" + echo " 1st word: int: maximum number of protrusion bases allowed" + echo " 2nd word: string:" + echo " - ConcordantPair ... report alignments with non-zero" + echo " protrusion as concordant pairs" + echo " - DiscordantPair ... 
report alignments with non-zero" + echo " protrusion as discordant pairs" + echo "" + echo " --alignSoftClipAtReferenceEnds" + echo " type: string" + echo " example: Yes" + echo " allow the soft-clipping of the alignments past the end of the" + echo " chromosomes" + echo " - Yes ... allow" + echo " - No ... prohibit, useful for compatibility with Cufflinks" + echo "" + echo " --alignInsertionFlush" + echo " type: string" + echo " how to flush ambiguous insertion positions" + echo " - None ... insertions are not flushed" + echo " - Right ... insertions are flushed to the right" + echo "" + echo "Paired-End reads:" + echo " --peOverlapNbasesMin" + echo " type: integer" + echo " example: 0" + echo " minimum number of overlapping bases to trigger mates merging and" + echo " realignment. Specify >0 value to switch on the \"merginf of overlapping" + echo " mates\" algorithm." + echo "" + echo " --peOverlapMMp" + echo " type: double" + echo " example: 0.01" + echo " maximum proportion of mismatched bases in the overlap area" + echo "" + echo "Windows, Anchors, Binning:" + echo " --winAnchorMultimapNmax" + echo " type: integer" + echo " example: 50" + echo " max number of loci anchors are allowed to map to" + echo "" + echo " --winBinNbits" + echo " type: integer" + echo " example: 16" + echo " =log2(winBin), where winBin is the size of the bin for the" + echo " windows/clustering, each window will occupy an integer number of bins." 
+ echo "" + echo " --winAnchorDistNbins" + echo " type: integer" + echo " example: 9" + echo " max number of bins between two anchors that allows aggregation of" + echo " anchors into one window" + echo "" + echo " --winFlankNbins" + echo " type: integer" + echo " example: 4" + echo " log2(winFlank), where win Flank is the size of the left and right" + echo " flanking regions for each window" + echo "" + echo " --winReadCoverageRelativeMin" + echo " type: double" + echo " example: 0.5" + echo " minimum relative coverage of the read sequence by the seeds in a window," + echo " for STARlong algorithm only." + echo "" + echo " --winReadCoverageBasesMin" + echo " type: integer" + echo " example: 0" + echo " minimum number of bases covered by the seeds in a window , for STARlong" + echo " algorithm only." + echo "" + echo "Chimeric Alignments:" + echo " --chimOutType" + echo " type: string, multiple values allowed" + echo " example: Junctions" + echo " type of chimeric output" + echo " - Junctions ... Chimeric.out.junction" + echo " - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file" + echo " - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)" + echo " - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for" + echo " supplemental chimeric alignments (default if no 2nd word is present)" + echo " - WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental" + echo " chimeric alignments" + echo "" + echo " --chimSegmentMin" + echo " type: integer" + echo " example: 0" + echo " minimum length of chimeric segment length, if ==0, no chimeric output" + echo "" + echo " --chimScoreMin" + echo " type: integer" + echo " example: 0" + echo " minimum total (summed) score of the chimeric segments" + echo "" + echo " --chimScoreDropMax" + echo " type: integer" + echo " example: 20" + echo " max drop (difference) of chimeric score (the sum of scores of all" + echo " chimeric segments) from the read length" + echo "" + echo " --chimScoreSeparation" + echo " type: integer" + echo " example: 10" + echo " minimum difference (separation) between the best chimeric score and the" + echo " next one" + echo "" + echo " --chimScoreJunctionNonGTAG" + echo " type: integer" + echo " example: -1" + echo " penalty for a non-GT/AG chimeric junction" + echo "" + echo " --chimJunctionOverhangMin" + echo " type: integer" + echo " example: 20" + echo " minimum overhang for a chimeric junction" + echo "" + echo " --chimSegmentReadGapMax" + echo " type: integer" + echo " example: 0" + echo " maximum gap in the read sequence between chimeric segments" + echo "" + echo " --chimFilter" + echo " type: string, multiple values allowed" + echo " example: banGenomicN" + echo " different filters for chimeric alignments" + echo " - None ... no filtering" + echo " - banGenomicN ... Ns are not allowed in the genome sequence around the" + echo " chimeric junction" + echo "" + echo " --chimMainSegmentMultNmax" + echo " type: integer" + echo " example: 10" + echo " maximum number of multi-alignments for the main chimeric segment. =1" + echo " will prohibit multimapping main segments." + echo "" + echo " --chimMultimapNmax" + echo " type: integer" + echo " example: 0" + echo " maximum number of chimeric multi-alignments" + echo " - 0 ... 
use the old scheme for chimeric detection which only considered" + echo " unique alignments" + echo "" + echo " --chimMultimapScoreRange" + echo " type: integer" + echo " example: 1" + echo " the score range for multi-mapping chimeras below the best chimeric" + echo " score. Only works with --chimMultimapNmax > 1" + echo "" + echo " --chimNonchimScoreDropMin" + echo " type: integer" + echo " example: 20" + echo " to trigger chimeric detection, the drop in the best non-chimeric" + echo " alignment score with respect to the read length has to be greater than" + echo " this value" + echo "" + echo " --chimOutJunctionFormat" + echo " type: integer" + echo " example: 0" + echo " formatting type for the Chimeric.out.junction file" + echo " - 0 ... no comment lines/headers" + echo " - 1 ... comment lines at the end of the file: command line and Nreads:" + echo " total, unique/multi-mapping" + echo "" + echo "Quantification of Annotations:" + echo " --quantMode" + echo " type: string, multiple values allowed" + echo " types of quantification requested" + echo " - - ... none" + echo " - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a" + echo " separate file" + echo " - GeneCounts ... count reads per gene" + echo "" + echo " --quantTranscriptomeBAMcompression" + echo " type: integer" + echo " example: 1" + echo " -2 to 10 transcriptome BAM compression level" + echo " - -2 ... no BAM output" + echo " - -1 ... default compression (6?)" + echo " - 0 ... no compression" + echo " - 10 ... maximum compression" + echo "" + echo " --quantTranscriptomeBan" + echo " type: string" + echo " example: IndelSoftclipSingleend" + echo " prohibit various alignment type" + echo " - IndelSoftclipSingleend ... prohibit indels, soft clipping and" + echo " single-end alignments - compatible with RSEM" + echo " - Singleend ... prohibit single-end alignments" + echo "" + echo "2-pass Mapping:" + echo " --twopassMode" + echo " type: string" + echo " 2-pass mapping mode." 
+ echo " - None ... 1-pass mapping" + echo " - Basic ... basic 2-pass mapping, with all 1st pass junctions" + echo " inserted into the genome indices on the fly" + echo "" + echo " --twopass1readsN" + echo " type: integer" + echo " example: -1" + echo " number of reads to process for the 1st step. Use very large number (or" + echo " default -1) to map all reads in the first step." + echo "" + echo "WASP parameters:" + echo " --waspOutputMode" + echo " type: string" + echo " WASP allele-specific output type. This is re-implementation of the" + echo " original WASP mappability filtering by Bryce van de Geijn, Graham" + echo " McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original" + echo " WASP paper: Nature Methods 12, 1061-1063 (2015)," + echo " https://www.nature.com/articles/nmeth.3582 ." + echo " - SAMtag ... add WASP tags to the alignments that pass WASP" + echo " filtering" + echo "" + echo "STARsolo (single cell RNA-seq) parameters:" + echo " --soloType" + echo " type: string, multiple values allowed" + echo " type of single-cell RNA-seq" + echo " - CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of" + echo " fixed length in read2, e.g. Drop-seq and 10X Chromium." + echo " - CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI" + echo " of fixed length and one adapter sequence of fixed length are allowed in" + echo " read2 only (e.g. inDrop, ddSeq)." + echo " - CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No" + echo " UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end]" + echo " CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or" + echo " SortedByCoordinate]" + echo " - SmartSeq ... 
Smart-seq: each cell in a separate FASTQ (paired-" + echo " or single-end), barcodes are corresponding read-groups, no UMI" + echo " sequences, alignments deduplicated according to alignment start and end" + echo " (after extending soft-clipped bases)" + echo "" + echo " --soloCBwhitelist" + echo " type: string, multiple values allowed" + echo " file(s) with whitelist(s) of cell barcodes. Only --soloType" + echo " CB_UMI_Complex allows more than one whitelist file." + echo " - None ... no whitelist: all cell barcodes are allowed" + echo "" + echo " --soloCBstart" + echo " type: integer" + echo " example: 1" + echo " cell barcode start base" + echo "" + echo " --soloCBlen" + echo " type: integer" + echo " example: 16" + echo " cell barcode length" + echo "" + echo " --soloUMIstart" + echo " type: integer" + echo " example: 17" + echo " UMI start base" + echo "" + echo " --soloUMIlen" + echo " type: integer" + echo " example: 10" + echo " UMI length" + echo "" + echo " --soloBarcodeReadLength" + echo " type: integer" + echo " example: 1" + echo " length of the barcode read" + echo " - 1 ... equal to sum of soloCBlen+soloUMIlen" + echo " - 0 ... not defined, do not check" + echo "" + echo " --soloBarcodeMate" + echo " type: integer" + echo " example: 0" + echo " identifies which read mate contains the barcode (CB+UMI) sequence" + echo " - 0 ... barcode sequence is on separate read, which should always be" + echo " the last file in the --readFilesIn listed" + echo " - 1 ... barcode sequence is a part of mate 1" + echo " - 2 ... barcode sequence is a part of mate 2" + echo "" + echo " --soloCBposition" + echo " type: string, multiple values allowed" + echo " position of Cell Barcode(s) on the barcode read." + echo " Presently only works with --soloType CB_UMI_Complex, and barcodes are" + echo " assumed to be on Read2." 
+ echo " Format for each barcode: startAnchor_startPosition_endAnchor_endPosition" + echo " start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1:" + echo " read end; 2: adapter start; 3: adapter end" + echo " start(end)Position is the 0-based position with of the CB start(end)" + echo " with respect to the Anchor Base" + echo " String for different barcodes are separated by space." + echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" + echo " --soloCBposition 0_0_2_-1 3_1_3_8" + echo "" + echo " --soloUMIposition" + echo " type: string" + echo " position of the UMI on the barcode read, same as soloCBposition" + echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" + echo " --soloCBposition 3_9_3_14" + echo "" + echo " --soloAdapterSequence" + echo " type: string" + echo " adapter sequence to anchor barcodes. Only one adapter sequence is" + echo " allowed." + echo "" + echo " --soloAdapterMismatchesNmax" + echo " type: integer" + echo " example: 1" + echo " maximum number of mismatches allowed in adapter sequence." + echo "" + echo " --soloCBmatchWLtype" + echo " type: string" + echo " example: 1MM_multi" + echo " matching the Cell Barcodes to the WhiteList" + echo " - Exact ... only exact matches allowed" + echo " - 1MM ... only one match in whitelist with 1" + echo " mismatched base allowed. Allowed CBs have to have at least one read with" + echo " exact match." + echo " - 1MM_multi ... multiple matches in whitelist with" + echo " 1 mismatched base allowed, posterior probability calculation is used" + echo " choose one of the matches." + echo " Allowed CBs have to have at least one read with exact match. This option" + echo " matches best with CellRanger 2.2.0" + echo " - 1MM_multi_pseudocounts ... same as 1MM_Multi, but" + echo " pseudocounts of 1 are added to all whitelist barcodes." + echo " - 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts," + echo " multimatching to WL is allowed for CBs with N-bases. 
This option matches" + echo " best with CellRanger >= 3.0.0" + echo " - EditDist_2 ... allow up to edit distance of 3 fpr" + echo " each of the barcodes. May include one deletion + one insertion. Only" + echo " works with --soloType CB_UMI_Complex. Matches to multiple passlist" + echo " barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + echo "" + echo " --soloInputSAMattrBarcodeSeq" + echo " type: string, multiple values allowed" + echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" + echo " SAM attributes mark the barcode sequence (in proper order)." + echo " For instance, for 10X CellRanger or STARsolo BAMs, use" + echo " --soloInputSAMattrBarcodeSeq CR UR ." + echo " This parameter is required when running STARsolo with input from SAM." + echo "" + echo " --soloInputSAMattrBarcodeQual" + echo " type: string, multiple values allowed" + echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" + echo " SAM attributes mark the barcode qualities (in proper order)." + echo " For instance, for 10X CellRanger or STARsolo BAMs, use" + echo " --soloInputSAMattrBarcodeQual CY UY ." + echo " If this parameter is '-' (default), the quality 'H' will be assigned to" + echo " all bases." + echo "" + echo " --soloStrand" + echo " type: string" + echo " example: Forward" + echo " strandedness of the solo libraries:" + echo " - Unstranded ... no strand information" + echo " - Forward ... read strand same as the original RNA molecule" + echo " - Reverse ... read strand opposite to the original RNA molecule" + echo "" + echo " --soloFeatures" + echo " type: string, multiple values allowed" + echo " example: Gene" + echo " genomic features for which the UMI counts per Cell Barcode are collected" + echo " - Gene ... genes: reads match the gene transcript" + echo " - SJ ... splice junctions: reported in SJ.out.tab" + echo " - GeneFull ... 
full gene (pre-mRNA): count all reads overlapping" + echo " genes' exons and introns" + echo " - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads" + echo " overlapping genes' exons and introns: prioritize 100% overlap with exons" + echo " - GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads" + echo " overlapping genes' exons and introns: prioritize >50% overlap with" + echo " exons. Do not count reads with 100% exonic overlap in the antisense" + echo " direction." + echo "" + echo " --soloMultiMappers" + echo " type: string, multiple values allowed" + echo " example: Unique" + echo " counting method for reads mapping to multiple genes" + echo " - Unique ... count only reads that map to unique genes" + echo " - Uniform ... uniformly distribute multi-genic UMIs to all genes" + echo " - Rescue ... distribute UMIs proportionally to unique+uniform counts" + echo " (~ first iteration of EM)" + echo " - PropUnique ... distribute UMIs proportionally to unique mappers, if" + echo " present, and uniformly if not." + echo " - EM ... multi-gene UMIs are distributed using Expectation" + echo " Maximization algorithm" + echo "" + echo " --soloUMIdedup" + echo " type: string, multiple values allowed" + echo " example: 1MM_All" + echo " type of UMI deduplication (collapsing) algorithm" + echo " - 1MM_All ... all UMIs with 1 mismatch distance to" + echo " each other are collapsed (i.e. counted once)." + echo " - 1MM_Directional_UMItools ... follows the \"directional\" method from" + echo " the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017)." + echo " - 1MM_Directional ... same as 1MM_Directional_UMItools, but" + echo " with more stringent criteria for duplicate UMIs" + echo " - Exact ... only exactly matching UMIs are" + echo " collapsed." + echo " - NoDedup ... no deduplication of UMIs, count all" + echo " reads." + echo " - 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI" + echo " collapsing." 
+ echo "" + echo " --soloUMIfiltering" + echo " type: string, multiple values allowed" + echo " type of UMI filtering (for reads uniquely mapping to genes)" + echo " - - ... basic filtering: remove UMIs with N and" + echo " homopolymers (similar to CellRanger 2.2.0)." + echo " - MultiGeneUMI ... basic + remove lower-count UMIs that map to" + echo " more than one gene." + echo " - MultiGeneUMI_All ... basic + remove all UMIs that map to more than" + echo " one gene." + echo " - MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to" + echo " more than one gene, matching CellRanger > 3.0.0 ." + echo " Only works with --soloUMIdedup 1MM_CR" + echo "" + echo " --soloOutFileNames" + echo " type: string, multiple values allowed" + echo " example: Solo.out/;features.tsv;barcodes.tsv;matrix.mtx" + echo " file names for STARsolo output:" + echo " file_name_prefix gene_names barcode_sequences" + echo " cell_feature_count_matrix" + echo "" + echo " --soloCellFilter" + echo " type: string, multiple values allowed" + echo " example: CellRanger2.2;3000;0.99;10" + echo " cell filtering type and parameters" + echo " - None ... do not output filtered cells" + echo " - TopCells ... only report top cells by UMI count, followed by" + echo " the exact number of cells" + echo " - CellRanger2.2 ... simple filtering of CellRanger 2.2." + echo " Can be followed by numbers: number of expected cells, robust maximum" + echo " percentile for UMI count, maximum to minimum ratio for UMI count" + echo " The harcoded values are from CellRanger: nExpectedCells=3000;" + echo " maxPercentile=0.99; maxMinRatio=10" + echo " - EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please" + echo " cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20," + echo " 63 (2019):" + echo " " + echo "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y" + echo " Can be followed by 10 numeric parameters: nExpectedCells" + echo " maxPercentile maxMinRatio indMin indMax umiMin" + echo " umiMinFracMedian candMaxN FDR simN" + echo " The harcoded values are from CellRanger: 3000" + echo " 0.99 10 45000 90000 500 0.01" + echo " 20000 0.01 10000" + echo "" + echo " --soloOutFormatFeaturesGeneField3" + echo " type: string, multiple values allowed" + echo " example: Gene Expression" + echo " field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is" + echo " output." + echo "" + echo " --soloCellReadStats" + echo " type: string" + echo " Output reads statistics for each CB" + echo " - Standard ... standard output" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. 
+# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? 
+ else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +ENV STAR_VERSION 2.7.10b +ENV PACKAGES gcc g++ make wget zlib1g-dev unzip +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ + unzip ${STAR_VERSION}.zip && \ + cd STAR-${STAR_VERSION}/source && \ + make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ + cp STAR /usr/local/bin && \ + cd / && \ + rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ + apt-get --purge autoremove -y ${PACKAGES} && \ + apt-get clean + 
+LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component mapping star_align" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-star_align-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "star_align 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readFilesIn) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesIn. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeDir) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFileNamePrefix) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--outFileNamePrefix\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFileNamePrefix. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --runRNGseed) + [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUNRNGSEED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --runRNGseed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --runRNGseed=*) + [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed=*\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUNRNGSEED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeLoad) + [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOMELOAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeLoad. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genomeLoad=*) + [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad=*\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOMELOAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeFastaFiles) + if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_PAR_GENOMEFASTAFILES="$2" + else + VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --genomeFastaFiles=*) + if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_PAR_GENOMEFASTAFILES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --genomeFileSizes) + if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then + VIASH_PAR_GENOMEFILESIZES="$2" + else + VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFileSizes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genomeFileSizes=*) + if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then + VIASH_PAR_GENOMEFILESIZES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --genomeTransformOutput) + if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then + VIASH_PAR_GENOMETRANSFORMOUTPUT="$2" + else + VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeTransformOutput. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genomeTransformOutput=*) + if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then + VIASH_PAR_GENOMETRANSFORMOUTPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --genomeChrSetMitochondrial) + if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$2" + else + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeChrSetMitochondrial. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --genomeChrSetMitochondrial=*) + if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbFileChrStartEnd) + if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then + VIASH_PAR_SJDBFILECHRSTARTEND="$2" + else + VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbFileChrStartEnd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbFileChrStartEnd=*) + if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then + VIASH_PAR_SJDBFILECHRSTARTEND=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbGTFfile) + [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFfile=*) + [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile=*\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFchrPrefix) + [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFCHRPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFchrPrefix. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFchrPrefix=*) + [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix=*\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFCHRPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFfeatureExon) + [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFEATUREEXON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfeatureExon. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFfeatureExon=*) + [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon=*\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFEATUREEXON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentTranscript) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentTranscript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentTranscript=*) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentGene) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTGENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGene=*) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTGENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentGeneName) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$2" + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneName. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGeneName=*) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbGTFtagExonParentGeneType) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$2" + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneType. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGeneType=*) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbOverhang) + [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBOVERHANG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbOverhang. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbOverhang=*) + [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang=*\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBOVERHANG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbScore) + [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBSCORE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbScore. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbScore=*) + [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore=*\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBSCORE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbInsertSave) + [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SJDBINSERTSAVE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbInsertSave. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbInsertSave=*) + [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave=*\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBINSERTSAVE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --varVCFfile) + [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARVCFFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --varVCFfile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --varVCFfile=*) + [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile=*\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARVCFFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesType) + [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesType=*) + [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType=*\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_READFILESTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesSAMattrKeep) + if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then + VIASH_PAR_READFILESSAMATTRKEEP="$2" + else + VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesSAMattrKeep. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesSAMattrKeep=*) + if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then + VIASH_PAR_READFILESSAMATTRKEEP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readFilesManifest) + [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESMANIFEST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesManifest. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesManifest=*) + [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest=*\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESMANIFEST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesPrefix) + [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --readFilesPrefix=*) + [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix=*\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesCommand) + if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then + VIASH_PAR_READFILESCOMMAND="$2" + else + VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesCommand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesCommand=*) + if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then + VIASH_PAR_READFILESCOMMAND=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readMapNumber) + [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMAPNUMBER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMapNumber. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readMapNumber=*) + [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber=*\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMAPNUMBER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readMatesLengthsIn) + [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMATESLENGTHSIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMatesLengthsIn. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readMatesLengthsIn=*) + [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn=*\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMATESLENGTHSIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readNameSeparator) + if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then + VIASH_PAR_READNAMESEPARATOR="$2" + else + VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readNameSeparator. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readNameSeparator=*) + if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then + VIASH_PAR_READNAMESEPARATOR=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readQualityScoreBase) + [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READQUALITYSCOREBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readQualityScoreBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readQualityScoreBase=*) + [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase=*\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READQUALITYSCOREBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --clipAdapterType) + [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CLIPADAPTERTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clipAdapterType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clipAdapterType=*) + [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType=*\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CLIPADAPTERTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --clip3pNbases) + if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then + VIASH_PAR_CLIP3PNBASES="$2" + else + VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pNbases=*) + if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then + VIASH_PAR_CLIP3PNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAdapterSeq) + if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then + VIASH_PAR_CLIP3PADAPTERSEQ="$2" + else + VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterSeq. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pAdapterSeq=*) + if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then + VIASH_PAR_CLIP3PADAPTERSEQ=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAdapterMMp) + if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + VIASH_PAR_CLIP3PADAPTERMMP="$2" + else + VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterMMp. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --clip3pAdapterMMp=*) + if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + VIASH_PAR_CLIP3PADAPTERMMP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAfterAdapterNbases) + if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$2" + else + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAfterAdapterNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pAfterAdapterNbases=*) + if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + VIASH_PAR_CLIP3PAFTERADAPTERNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip5pNbases) + if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then + VIASH_PAR_CLIP5PNBASES="$2" + else + VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip5pNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip5pNbases=*) + if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then + VIASH_PAR_CLIP5PNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --limitGenomeGenerateRAM) + [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITGENOMEGENERATERAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitGenomeGenerateRAM. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --limitGenomeGenerateRAM=*) + [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM=*\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITGENOMEGENERATERAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitIObufferSize) + if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + VIASH_PAR_LIMITIOBUFFERSIZE="$2" + else + VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitIObufferSize. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitIObufferSize=*) + if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + VIASH_PAR_LIMITIOBUFFERSIZE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --limitOutSAMoneReadBytes) + [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSAMONEREADBYTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSAMoneReadBytes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSAMoneReadBytes=*) + [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes=*\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSAMONEREADBYTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitOutSJoneRead) + [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LIMITOUTSJONEREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJoneRead. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSJoneRead=*) + [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead=*\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJONEREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitOutSJcollapsed) + [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJCOLLAPSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJcollapsed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSJcollapsed=*) + [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed=*\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJCOLLAPSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitBAMsortRAM) + [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITBAMSORTRAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitBAMsortRAM. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitBAMsortRAM=*) + [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM=*\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LIMITBAMSORTRAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitSjdbInsertNsj) + [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITSJDBINSERTNSJ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitSjdbInsertNsj. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitSjdbInsertNsj=*) + [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj=*\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITSJDBINSERTNSJ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitNreadsSoft) + [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITNREADSSOFT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitNreadsSoft. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitNreadsSoft=*) + [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft=*\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITNREADSSOFT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outTmpKeep) + [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTTMPKEEP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outTmpKeep. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outTmpKeep=*) + [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep=*\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTTMPKEEP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outStd) + [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSTD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outStd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outStd=*) + [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd=*\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSTD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outReadsUnmapped) + [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTREADSUNMAPPED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outReadsUnmapped. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outReadsUnmapped=*) + [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped=*\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTREADSUNMAPPED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outQSconversionAdd) + [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTQSCONVERSIONADD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outQSconversionAdd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outQSconversionAdd=*) + [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd=*\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTQSCONVERSIONADD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outMultimapperOrder) + [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTMULTIMAPPERORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outMultimapperOrder. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outMultimapperOrder=*) + [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder=*\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTMULTIMAPPERORDER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMtype) + if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then + VIASH_PAR_OUTSAMTYPE="$2" + else + VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtype. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMtype=*) + if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then + VIASH_PAR_OUTSAMTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMmode) + [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMmode=*) + [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode=*\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMstrandField) + [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMSTRANDFIELD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMstrandField. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMstrandField=*) + [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField=*\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMSTRANDFIELD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMattributes) + if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then + VIASH_PAR_OUTSAMATTRIBUTES="$2" + else + VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattributes. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMattributes=*) + if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then + VIASH_PAR_OUTSAMATTRIBUTES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMattrIHstart) + [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMATTRIHSTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrIHstart. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMattrIHstart=*) + [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart=*\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMATTRIHSTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMunmapped) + if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then + VIASH_PAR_OUTSAMUNMAPPED="$2" + else + VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMunmapped. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMunmapped=*) + if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then + VIASH_PAR_OUTSAMUNMAPPED=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMorder) + [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMorder. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMorder=*) + [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder=*\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMORDER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMprimaryFlag) + [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMPRIMARYFLAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMprimaryFlag. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMprimaryFlag=*) + [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag=*\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMPRIMARYFLAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMreadID) + [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMREADID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMreadID. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMreadID=*) + [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID=*\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMREADID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMmapqUnique) + [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSAMMAPQUNIQUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmapqUnique. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMmapqUnique=*) + [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique=*\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMAPQUNIQUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMflagOR) + [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagOR. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMflagOR=*) + [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR=*\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMflagAND) + [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagAND. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMflagAND=*) + [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND=*\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSAMFLAGAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMattrRGline) + if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then + VIASH_PAR_OUTSAMATTRRGLINE="$2" + else + VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrRGline. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMattrRGline=*) + if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then + VIASH_PAR_OUTSAMATTRRGLINE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderHD) + if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then + VIASH_PAR_OUTSAMHEADERHD="$2" + else + VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderHD. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMheaderHD=*) + if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then + VIASH_PAR_OUTSAMHEADERHD=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderPG) + if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then + VIASH_PAR_OUTSAMHEADERPG="$2" + else + VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderPG. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMheaderPG=*) + if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then + VIASH_PAR_OUTSAMHEADERPG=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderCommentFile) + [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMHEADERCOMMENTFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderCommentFile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMheaderCommentFile=*) + [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile=*\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMHEADERCOMMENTFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMfilter) + if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then + VIASH_PAR_OUTSAMFILTER="$2" + else + VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMfilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMfilter=*) + if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then + VIASH_PAR_OUTSAMFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMmultNmax) + [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMULTNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmultNmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMmultNmax=*) + [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax=*\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMULTNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMtlen) + [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMTLEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtlen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMtlen=*) + [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen=*\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMTLEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMcompression) + [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMCOMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMcompression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMcompression=*) + [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression=*\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMCOMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMsortingThreadN) + [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTBAMSORTINGTHREADN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingThreadN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMsortingThreadN=*) + [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN=*\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGTHREADN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMsortingBinsN) + [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGBINSN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingBinsN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMsortingBinsN=*) + [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN=*\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGBINSN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamRemoveDuplicatesType) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesType. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --bamRemoveDuplicatesType=*) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamRemoveDuplicatesMate2basesN) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesMate2basesN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bamRemoveDuplicatesMate2basesN=*) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigType) + if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then + VIASH_PAR_OUTWIGTYPE="$2" + else + VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigType=*) + if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then + VIASH_PAR_OUTWIGTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outWigStrand) + [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTWIGSTRAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigStrand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigStrand=*) + [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand=*\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGSTRAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigReferencesPrefix) + [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGREFERENCESPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigReferencesPrefix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigReferencesPrefix=*) + [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix=*\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGREFERENCESPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigNorm) + [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGNORM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigNorm. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigNorm=*) + [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm=*\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTWIGNORM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterType) + [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterType=*) + [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType=*\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMultimapScoreRange) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMultimapScoreRange=*) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMultimapNmax) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMultimapNmax=*) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMismatchNmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNoverLmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverLmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outFilterMismatchNoverLmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNoverReadLmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverReadLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMismatchNoverReadLmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterScoreMin) + [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterScoreMin=*) + [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin=*\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERSCOREMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterScoreMinOverLread) + [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMINOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMinOverLread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterScoreMinOverLread=*) + [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread=*\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMINOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMatchNmin) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMatchNmin=*) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin=*\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMatchNminOverLread) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNminOverLread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMatchNminOverLread=*) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread=*\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterIntronMotifs) + [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONMOTIFS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronMotifs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterIntronMotifs=*) + [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs=*\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONMOTIFS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterIntronStrands) + [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONSTRANDS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronStrands. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outFilterIntronStrands=*) + [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands=*\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONSTRANDS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJtype) + [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJtype=*) + [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype=*\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJfilterReads) + [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJFILTERREADS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterReads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterReads=*) + [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads=*\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSJFILTERREADS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJfilterOverhangMin) + if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$2" + else + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterOverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterOverhangMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + VIASH_PAR_OUTSJFILTEROVERHANGMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterCountUniqueMin) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$2" + else + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountUniqueMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterCountUniqueMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterCountTotalMin) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$2" + else + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountTotalMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSJfilterCountTotalMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterDistToOtherSJmin) + if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$2" + else + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterDistToOtherSJmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterDistToOtherSJmin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterIntronMaxVsReadN) + if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$2" + else + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterIntronMaxVsReadN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterIntronMaxVsReadN=*) + if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --scoreGap) + [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREGAP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGap. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGap=*) + [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap=*\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapNoncan) + [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPNONCAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapNoncan. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapNoncan=*) + [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan=*\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPNONCAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapGCAG) + [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPGCAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapGCAG. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapGCAG=*) + [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG=*\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREGAPGCAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapATAC) + [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPATAC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapATAC. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapATAC=*) + [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC=*\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPATAC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGenomicLengthLog2scale) + [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGenomicLengthLog2scale. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGenomicLengthLog2scale=*) + [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale=*\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreDelOpen) + [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELOPEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelOpen. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreDelOpen=*) + [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen=*\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELOPEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreDelBase) + [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreDelBase=*) + [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase=*\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreInsOpen) + [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSOPEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsOpen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreInsOpen=*) + [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen=*\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSOPEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreInsBase) + [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREINSBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreInsBase=*) + [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase=*\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreStitchSJshift) + [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCORESTITCHSJSHIFT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreStitchSJshift. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreStitchSJshift=*) + [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift=*\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCORESTITCHSJSHIFT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchStartLmax) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchStartLmax=*) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchStartLmaxOverLread) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmaxOverLread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchStartLmaxOverLread=*) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchLmax) + [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchLmax=*) + [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax=*\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedMultimapNmax) + [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SEEDMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedMultimapNmax=*) + [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax=*\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedPerReadNmax) + [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedPerReadNmax=*) + [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax=*\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedPerWindowNmax) + [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERWINDOWNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedPerWindowNmax=*) + [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax=*\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SEEDPERWINDOWNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedNoneLociPerWindow) + [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDNONELOCIPERWINDOW="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedNoneLociPerWindow. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedNoneLociPerWindow=*) + [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow=*\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDNONELOCIPERWINDOW=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSplitMin) + [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSPLITMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSplitMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSplitMin=*) + [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin=*\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSPLITMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedMapMin) + [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMAPMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMapMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --seedMapMin=*) + [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin=*\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMAPMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignIntronMin) + [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignIntronMin=*) + [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin=*\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignIntronMax) + [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignIntronMax=*) + [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax=*\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignMatesGapMax) + [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNMATESGAPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignMatesGapMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignMatesGapMax=*) + [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax=*\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNMATESGAPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSJoverhangMin) + [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJoverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSJoverhangMin=*) + [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin=*\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSJstitchMismatchNmax) + if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$2" + else + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJstitchMismatchNmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --alignSJstitchMismatchNmax=*) + if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --alignSJDBoverhangMin) + [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJDBOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJDBoverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSJDBoverhangMin=*) + [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin=*\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJDBOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSplicedMateMapLmin) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSplicedMateMapLmin=*) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSplicedMateMapLminOverLmate) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLminOverLmate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSplicedMateMapLminOverLmate=*) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignWindowsPerReadNmax) + [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNWINDOWSPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignWindowsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignWindowsPerReadNmax=*) + [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax=*\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNWINDOWSPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignTranscriptsPerWindowNmax) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignTranscriptsPerWindowNmax=*) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignTranscriptsPerReadNmax) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignTranscriptsPerReadNmax=*) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignEndsType) + [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignEndsType=*) + [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType=*\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignEndsProtrude) + [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSPROTRUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsProtrude. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignEndsProtrude=*) + [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude=*\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSPROTRUDE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSoftClipAtReferenceEnds) + [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSoftClipAtReferenceEnds. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSoftClipAtReferenceEnds=*) + [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds=*\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignInsertionFlush) + [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINSERTIONFLUSH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignInsertionFlush. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignInsertionFlush=*) + [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush=*\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINSERTIONFLUSH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --peOverlapNbasesMin) + [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPNBASESMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapNbasesMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --peOverlapNbasesMin=*) + [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin=*\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_PEOVERLAPNBASESMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --peOverlapMMp) + [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPMMP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapMMp. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --peOverlapMMp=*) + [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp=*\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPMMP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winAnchorMultimapNmax) + [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winAnchorMultimapNmax=*) + [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax=*\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winBinNbits) + [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINBINNBITS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winBinNbits. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --winBinNbits=*) + [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits=*\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINBINNBITS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winAnchorDistNbins) + [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORDISTNBINS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorDistNbins. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winAnchorDistNbins=*) + [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins=*\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORDISTNBINS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winFlankNbins) + [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINFLANKNBINS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winFlankNbins. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winFlankNbins=*) + [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins=*\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_WINFLANKNBINS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winReadCoverageRelativeMin) + [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGERELATIVEMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageRelativeMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winReadCoverageRelativeMin=*) + [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin=*\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGERELATIVEMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winReadCoverageBasesMin) + [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGEBASESMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageBasesMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winReadCoverageBasesMin=*) + [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin=*\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGEBASESMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimOutType) + if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then + VIASH_PAR_CHIMOUTTYPE="$2" + else + VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutType. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimOutType=*) + if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then + VIASH_PAR_CHIMOUTTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --chimSegmentMin) + [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimSegmentMin=*) + [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin=*\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreMin) + [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreMin=*) + [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin=*\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreDropMax) + [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMSCOREDROPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreDropMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreDropMax=*) + [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax=*\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREDROPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreSeparation) + [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCORESEPARATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreSeparation. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreSeparation=*) + [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation=*\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCORESEPARATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreJunctionNonGTAG) + [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreJunctionNonGTAG. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --chimScoreJunctionNonGTAG=*) + [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG=*\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimJunctionOverhangMin) + [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMJUNCTIONOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimJunctionOverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimJunctionOverhangMin=*) + [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin=*\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMJUNCTIONOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimSegmentReadGapMax) + [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTREADGAPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentReadGapMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimSegmentReadGapMax=*) + [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax=*\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMSEGMENTREADGAPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimFilter) + if [ -z "$VIASH_PAR_CHIMFILTER" ]; then + VIASH_PAR_CHIMFILTER="$2" + else + VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimFilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimFilter=*) + if [ -z "$VIASH_PAR_CHIMFILTER" ]; then + VIASH_PAR_CHIMFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --chimMainSegmentMultNmax) + [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMAINSEGMENTMULTNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMainSegmentMultNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimMainSegmentMultNmax=*) + [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax=*\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMAINSEGMENTMULTNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimMultimapNmax) + [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapNmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --chimMultimapNmax=*) + [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax=*\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimMultimapScoreRange) + [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPSCORERANGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimMultimapScoreRange=*) + [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange=*\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimNonchimScoreDropMin) + [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMNONCHIMSCOREDROPMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimNonchimScoreDropMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimNonchimScoreDropMin=*) + [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin=*\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMNONCHIMSCOREDROPMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimOutJunctionFormat) + [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMOUTJUNCTIONFORMAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutJunctionFormat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimOutJunctionFormat=*) + [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat=*\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMOUTJUNCTIONFORMAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quantMode) + if [ -z "$VIASH_PAR_QUANTMODE" ]; then + VIASH_PAR_QUANTMODE="$2" + else + VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quantMode=*) + if [ -z "$VIASH_PAR_QUANTMODE" ]; then + VIASH_PAR_QUANTMODE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --quantTranscriptomeBAMcompression) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBAMcompression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --quantTranscriptomeBAMcompression=*) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quantTranscriptomeBan) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBan. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quantTranscriptomeBan=*) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --twopassMode) + [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASSMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopassMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --twopassMode=*) + [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode=*\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_TWOPASSMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --twopass1readsN) + [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASS1READSN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopass1readsN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --twopass1readsN=*) + [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN=*\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASS1READSN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --waspOutputMode) + [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WASPOUTPUTMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --waspOutputMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --waspOutputMode=*) + [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode=*\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WASPOUTPUTMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloType) + if [ -z "$VIASH_PAR_SOLOTYPE" ]; then + VIASH_PAR_SOLOTYPE="$2" + else + VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloType. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloType=*) + if [ -z "$VIASH_PAR_SOLOTYPE" ]; then + VIASH_PAR_SOLOTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCBwhitelist) + if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then + VIASH_PAR_SOLOCBWHITELIST="$2" + else + VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBwhitelist. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBwhitelist=*) + if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then + VIASH_PAR_SOLOCBWHITELIST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCBstart) + [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBSTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBstart. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBstart=*) + [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart=*\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBSTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBlen) + [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBLEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBlen. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloCBlen=*) + [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen=*\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBLEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloUMIstart) + [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMISTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIstart. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIstart=*) + [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart=*\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMISTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloUMIlen) + [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMILEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIlen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIlen=*) + [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen=*\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMILEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloBarcodeReadLength) + [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SOLOBARCODEREADLENGTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeReadLength. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloBarcodeReadLength=*) + [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength=*\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEREADLENGTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloBarcodeMate) + [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEMATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeMate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloBarcodeMate=*) + [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate=*\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEMATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBposition) + if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then + VIASH_PAR_SOLOCBPOSITION="$2" + else + VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBposition. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloCBposition=*) + if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then + VIASH_PAR_SOLOCBPOSITION=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIposition) + [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMIPOSITION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIposition. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIposition=*) + [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition=*\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMIPOSITION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloAdapterSequence) + [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERSEQUENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterSequence. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloAdapterSequence=*) + [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence=*\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SOLOADAPTERSEQUENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloAdapterMismatchesNmax) + [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERMISMATCHESNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterMismatchesNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloAdapterMismatchesNmax=*) + [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax=*\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERMISMATCHESNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBmatchWLtype) + [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBMATCHWLTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBmatchWLtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBmatchWLtype=*) + [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype=*\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBMATCHWLTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloInputSAMattrBarcodeSeq) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$2" + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeSeq. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloInputSAMattrBarcodeSeq=*) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloInputSAMattrBarcodeQual) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$2" + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeQual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloInputSAMattrBarcodeQual=*) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloStrand) + [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOSTRAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloStrand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloStrand=*) + [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand=*\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOSTRAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloFeatures) + if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then + VIASH_PAR_SOLOFEATURES="$2" + else + VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloFeatures. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloFeatures=*) + if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then + VIASH_PAR_SOLOFEATURES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloMultiMappers) + if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then + VIASH_PAR_SOLOMULTIMAPPERS="$2" + else + VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloMultiMappers. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloMultiMappers=*) + if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then + VIASH_PAR_SOLOMULTIMAPPERS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIdedup) + if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then + VIASH_PAR_SOLOUMIDEDUP="$2" + else + VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIdedup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIdedup=*) + if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then + VIASH_PAR_SOLOUMIDEDUP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIfiltering) + if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then + VIASH_PAR_SOLOUMIFILTERING="$2" + else + VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIfiltering. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloUMIfiltering=*) + if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then + VIASH_PAR_SOLOUMIFILTERING=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloOutFileNames) + if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then + VIASH_PAR_SOLOOUTFILENAMES="$2" + else + VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFileNames. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloOutFileNames=*) + if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then + VIASH_PAR_SOLOOUTFILENAMES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCellFilter) + if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then + VIASH_PAR_SOLOCELLFILTER="$2" + else + VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellFilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCellFilter=*) + if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then + VIASH_PAR_SOLOCELLFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloOutFormatFeaturesGeneField3) + if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$2" + else + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFormatFeaturesGeneField3. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloOutFormatFeaturesGeneField3=*) + if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCellReadStats) + [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCELLREADSTATS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellReadStats. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCellReadStats=*) + [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats=*\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCELLREADSTATS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + # the strategy is read from the next argument, so consume the flag and the value; + # a bare trailing ---setup (no value) only consumes the flag + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + if [ $# -ge 2 ]; then + shift 2 + else + shift 1 + fi + ;; + ---setup=*) + # the strategy is embedded in this argument, so consume exactly one argument + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the mount spec is embedded in this argument ($1), not in the following one + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +# VIASH_MODE is quoted so that an empty value compares as a string instead of breaking the test +if [ "$VIASH_MODE" == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0' ifneedbepullelsecachedbuild + +if [ "$VIASH_MODE" == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +# usage: ViashMemoryAsBytes <amount><unit> +# unit is one of b, k[b], m[b], g[b], t[b], p[b]; case and whitespace are ignored +# prints the amount in bytes (powers of 1024); prints nothing if the string does not match +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + # strip everything from the first non-digit / up to the last digit + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + local memory_b + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + # quoted so a value such as "2 GB" reaches the helper as one argument + VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + # each larger unit is the previous one divided by 1024, rounded up + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # (unset takes the variable name; expanding the empty value would unset nothing) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE+x} ]; then + ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ] && [ ! -e "$VIASH_PAR_SJDBGTFFILE" ]; then + ViashError "Input file '$VIASH_PAR_SJDBGTFFILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ] && [ ! -e "$VIASH_PAR_READFILESMANIFEST" ]; then + ViashError "Input file '$VIASH_PAR_READFILESMANIFEST' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_RUNRNGSEED" ]]; then + if ! [[ "$VIASH_PAR_RUNRNGSEED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--runRNGseed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_GENOMEFILESIZES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_GENOMEFILESIZES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--genomeFileSizes' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_SJDBOVERHANG" ]]; then + if ! [[ "$VIASH_PAR_SJDBOVERHANG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sjdbOverhang' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SJDBSCORE" ]]; then + if ! [[ "$VIASH_PAR_SJDBSCORE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sjdbScore' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_READMAPNUMBER" ]]; then + if ! [[ "$VIASH_PAR_READMAPNUMBER" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--readMapNumber' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_READQUALITYSCOREBASE" ]]; then + if ! [[ "$VIASH_PAR_READQUALITYSCOREBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--readQualityScoreBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_CLIP3PNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip3pNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PADAPTERMMP; do + if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--clip3pAdapterMMp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PAFTERADAPTERNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip3pAfterAdapterNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP5PNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP5PNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip5pNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ]]; then + if ! [[ "$VIASH_PAR_LIMITGENOMEGENERATERAM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitGenomeGenerateRAM' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_LIMITIOBUFFERSIZE; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitIObufferSize' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSAMoneReadBytes' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSJONEREAD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSJoneRead' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSJCOLLAPSED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSJcollapsed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITBAMSORTRAM" ]]; then + if ! [[ "$VIASH_PAR_LIMITBAMSORTRAM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitBAMsortRAM' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ]]; then + if ! [[ "$VIASH_PAR_LIMITSJDBINSERTNSJ" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitSjdbInsertNsj' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITNREADSSOFT" ]]; then + if ! [[ "$VIASH_PAR_LIMITNREADSSOFT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitNreadsSoft' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ]]; then + if ! 
[[ "$VIASH_PAR_OUTQSCONVERSIONADD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outQSconversionAdd' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMATTRIHSTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMattrIHstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMMAPQUNIQUE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMmapqUnique' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMFLAGOR" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMFLAGOR" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMflagOR' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMFLAGAND" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMFLAGAND" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMflagAND' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMMULTNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMmultNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMTLEN" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMTLEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMtlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ]]; then + if ! [[ "$VIASH_PAR_OUTBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ]]; then + if ! 
[[ "$VIASH_PAR_OUTBAMSORTINGTHREADN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMsortingThreadN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ]]; then + if ! [[ "$VIASH_PAR_OUTBAMSORTINGBINSN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMsortingBinsN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ]]; then + if ! [[ "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--bamRemoveDuplicatesMate2basesN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMismatchNoverLmax' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ]]; then + if ! 
[[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMismatchNoverReadLmax' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterScoreMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterScoreMinOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMatchNmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMatchNminOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTEROVERHANGMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterCountUniqueMin' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterCountTotalMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterDistToOtherSJmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterIntronMaxVsReadN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_SCOREGAP" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAP" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGap' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPNONCAN" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPNONCAN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapNoncan' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPGCAG" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPGCAG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapGCAG' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPATAC" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPATAC" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapATAC' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ]]; then + if ! [[ "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGenomicLengthLog2scale' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREDELOPEN" ]]; then + if ! [[ "$VIASH_PAR_SCOREDELOPEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreDelOpen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREDELBASE" ]]; then + if ! [[ "$VIASH_PAR_SCOREDELBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreDelBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREINSOPEN" ]]; then + if ! [[ "$VIASH_PAR_SCOREINSOPEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreInsOpen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREINSBASE" ]]; then + if ! [[ "$VIASH_PAR_SCOREINSBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreInsBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ]]; then + if ! [[ "$VIASH_PAR_SCORESTITCHSJSHIFT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreStitchSJshift' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDSEARCHSTARTLMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSearchStartLmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ]]; then + if ! 
[[ "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--seedSearchStartLmaxOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHLMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDSEARCHLMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSearchLmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ]]; then + if ! [[ "$VIASH_PAR_SEEDNONELOCIPERWINDOW" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedNoneLociPerWindow' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSPLITMIN" ]]; then + if ! [[ "$VIASH_PAR_SEEDSPLITMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSplitMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDMAPMIN" ]]; then + if ! [[ "$VIASH_PAR_SEEDMAPMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedMapMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNINTRONMIN" ]]; then + if ! 
[[ "$VIASH_PAR_ALIGNINTRONMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignIntronMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNINTRONMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNINTRONMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignIntronMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNMATESGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignMatesGapMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSJOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJstitchMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJDBoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSplicedMateMapLmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ]]; then + if ! 
[[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--alignSplicedMateMapLminOverLmate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignWindowsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignTranscriptsPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignTranscriptsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ]]; then + if ! [[ "$VIASH_PAR_PEOVERLAPNBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--peOverlapNbasesMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PEOVERLAPMMP" ]]; then + if ! [[ "$VIASH_PAR_PEOVERLAPMMP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--peOverlapMMp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_WINANCHORMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winAnchorMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINBINNBITS" ]]; then + if ! 
[[ "$VIASH_PAR_WINBINNBITS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winBinNbits' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINANCHORDISTNBINS" ]]; then + if ! [[ "$VIASH_PAR_WINANCHORDISTNBINS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winAnchorDistNbins' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINFLANKNBINS" ]]; then + if ! [[ "$VIASH_PAR_WINFLANKNBINS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winFlankNbins' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ]]; then + if ! [[ "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--winReadCoverageRelativeMin' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ]]; then + if ! [[ "$VIASH_PAR_WINREADCOVERAGEBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winReadCoverageBasesMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSEGMENTMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMSEGMENTMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimSegmentMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREDROPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreDropMax' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCORESEPARATION" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCORESEPARATION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreSeparation' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreJunctionNonGTAG' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimJunctionOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimSegmentReadGapMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMainSegmentMultNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ]]; then + if ! [[ "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ]]; then + if ! 
[[ "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimNonchimScoreDropMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ]]; then + if ! [[ "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimOutJunctionFormat' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ]]; then + if ! [[ "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--quantTranscriptomeBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TWOPASS1READSN" ]]; then + if ! [[ "$VIASH_PAR_TWOPASS1READSN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--twopass1readsN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOCBSTART" ]]; then + if ! [[ "$VIASH_PAR_SOLOCBSTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloCBstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOCBLEN" ]]; then + if ! [[ "$VIASH_PAR_SOLOCBLEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloCBlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOUMISTART" ]]; then + if ! [[ "$VIASH_PAR_SOLOUMISTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloUMIstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOUMILEN" ]]; then + if ! [[ "$VIASH_PAR_SOLOUMILEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloUMIlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ]]; then + if ! 
[[ "$VIASH_PAR_SOLOBARCODEREADLENGTH" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloBarcodeReadLength' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOBARCODEMATE" ]]; then + if ! [[ "$VIASH_PAR_SOLOBARCODEMATE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloBarcodeMate' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ]]; then + if ! [[ "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloAdapterMismatchesNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_TEST_GENOMEFASTAFILES=() + IFS=';' + for var in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_GENOMEFASTAFILES+=( "$var" ) + done + VIASH_PAR_GENOMEFASTAFILES=$(IFS=';' ; echo "${VIASH_TEST_GENOMEFASTAFILES[*]}") +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SJDBGTFFILE")" ) + VIASH_PAR_SJDBGTFFILE=$(ViashAutodetectMount "$VIASH_PAR_SJDBGTFFILE") +fi +if [ ! 
-z "$VIASH_PAR_READFILESMANIFEST" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_READFILESMANIFEST")" ) + VIASH_PAR_READFILESMANIFEST=$(ViashAutodetectMount "$VIASH_PAR_READFILESMANIFEST") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts: print every VIASH_EXTRA_MOUNTS entry on its own line and de-duplicate with sort -u. NOTE(review): the unquoted command substitution re-splits the result on whitespace, so an entry containing a space would become several array elements +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership: ViashPerformChown is registered as the EXIT trap; when VIASH_CHOWN_VARS is non-empty it runs chown as the entrypoint of the component image, recursively handing each listed path to the invoking user's uid:gid. errexit is switched off around the docker call and switched back on afterwards. NOTE(review): the array expansions are unquoted and re-parsed by eval, presumably because the mount arguments carry their own quoting; confirm against ViashAutodetectMountArg before changing +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_star_align:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# build the extra docker args string (not a function): each resource flag is appended to VIASH_EXTRA_DOCKER_ARGS only when its meta variable is non-empty, starting with --memory in megabytes +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ !
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-star_align-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import re +import tempfile +import subprocess +from pathlib import Path +import tarfile +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'genomeFileSizes': $( if [ ! -z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'genomeTransformOutput': $( if [ ! 
-z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sjdbScore': $( if [ ! 
-z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'clipAdapterType': $( if [ ! 
-z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitBAMsortRAM': $( if [ ! 
-z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMunmapped': $( if [ ! 
-z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMmultNmax': $( if [ ! 
-z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterMultimapScoreRange': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterIntronStrands': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJtype': $( if [ ! 
-z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapATAC': $( if [ ! -z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGenomicLengthLog2scale': $( if [ ! 
-z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedNoneLociPerWindow': $( if [ ! 
-z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSplicedMateMapLmin': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignWindowsPerReadNmax': $( if [ ! 
-z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winAnchorMultimapNmax': $( if [ ! -z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winAnchorDistNbins': $( if [ ! 
-z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimFilter': $( if [ ! 
-z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloType': $( if [ ! 
-z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBmatchWLtype': $( if [ ! 
-z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCellReadStats': $( if [ ! 
-z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi )
+}
+
+## VIASH END
+
+########################
+### Helper functions ###
+########################
+
+# regex for matching R[12] fastq(gz) files; group 2 captures the read label (R1, R2, ...). Backslashes are doubled because this script is emitted through an unquoted shell here-document.
+# examples:
+# - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz
+# - tinygex_S1_L001_I1_001.fastq.gz (NOTE(review): this I1 name has no _R<digits> part, so the pattern below does not match it)
+fastqgz_regex = r'(.+)_(R\\d+)(_\\d+)?\\.fastq(\\.gz)?'
+
+# helper function for checking whether a file is gzip-compressed: reads its first two bytes and compares them with the gzip magic number (1f 8b)
+def is_gz_file(path: Path) -> bool:
+    with open(path, 'rb') as file:
+        return file.read(2) == b'\\x1f\\x8b'
+
+# look for fastq files in a directory: returns the direct children whose name matches fastqgz_regex (not recursive, in iterdir order); any other path is returned unchanged as a one-element list
+def search_fastqs(path: Path) -> list[Path]:
+    if path.is_dir():
+        print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True)
+        value_paths = [file for file in path.iterdir() if re.match(fastqgz_regex, file.name) ]
+        return value_paths
+    else:
+        return [path]
+
+# if par_value is a tar archive or a gzip file, unpack it below temp_dir_path and return the unpacked path; otherwise return par_value unchanged
+def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path:
+
+    if par_value.is_file() and tarfile.is_tarfile(par_value):
+        # Remove two extensions (if they exist)
+        extaction_dir_name = Path(par_value.stem).stem
+        unpacked_path = temp_dir_path / extaction_dir_name
+        print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True)
+
+        with tarfile.open(par_value, 'r') as open_tar:
+            members = open_tar.getmembers()
+            root_dirs = [member
+                for member in members
+                if member.isdir() and member.name != '.' and '/' not in member.name]  # directories sitting directly at the archive root
+            # if there is only one root_dir (and there are files in that directory)
+            # strip that directory name from the destination folder
+            if len(root_dirs) == 1:
+                for mem in members:
+                    mem.path = Path(*Path(mem.path).parts[1:])  # drop the first path component of every member
+            members_to_move = [mem for mem in members if mem.path != Path('.')]  # skips entries reduced to '.'; NOTE(review): a root-level file next to the single root directory is reduced to '.' as well and would be skipped
+            open_tar.extractall(unpacked_path, members=members_to_move)  # NOTE(review): no extraction filter is passed; confirm the archives are trusted
+        return unpacked_path
+
+    elif par_value.is_file() and is_gz_file(par_value):
+        # Remove extension (if it exists)
+        extaction_file_name = Path(par_value.stem)
+        unpacked_path = temp_dir_path / extaction_file_name
+        print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True)
+
+        with gzip.open(par_value, 'rb') as f_in:
+            with open(unpacked_path, 'wb') as f_out:
+                shutil.copyfileobj(f_in, f_out)  # stream the decompressed bytes to the new file
+        return unpacked_path
+
+    else:
+        return par_value
+
+########################
+### Main code ###
+########################
+
+# rename keys and convert path strings to Path
+# note: only list file arguments here. 
if non-file arguments also need to be renamed, +# the \`process_par()\` generator needs to be adapted +to_rename = {'input': 'readFilesIn', 'reference': 'genomeDir', 'output': 'outFileNamePrefix'} + +def process_par(orig_par, to_rename): + for key, value in orig_par.items(): + # rename the key in par based on the \`to_rename\` dict + if key in to_rename.keys(): + new_key = to_rename[key] + + # also turn value into a Path + if isinstance(value, list): + new_value = [Path(val) for val in value] + else: + new_value = Path(value) + else: + new_key = key + new_value = value + yield new_key, new_value +par = dict(process_par(par, to_rename)) + +# create output dir if need be +par["outFileNamePrefix"].mkdir(parents=True, exist_ok=True) + +with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: + print(">> Check whether input files are directories", flush=True) + new_read_files_in = [] + for path in par["readFilesIn"]: + new_read_files_in.extend(search_fastqs(path)) + par["readFilesIn"] = new_read_files_in + print("", flush=True) + + # checking for compressed files, untar/gunzip files into the temp dir if need be + temp_dir_path = Path(temp_dir) + for par_name in ["genomeDir", "readFilesIn"]: + par_values = par[par_name] + if par_values: + # turn value into list + is_multiple = isinstance(par_values, list) + if not is_multiple: + par_values = [ par_values ] + + # output list + new_values = [] + for par_value in par_values: + print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) + new_value = extract_if_need_be(par_value, temp_dir_path) + new_values.append(new_value) + + # unlist if need be + if not is_multiple: + new_values = new_values[0] + + # replace value + par[par_name] = new_values + # end of archive extraction + print("", flush=True) + + print("Grouping R1/R2 input files into pairs", flush=True) + input_grouped = {} + for path in par['readFilesIn']: + key = re.search(fastqgz_regex, path.name).group(2) + if key not in
input_grouped: + input_grouped[key] = [] + input_grouped[key].append(str(path)) + par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ] + print("", flush=True) + + print(">> Constructing command", flush=True) + par["runMode"] = "alignReads" + par["outTmpDir"] = temp_dir_path / "run" + if 'cpus' in meta and meta['cpus']: + par["runThreadN"] = meta["cpus"] + # make sure there is a trailing / + par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/" + + cmd_args = [ "STAR" ] + for name, value in par.items(): + if value is not None: + if isinstance(value, list): + cmd_args.extend(["--" + name] + [str(x) for x in value]) + else: + cmd_args.extend(["--" + name, str(value)]) + print("", flush=True) + + print(">> Running STAR with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + print("", flush=True) + + subprocess.run( + cmd_args, + check=True + ) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + unset VIASH_TEST_GENOMEFASTAFILES + IFS=';' + for var in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + if [ -z "$VIASH_TEST_GENOMEFASTAFILES" ]; then + VIASH_TEST_GENOMEFASTAFILES="$(ViashStripAutomount "$var")" + else + VIASH_TEST_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES" +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then + VIASH_PAR_SJDBGTFFILE=$(ViashStripAutomount "$VIASH_PAR_SJDBGTFFILE") +fi +if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ]; then + VIASH_PAR_READFILESMANIFEST=$(ViashStripAutomount "$VIASH_PAR_READFILESMANIFEST") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/star_align_v273a/.config.vsh.yaml b/target/docker/mapping/star_align_v273a/.config.vsh.yaml new file mode 100644 index 00000000000..6553ece2d26 --- /dev/null +++ b/target/docker/mapping/star_align_v273a/.config.vsh.yaml @@ -0,0 +1,2535 @@ +functionality: + name: "star_align_v273a" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input/Output" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--readFilesIn" + description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ + \ in the STAR command." + info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "--genomeDir" + description: "Path to the reference built by star_build_reference. Corresponds\ + \ to the --genomeDir in the STAR command." 
+ info: null + example: + - "/path/to/reference" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--outFileNamePrefix" + description: "Path to output directory. Corresponds to the --outFileNamePrefix\ + \ in the STAR command." + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Run Parameters" + arguments: + - type: "integer" + name: "--runRNGseed" + description: "random number generator seed." + info: null + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome Parameters" + arguments: + - type: "string" + name: "--genomeLoad" + description: "mode of shared memory usage for the genome files. Only used with\ + \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ + \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ + \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ + \ and exit, keeping the genome in memory for future runs\n- Remove \ + \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ + \ ... do not use shared memory, each job will have its own private copy of\ + \ the genome" + info: null + example: + - "NoSharedMemory" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--genomeFastaFiles" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." 
+ info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--genomeFileSizes" + description: "genome files exact sizes in bytes. Typically, this should not\ + \ be defined by the user." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeTransformOutput" + description: "which output to transform back to original genome\n\n- SAM \ + \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ + \ None ... no transformation of the output" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeChrSetMitochondrial" + description: "names of the mitochondrial chromosomes. Presently only used for\ + \ STARsolo statistics output/" + info: null + example: + - "chrM" + - "M" + - "MT" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdbFileChrStartEnd" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. Multiple files can be\ + \ supplied and will be concatenated." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--sjdbGTFfile" + description: "path to the GTF file with annotations" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFchrPrefix" + description: "prefix for chromosome names in a GTF file (e.g. 
'chr' for using\ + \ ENSEMBL annotations with UCSC genomes)" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFfeatureExon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: null + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentTranscript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: null + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: null + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneName" + description: "GTF attribute name for parent gene name" + info: null + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneType" + description: "GTF attribute name for parent gene type" + info: null + example: + - "gene_type" + - "gene_biotype" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--sjdbOverhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--sjdbScore" + description: "extra alignment score for alignments that cross database junctions" + info: null + example: + - 2 + required: false
+ direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbInsertSave" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: null + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variation parameters" + arguments: + - type: "string" + name: "--varVCFfile" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Parameters" + arguments: + - type: "string" + name: "--readFilesType" + description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ + - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ + \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ + \ --readFilesCommand samtools view" + info: null + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesSAMattrKeep" + description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMattrKeep RG PL\n\n- All ... keep all tags\n\ + - None ... do not keep any tags" + info: null + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--readFilesManifest" + description: "path to the \"manifest\" file with the names of read files.
The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ + \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ + \ but not tabs are allowed in file names.\nIf read_group_line does not start\ + \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ + If read_group_line starts with ID:, it can contain several fields separated\ + \ by $tab$, and all fields will be copied verbatim into SAM @RG header\ + \ line." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesPrefix" + description: "prefix for the read files names, i.e. it will be added in front\ + \ of the strings in --readFilesIn" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesCommand" + description: "command line to execute for each of the input files. This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readMapNumber" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readMatesLengthsIn" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ + \ mates are the same / not the same. NotEqual is safe in all situations."
+ info: null + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readNameSeparator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: null + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readQualityScoreBase" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: null + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Clipping" + arguments: + - type: "string" + name: "--clipAdapterType" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ + \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ + \ ... no adapter clipping, all other clip* parameters are disregarded" + info: null + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--clip3pNbases" + description: "number(s) of bases to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--clip3pAdapterSeq" + description: "adapter sequences to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates.\n\n- polyA ... 
polyA\ + \ sequence with the length equal to read length" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "double" + name: "--clip3pAdapterMMp" + description: "max proportion of mismatches for 3p adapter clipping for each\ + \ mate. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip3pAfterAdapterNbases" + description: "number of bases to clip from 3p of each mate after the adapter\ + \ clipping. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip5pNbases" + description: "number(s) of bases to clip from 5p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Limits" + arguments: + - type: "long" + name: "--limitGenomeGenerateRAM" + description: "maximum available RAM (bytes) for genome generation" + info: null + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitIObufferSize" + description: "max available buffers size (bytes) for input/output, per thread" + info: null + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "long" + name: "--limitOutSAMoneReadBytes" + description: "max size of the SAM record (bytes) for one read. 
Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: null + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJoneRead" + description: "max number of junctions for one read (including all multi-mappers)" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJcollapsed" + description: "max number of collapsed junctions" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitBAMsortRAM" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ + \ be set to the genome index size. 0 value can only be used with --genomeLoad\ + \ NoSharedMemory option." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitSjdbInsertNsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitNreadsSoft" + description: "soft limit on the number of reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: general" + arguments: + - type: "string" + name: "--outTmpKeep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... 
keep all files" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outStd" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" + info: null + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outReadsUnmapped" + description: "output of unmapped and partially mapped (i.e. mapped only one\ + \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ + \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outQSconversionAdd" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outMultimapperOrder" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." 
+ info: null + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--outSAMtype" + description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ + \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ + 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ + \ ... sorted by coordinate. This option will allocate extra memory for sorting\ + \ which can be specified by --limitBAMsortRAM." + info: null + example: + - "SAM" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMmode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... full SAM but without quality scores" + info: null + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMstrandField" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ + \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ + \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ + \ --outSAMattrIHstart (=1 by default). 
Standard SAM tag.\n- AS ...\ + \ local alignment score, +1/-1 for matches/mismatches, score* penalties for\ + \ indels and gaps. For PE reads, total score for two mates. Standard SAM tag.\n\ + - nM ... number of mismatches. For PE reads, sum over two mates.\n\ + - NM ... edit distance to the reference (number of mismatched + inserted\ + \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ + \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ + \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ + \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ + \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ + \ annotated, 20 is added to its motif value.\n- jI ... start and\ + \ end of introns for all junctions (1-based).\n- XS ... alignment\ + \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ + \ string. Standard SAM tag.\n- ch ... marks all segments of all chimeric\ + \ alignments for --chimOutType WithinBAM output.\n- cN ... number\ + \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ + \ ... variant allele\n- vG ... genomic coordinate of the variant\ + \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ + \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ + \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ + \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ + \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ + \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ + \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ + \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ + \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ + \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ + \ ha ...
haplotype (1/2) when mapping to the diploid genome. Requires\ + \ genome generated with --genomeTransformType Diploid .\n- rB ...\ + \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ + \ of the variant." + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMattrIHstart" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMunmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ... output unmapped reads within the main SAM\ + \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ + \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ + \ to its mapped mate. Only affects multi-mapping reads." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMorder" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: null + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMprimaryFlag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ + \ the best score is primary\n- AllBestScore ... 
all alignments with the best\ + \ score are primary" + info: null + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMreadID" + description: "read ID record type\n\n- Standard ... first word (until space)\ + \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ + \ read number (index) in the FASTx file" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMmapqUnique" + description: "0 to 255: the MAPQ value for unique mappers" + info: null + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagOR" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ + \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ + \ are not set otherwise." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagAND" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set\ + \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ + \ are not unset otherwise." + info: null + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattrRGline" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ + \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment.
Any\ + \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ + \ correspond to different (comma separated) input files in --readFilesIn.\ + \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ + \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderHD" + description: "@HD (header) line of the SAM header" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderPG" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderCommentFile" + description: "path to the file with @CO (comment) lines of the SAM header" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMfilter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ + \ at the mapping stage." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMmultNmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ...
all alignments (up to\ + \ --outFilterMultimapNmax) will be output" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMtlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ + - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMcompression" + description: "-1 to 10 BAM compression level, -1=default compression (6?),\ + \ 0=no compression, 10=maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingThreadN" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingBinsN" + description: ">0: number of genome bins for coordinate-sorting" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "BAM processing" + arguments: + - type: "string" + name: "--bamRemoveDuplicatesType" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ + \ - ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--bamRemoveDuplicatesMate2basesN" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Wiggle" + arguments: + - type: "string" + name: "--outWigType" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ + \ only 2nd read" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outWigStrand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: null + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigReferencesPrefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigNorm" + description: "type of normalization for the signal\n\n- RPM ... reads per\ + \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" + info: null + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering" + arguments: + - type: "string" + name: "--outFilterType" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: null + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapScoreRange" + description: "the score range below the maximum score for multimapping alignments" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapNmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than\ + \ this value.\n\nOtherwise no alignments will be output, and the read will\ + \ be counted as \"mapped to too many loci\" in the Log.final.out ." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMismatchNmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverLmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." 
+ info: null + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverReadLmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterScoreMin" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterScoreMinOverLread" + description: "same as outFilterScoreMin, but normalized to read length (sum\ + \ of mates' lengths for paired-end reads)" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMatchNmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMatchNminOverLread" + description: "same as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronMotifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter\ + \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ... 
filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. The annotated non-canonical\ + \ junctions will be kept." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronStrands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: null + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--outSJtype" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--outSJfilterReads" + description: "which reads to consider for collapsed splice junctions output\n\ + \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ + \ mapping reads only" + info: null + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSJfilterOverhangMin" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + does not apply to annotated junctions" + info: null + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountUniqueMin" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ + \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ + \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ + \ are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountTotalMin" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterDistToOtherSJmin" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: null + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterIntronMaxVsReadN" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ + \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: null + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Scoring" + arguments: + - type: "integer" + name: "--scoreGap" + description: "splice junction penalty (independent on intron motif)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapNoncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapGCAG" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: null + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapATAC" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGenomicLengthLog2scale" + description: "extra score logarithmically scaled with genomic length of the\ + \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelOpen" + description: "deletion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelBase" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: 
"par" + - type: "integer" + name: "--scoreInsOpen" + description: "insertion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsBase" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreStitchSJshift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seedSearchStartLmax" + description: "defines the search start point through the read - the read is\ + \ split into pieces no longer than this value" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--seedSearchStartLmaxOverLread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSearchLmax" + description: "defines the maximum length of the seeds, if =0 seed length is\ + \ not limited" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMultimapNmax" + description: "only pieces that map fewer than this value are utilized in the\ + \ stitching procedure" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerReadNmax" + description: "max 
number of seeds per read" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerWindowNmax" + description: "max number of seeds per window" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedNoneLociPerWindow" + description: "max number of one seed loci per window" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSplitMin" + description: "min length of the seed sequences split by Ns or mate gap" + info: null + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMapMin" + description: "min length of seeds to be mapped" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMin" + description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: null + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMax" + description: "maximum intron size, if 0, max intron size will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignMatesGapMax" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: 
"--alignSJoverhangMin" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJstitchMismatchNmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ + \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: null + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--alignSJDBoverhangMin" + description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ + \ alignments" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSplicedMateMapLmin" + description: "minimum mapped length for a read mate that is spliced" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alignSplicedMateMapLminOverLmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignWindowsPerReadNmax" + description: "max number of windows per read" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerWindowNmax" + description: "max number of transcripts per window" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerReadNmax" + description: "max number of different alignments per read 
to consider" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsType" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: null + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsProtrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ + \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion\ + \ as concordant pairs\n- DiscordantPair ... report alignments\ + \ with non-zero protrusion as discordant pairs" + info: null + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignSoftClipAtReferenceEnds" + description: "allow the soft-clipping of the alignments past the end of the\ + \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ + \ with Cufflinks" + info: null + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignInsertionFlush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... 
insertions are flushed to the right" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Paired-End reads" + arguments: + - type: "integer" + name: "--peOverlapNbasesMin" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. Specify >0 value to switch on the \"merging of overlapping\ + \ mates\" algorithm." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--peOverlapMMp" + description: "maximum proportion of mismatched bases in the overlap area" + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--winAnchorMultimapNmax" + description: "max number of loci anchors are allowed to map to" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winBinNbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins." 
+ info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winAnchorDistNbins" + description: "max number of bins between two anchors that allows aggregation\ + \ of anchors into one window" + info: null + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winFlankNbins" + description: "log2(winFlank), where win Flank is the size of the left and right\ + \ flanking regions for each window" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--winReadCoverageRelativeMin" + description: "minimum relative coverage of the read sequence by the seeds in\ + \ a window, for STARlong algorithm only." + info: null + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winReadCoverageBasesMin" + description: "minimum number of bases covered by the seeds in a window , for\ + \ STARlong algorithm only." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chimOutType" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ + - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ + - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" + info: null + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimSegmentMin" + description: "minimum length of chimeric segment length, if ==0, no chimeric\ + \ output" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreMin" + description: "minimum total (summed) score of the chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreDropMax" + description: "max drop (difference) of chimeric score (the sum of scores of\ + \ all chimeric segments) from the read length" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreSeparation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreJunctionNonGTAG" + description: "penalty for a non-GT/AG chimeric junction" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimJunctionOverhangMin" + description: "minimum overhang for a chimeric junction" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimSegmentReadGapMax" + description: "maximum gap in the read sequence between chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - 
type: "string" + name: "--chimFilter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: null + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimMainSegmentMultNmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapNmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ + \ old scheme for chimeric detection which only considered unique alignments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapScoreRange" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. Only works with --chimMultimapNmax > 1" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimNonchimScoreDropMin" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimOutJunctionFormat" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ + \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ + \ command line and Nreads: total, unique/multi-mapping" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quantMode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--quantTranscriptomeBAMcompression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ + \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ + - 10 ... maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--quantTranscriptomeBan" + description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ + \ prohibit indels, soft clipping and single-end alignments - compatible with\ + \ RSEM\n- Singleend ... prohibit single-end alignments" + info: null + example: + - "IndelSoftclipSingleend" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopassMode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--twopass1readsN" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." 
+ info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "WASP parameters" + arguments: + - type: "string" + name: "--waspOutputMode" + description: "WASP allele-specific output type. This is re-implementation of\ + \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ + \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ + \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "STARsolo (single cell RNA-seq) parameters" + arguments: + - type: "string" + name: "--soloType" + description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ + \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ + \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ + \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ + \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ + \ as CR and/or CB SAM tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ + \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ + \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ + \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ + \ UMI sequences, alignments deduplicated according to alignment start and\ + \ end (after extending soft-clipped bases)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCBwhitelist" + description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ + \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ + \ all cell barcodes are allowed" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--soloCBstart" + description: "cell barcode start base" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloCBlen" + description: "cell barcode length" + info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIstart" + description: "UMI start base" + info: null + example: + - 17 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIlen" + description: "UMI length" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeReadLength" + description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ + - 0 ... not defined, do not check" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeMate" + description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ + \n- 0 ... barcode sequence is on separate read, which should always be the\ + \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ + \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBposition" + description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ + \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ + \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ + \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ + \ position with of the CB start(end) with respect to the Anchor Base\nString\ + \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ + \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIposition" + description: "position of the UMI on the barcode read, same as soloCBposition\n\ + \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ + \ 3_9_3_14" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloAdapterSequence" + description: "adapter sequence to anchor barcodes. Only one adapter sequence\ + \ is allowed." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloAdapterMismatchesNmax" + description: "maximum number of mismatches allowed in adapter sequence." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBmatchWLtype" + description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ + \ ... only exact matches allowed\n- 1MM \ + \ ... 
only one match in whitelist with 1 mismatched base allowed.\ + \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ + \ ... multiple matches in whitelist with 1 mismatched\ + \ base allowed, posterior probability calculation is used choose one of the\ + \ matches.\nAllowed CBs have to have at least one read with exact match. This\ + \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ + \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ + \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ + \ multimatching to WL is allowed for CBs with N-bases. This option matches\ + \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ + \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ + \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ + \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + info: null + example: + - "1MM_multi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeSeq" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ + \ CR UR .\nThis parameter is required when running STARsolo with input from\ + \ SAM." 
+ info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeQual" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ + \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ + \ to all bases." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloStrand" + description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ + \ information\n- Forward ... read strand same as the original RNA molecule\n\ + - Reverse ... read strand opposite to the original RNA molecule" + info: null + example: + - "Forward" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloFeatures" + description: "genomic features for which the UMI counts per Cell Barcode are\ + \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ + - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ + \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ + \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ + \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ + \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ + \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ + \ Do not count reads with 100% exonic overlap in the antisense direction." + info: null + example: + - "Gene" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloMultiMappers" + description: "counting method for reads mapping to multiple genes\n\n- Unique\ + \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ + \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ + \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ + \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ + \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ + \ Maximization algorithm" + info: null + example: + - "Unique" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIdedup" + description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ + \ ... all UMIs with 1 mismatch distance to each other\ + \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ + \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ + \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ + \ but with more stringent criteria for duplicate UMIs\n- Exact \ + \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ + \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ + \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." + info: null + example: + - "1MM_All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIfiltering" + description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ + - - ... basic filtering: remove UMIs with N and homopolymers\ + \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ + \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ + \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ + \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ + \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFileNames" + description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ + \ barcode_sequences cell_feature_count_matrix" + info: null + example: + - "Solo.out/" + - "features.tsv" + - "barcodes.tsv" + - "matrix.mtx" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellFilter" + description: "cell filtering type and parameters\n\n- None ... do\ + \ not output filtered cells\n- TopCells ... only report top cells by\ + \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ + \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ + \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ + \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ + \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ + \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ + \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ + \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ + \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ + \ 0.99 10 45000 90000 500 0.01\ + \ 20000 0.01 10000" + info: null + example: + - "CellRanger2.2" + - "3000" + - "0.99" + - "10" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFormatFeaturesGeneField3" + description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ + \ is output." 
+ info: null + example: + - "Gene Expression" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellReadStats" + description: "Output reads statistics for each CB\n\n- Standard ... standard\ + \ output" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "../star_align/script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Align fastq files using STAR." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "docker" + env: + - "STAR_VERSION 2.7.3a" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + 
directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_align_v273a" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_align_v273a/star_align_v273a" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/star_align_v273a/setup_logger.py b/target/docker/mapping/star_align_v273a/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/mapping/star_align_v273a/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s 
%(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/mapping/star_align_v273a/star_align_v273a b/target/docker/mapping/star_align_v273a/star_align_v273a new file mode 100755 index 00000000000..e4ecaa46e27 --- /dev/null +++ b/target/docker/mapping/star_align_v273a/star_align_v273a @@ -0,0 +1,5713 @@ +#!/usr/bin/env bash + +# star_align_v273a 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # 
returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. 
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="star_align_v273a" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "star_align_v273a 0.12.4" + echo "" + echo "Align fastq files using STAR." + echo "" + echo "Input/Output:" + echo " --readFilesIn, --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example:" + echo "mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz" + echo " The FASTQ files to be analyzed. 
Corresponds to the --readFilesIn in the" + echo " STAR command." + echo "" + echo " --genomeDir, --reference" + echo " type: file, required parameter, file must exist" + echo " example: /path/to/reference" + echo " Path to the reference built by star_build_reference. Corresponds to the" + echo " --genomeDir in the STAR command." + echo "" + echo " --outFileNamePrefix, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: /path/to/foo" + echo " Path to output directory. Corresponds to the --outFileNamePrefix in the" + echo " STAR command." + echo "" + echo "Run Parameters:" + echo " --runRNGseed" + echo " type: integer" + echo " example: 777" + echo " random number generator seed." + echo "" + echo "Genome Parameters:" + echo " --genomeLoad" + echo " type: string" + echo " example: NoSharedMemory" + echo " mode of shared memory usage for the genome files. Only used with" + echo " --runMode alignReads." + echo " - LoadAndKeep ... load genome into shared and keep it in memory" + echo " after run" + echo " - LoadAndRemove ... load genome into shared but remove it after run" + echo " - LoadAndExit ... load genome into shared memory and exit, keeping" + echo " the genome in memory for future runs" + echo " - Remove ... do not map anything, just remove loaded genome" + echo " from memory" + echo " - NoSharedMemory ... do not use shared memory, each job will have its" + echo " own private copy of the genome" + echo "" + echo " --genomeFastaFiles" + echo " type: file, multiple values allowed, file must exist" + echo " path(s) to the fasta files with the genome sequences, separated by" + echo " spaces. These files should be plain text FASTA files, they *cannot* be" + echo " zipped." + echo " Required for the genome generation (--runMode genomeGenerate). Can also" + echo " be used in the mapping (--runMode alignReads) to add extra (new)" + echo " sequences to the genome (e.g. spike-ins)." 
+ echo "" + echo " --genomeFileSizes" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " genome files exact sizes in bytes. Typically, this should not be defined" + echo " by the user." + echo "" + echo " --genomeTransformOutput" + echo " type: string, multiple values allowed" + echo " which output to transform back to original genome" + echo " - SAM ... SAM/BAM alignments" + echo " - SJ ... splice junctions (SJ.out.tab)" + echo " - None ... no transformation of the output" + echo "" + echo " --genomeChrSetMitochondrial" + echo " type: string, multiple values allowed" + echo " example: chrM;M;MT" + echo " names of the mitochondrial chromosomes. Presently only used for STARsolo" + echo " statistics output/" + echo "" + echo "Splice Junctions Database:" + echo " --sjdbFileChrStartEnd" + echo " type: string, multiple values allowed" + echo " path to the files with genomic coordinates (chr start end" + echo " strand) for the splice junction introns. Multiple files can be" + echo " supplied and will be concatenated." + echo "" + echo " --sjdbGTFfile" + echo " type: file, file must exist" + echo " path to the GTF file with annotations" + echo "" + echo " --sjdbGTFchrPrefix" + echo " type: string" + echo " prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL" + echo " annotations with UCSC genomes)" + echo "" + echo " --sjdbGTFfeatureExon" + echo " type: string" + echo " example: exon" + echo " feature type in GTF file to be used as exons for building transcripts" + echo "" + echo " --sjdbGTFtagExonParentTranscript" + echo " type: string" + echo " example: transcript_id" + echo " GTF attribute name for parent transcript ID (default \"transcript_id\"" + echo " works for GTF files)" + echo "" + echo " --sjdbGTFtagExonParentGene" + echo " type: string" + echo " example: gene_id" + echo " GTF attribute name for parent gene ID (default \"gene_id\" works for GTF" + echo " files)" + echo "" + echo " --sjdbGTFtagExonParentGeneName" + echo " type: string, multiple values allowed" + echo " example: gene_name" + echo " GTF attribute name for parent gene name" + echo "" + echo " --sjdbGTFtagExonParentGeneType" + echo " type: string, multiple values allowed" + echo " example: gene_type;gene_biotype" + echo " GTF attribute name for parent gene type" + echo "" + echo " --sjdbOverhang" + echo " type: integer" + echo " example: 100" + echo " length of the donor/acceptor sequence on each side of the junctions," + echo " ideally = (mate_length - 1)" + echo "" + echo " --sjdbScore" + echo " type: integer" + echo " example: 2" + echo " extra alignment score for alignments that cross database junctions" + echo "" + echo " --sjdbInsertSave" + echo " type: string" + echo " example: Basic" + echo " which files to save when sjdb junctions are inserted on the fly at the" + echo " mapping step" + echo " - Basic ... only small junction / transcript files" + echo " - All ... all files including big Genome, SA and SAindex - this will" + echo " create a complete genome directory" + echo "" + echo "Variation parameters:" + echo " --varVCFfile" + echo " type: string" + echo " path to the VCF file that contains variation data. The 10th column" + echo " should contain the genotype information, e.g. 
0/1" + echo "" + echo "Read Parameters:" + echo " --readFilesType" + echo " type: string" + echo " example: Fastx" + echo " format of input read files" + echo " - Fastx ... FASTA or FASTQ" + echo " - SAM SE ... SAM or BAM single-end reads; for BAM use" + echo " --readFilesCommand samtools view" + echo " - SAM PE ... SAM or BAM paired-end reads; for BAM use" + echo " --readFilesCommand samtools view" + echo "" + echo " --readFilesSAMattrKeep" + echo " type: string, multiple values allowed" + echo " example: All" + echo " for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM," + echo " e.g.: --readFilesSAMtagsKeep RG PL" + echo " - All ... keep all tags" + echo " - None ... do not keep any tags" + echo "" + echo " --readFilesManifest" + echo " type: file, file must exist" + echo " path to the \"manifest\" file with the names of read files. The manifest" + echo " file should contain 3 tab-separated columns:" + echo " paired-end reads: read1_file_name \$tab\$ read2_file_name \$tab\$" + echo " read_group_line." + echo " single-end reads: read1_file_name \$tab\$ - \$tab\$" + echo " read_group_line." + echo " Spaces, but not tabs are allowed in file names." + echo " If read_group_line does not start with ID:, it can only contain one ID" + echo " field, and ID: will be added to it." + echo " If read_group_line starts with ID:, it can contain several fields" + echo " separated by \$tab\$, and all fields will be be copied verbatim into SAM" + echo " @RG header line." + echo "" + echo " --readFilesPrefix" + echo " type: string" + echo " prefix for the read files names, i.e. it will be added in front of the" + echo " strings in --readFilesIn" + echo "" + echo " --readFilesCommand" + echo " type: string, multiple values allowed" + echo " command line to execute for each of the input file. 
This command should" + echo " generate FASTA or FASTQ text and send it to stdout" + echo " For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2" + echo " files, etc." + echo "" + echo " --readMapNumber" + echo " type: integer" + echo " example: -1" + echo " number of reads to map from the beginning of the file" + echo " -1: map all reads" + echo "" + echo " --readMatesLengthsIn" + echo " type: string" + echo " example: NotEqual" + echo " Equal/NotEqual - lengths of names,sequences,qualities for both mates are" + echo " the same / not the same. NotEqual is safe in all situations." + echo "" + echo " --readNameSeparator" + echo " type: string, multiple values allowed" + echo " example: /" + echo " character(s) separating the part of the read names that will be trimmed" + echo " in output (read name after space is always trimmed)" + echo "" + echo " --readQualityScoreBase" + echo " type: integer" + echo " example: 33" + echo " number to be subtracted from the ASCII code to get Phred quality score" + echo "" + echo "Read Clipping:" + echo " --clipAdapterType" + echo " type: string" + echo " example: Hamming" + echo " adapter clipping type" + echo " - Hamming ... adapter clipping based on Hamming distance, with the" + echo " number of mismatches controlled by --clip5pAdapterMMp" + echo " - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4." + echo " Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal" + echo " - None ... no adapter clipping, all other clip* parameters are" + echo " disregarded" + echo "" + echo " --clip3pNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number(s) of bases to clip from 3p of each mate. If one value is given," + echo " it will be assumed the same for both mates." + echo "" + echo " --clip3pAdapterSeq" + echo " type: string, multiple values allowed" + echo " adapter sequences to clip from 3p of each mate. 
If one value is given," + echo " it will be assumed the same for both mates." + echo " - polyA ... polyA sequence with the length equal to read length" + echo "" + echo " --clip3pAdapterMMp" + echo " type: double, multiple values allowed" + echo " example: 0.1" + echo " max proportion of mismatches for 3p adapter clipping for each mate. If" + echo " one value is given, it will be assumed the same for both mates." + echo "" + echo " --clip3pAfterAdapterNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number of bases to clip from 3p of each mate after the adapter clipping." + echo " If one value is given, it will be assumed the same for both mates." + echo "" + echo " --clip5pNbases" + echo " type: integer, multiple values allowed" + echo " example: 0" + echo " number(s) of bases to clip from 5p of each mate. If one value is given," + echo " it will be assumed the same for both mates." + echo "" + echo "Limits:" + echo " --limitGenomeGenerateRAM" + echo " type: long" + echo " example: 31000000000" + echo " maximum available RAM (bytes) for genome generation" + echo "" + echo " --limitIObufferSize" + echo " type: long, multiple values allowed" + echo " example: 30000000;50000000" + echo " max available buffers size (bytes) for input/output, per thread" + echo "" + echo " --limitOutSAMoneReadBytes" + echo " type: long" + echo " example: 100000" + echo " max size of the SAM record (bytes) for one read. Recommended value:" + echo " >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + echo "" + echo " --limitOutSJoneRead" + echo " type: integer" + echo " example: 1000" + echo " max number of junctions for one read (including all multi-mappers)" + echo "" + echo " --limitOutSJcollapsed" + echo " type: integer" + echo " example: 1000000" + echo " max number of collapsed junctions" + echo "" + echo " --limitBAMsortRAM" + echo " type: long" + echo " example: 0" + echo " maximum available RAM (bytes) for sorting BAM. 
If =0, it will be set to" + echo " the genome index size. 0 value can only be used with --genomeLoad" + echo " NoSharedMemory option." + echo "" + echo " --limitSjdbInsertNsj" + echo " type: integer" + echo " example: 1000000" + echo " maximum number of junctions to be inserted to the genome on the fly at" + echo " the mapping stage, including those from annotations and those detected" + echo " in the 1st step of the 2-pass run" + echo "" + echo " --limitNreadsSoft" + echo " type: integer" + echo " example: -1" + echo " soft limit on the number of reads" + echo "" + echo "Output: general:" + echo " --outTmpKeep" + echo " type: string" + echo " whether to keep the temporary files after STAR runs is finished" + echo " - None ... remove all temporary files" + echo " - All ... keep all files" + echo "" + echo " --outStd" + echo " type: string" + echo " example: Log" + echo " which output will be directed to stdout (standard out)" + echo " - Log ... log messages" + echo " - SAM ... alignments in SAM format (which normally" + echo " are output to Aligned.out.sam file), normal standard output will go into" + echo " Log.std.out" + echo " - BAM_Unsorted ... alignments in BAM format, unsorted." + echo " Requires --outSAMtype BAM Unsorted" + echo " - BAM_SortedByCoordinate ... alignments in BAM format, sorted by" + echo " coordinate. Requires --outSAMtype BAM SortedByCoordinate" + echo " - BAM_Quant ... alignments to transcriptome in BAM format," + echo " unsorted. Requires --quantMode TranscriptomeSAM" + echo "" + echo " --outReadsUnmapped" + echo " type: string" + echo " output of unmapped and partially mapped (i.e. mapped only one mate of a" + echo " paired end read) reads in separate file(s)." + echo " - None ... no output" + echo " - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + echo "" + echo " --outQSconversionAdd" + echo " type: integer" + echo " example: 0" + echo " add this number to the quality score (e.g. 
to convert from Illumina to" + echo " Sanger, use -31)" + echo "" + echo " --outMultimapperOrder" + echo " type: string" + echo " example: Old_2.4" + echo " order of multimapping alignments in the output files" + echo " - Old_2.4 ... quasi-random order used before 2.5.0" + echo " - Random ... random order of alignments for each" + echo " multi-mapper. Read mates (pairs) are always adjacent, all alignment for" + echo " each read stay together. This option will become default in the future" + echo " releases." + echo "" + echo "Output: SAM and BAM:" + echo " --outSAMtype" + echo " type: string, multiple values allowed" + echo " example: SAM" + echo " type of SAM/BAM output" + echo " 1st word:" + echo " - BAM ... output BAM without sorting" + echo " - SAM ... output SAM without sorting" + echo " - None ... no SAM/BAM output" + echo " 2nd, 3rd:" + echo " - Unsorted ... standard unsorted" + echo " - SortedByCoordinate ... sorted by coordinate. This option will allocate" + echo " extra memory for sorting which can be specified by --limitBAMsortRAM." + echo "" + echo " --outSAMmode" + echo " type: string" + echo " example: Full" + echo " mode of SAM output" + echo " - None ... no SAM output" + echo " - Full ... full SAM output" + echo " - NoQS ... full SAM but without quality scores" + echo "" + echo " --outSAMstrandField" + echo " type: string" + echo " Cufflinks-like strand field flag" + echo " - None ... not used" + echo " - intronMotif ... strand derived from the intron motif. This option" + echo " changes the output alignments: reads with inconsistent and/or" + echo " non-canonical introns are filtered out." + echo "" + echo " --outSAMattributes" + echo " type: string, multiple values allowed" + echo " example: Standard" + echo " a string of desired SAM attributes, in the order desired for the output" + echo " SAM. Tags can be listed in any combination/order." + echo " ***Presets:" + echo " - None ... no attributes" + echo " - Standard ... 
NH HI AS nM" + echo " - All ... NH HI AS nM NM MD jM jI MC ch" + echo " ***Alignment:" + echo " - NH ... number of loci the reads maps to: =1 for unique" + echo " mappers, >1 for multimappers. Standard SAM tag." + echo " - HI ... multiple alignment index, starts with" + echo " --outSAMattrIHstart (=1 by default). Standard SAM tag." + echo " - AS ... local alignment score, +1/-1 for matches/mismateches," + echo " score* penalties for indels and gaps. For PE reads, total score for two" + echo " mates. Stadnard SAM tag." + echo " - nM ... number of mismatches. For PE reads, sum over two" + echo " mates." + echo " - NM ... edit distance to the reference (number of mismatched +" + echo " inserted + deleted bases) for each mate. Standard SAM tag." + echo " - MD ... string encoding mismatched and deleted reference bases" + echo " (see standard SAM specifications). Standard SAM tag." + echo " - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0:" + echo " non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6:" + echo " GT/AT. If splice junctions database is used, and a junction is" + echo " annotated, 20 is added to its motif value." + echo " - jI ... start and end of introns for all junctions (1-based)." + echo " - XS ... alignment strand according to --outSAMstrandField." + echo " - MC ... mate's CIGAR string. Standard SAM tag." + echo " - ch ... marks all segment of all chimeric alingments for" + echo " --chimOutType WithinBAM output." + echo " - cN ... number of bases clipped from the read ends: 5' and 3'" + echo " ***Variation:" + echo " - vA ... variant allele" + echo " - vG ... genomic coordinate of the variant overlapped by the" + echo " read." + echo " - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 -" + echo " alignment does not pass WASP filtering. Requires --waspOutputMode" + echo " SAMtag." + echo " ***STARsolo:" + echo " - CR CY UR UY ... 
sequences and quality scores of cell barcodes and UMIs" + echo " for the solo* demultiplexing." + echo " - GX GN ... gene ID and gene name for unique-gene reads." + echo " - gx gn ... gene IDs and gene names for unique- and multi-gene" + echo " reads." + echo " - CB UB ... error-corrected cell barcodes and UMIs for solo*" + echo " demultiplexing. Requires --outSAMtype BAM SortedByCoordinate." + echo " - sM ... assessment of CB and UMI." + echo " - sS ... sequence of the entire barcode (CB,UMI,adapter)." + echo " - sQ ... quality of the entire barcode." + echo " ***Unsupported/undocumented:" + echo " - ha ... haplotype (1/2) when mapping to the diploid genome." + echo " Requires genome generated with --genomeTransformType Diploid ." + echo " - rB ... alignment block read/genomic coordinates." + echo " - vR ... read coordinate of the variant." + echo "" + echo " --outSAMattrIHstart" + echo " type: integer" + echo " example: 1" + echo " start value for the IH attribute. 0 may be required by some downstream" + echo " software, such as Cufflinks or StringTie." + echo "" + echo " --outSAMunmapped" + echo " type: string, multiple values allowed" + echo " output of unmapped reads in the SAM format" + echo " 1st word:" + echo " - None ... no output" + echo " - Within ... output unmapped reads within the main SAM file (i.e." + echo " Aligned.out.sam)" + echo " 2nd word:" + echo " - KeepPairs ... record unmapped mate for each alignment, and, in case of" + echo " unsorted output, keep it adjacent to its mapped mate. Only affects" + echo " multi-mapping reads." 
+ echo "" + echo " --outSAMorder" + echo " type: string" + echo " example: Paired" + echo " type of sorting for the SAM output" + echo " Paired: one mate after the other for all paired alignments" + echo " PairedKeepInputOrder: one mate after the other for all paired" + echo " alignments, the order is kept the same as in the input FASTQ files" + echo "" + echo " --outSAMprimaryFlag" + echo " type: string" + echo " example: OneBestScore" + echo " which alignments are considered primary - all others will be marked with" + echo " 0x100 bit in the FLAG" + echo " - OneBestScore ... only one alignment with the best score is primary" + echo " - AllBestScore ... all alignments with the best score are primary" + echo "" + echo " --outSAMreadID" + echo " type: string" + echo " example: Standard" + echo " read ID record type" + echo " - Standard ... first word (until space) from the FASTx read ID line," + echo " removing /1,/2 from the end" + echo " - Number ... read number (index) in the FASTx file" + echo "" + echo " --outSAMmapqUnique" + echo " type: integer" + echo " example: 255" + echo " 0 to 255: the MAPQ value for unique mappers" + echo "" + echo " --outSAMflagOR" + echo " type: integer" + echo " example: 0" + echo " 0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e." + echo " FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set" + echo " by STAR, and after outSAMflagAND. Can be used to set specific bits that" + echo " are not set otherwise." + echo "" + echo " --outSAMflagAND" + echo " type: integer" + echo " example: 65535" + echo " 0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e." + echo " FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set" + echo " by STAR, but before outSAMflagOR. Can be used to unset specific bits" + echo " that are not set otherwise." + echo "" + echo " --outSAMattrRGline" + echo " type: string, multiple values allowed" + echo " SAM/BAM read group line. 
The first word contains the read group" + echo " identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx" + echo " CN:yy \"DS:z z z\"." + echo " xxx will be added as RG tag to each output alignment. Any spaces in the" + echo " tag values have to be double quoted." + echo " Comma separated RG lines correspons to different (comma separated) input" + echo " files in --readFilesIn. Commas have to be surrounded by spaces, e.g." + echo " --outSAMattrRGline ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + echo "" + echo " --outSAMheaderHD" + echo " type: string, multiple values allowed" + echo " @HD (header) line of the SAM header" + echo "" + echo " --outSAMheaderPG" + echo " type: string, multiple values allowed" + echo " extra @PG (software) line of the SAM header (in addition to STAR)" + echo "" + echo " --outSAMheaderCommentFile" + echo " type: string" + echo " path to the file with @CO (comment) lines of the SAM header" + echo "" + echo " --outSAMfilter" + echo " type: string, multiple values allowed" + echo " filter the output into main SAM/BAM files" + echo " - KeepOnlyAddedReferences ... only keep the reads for which all" + echo " alignments are to the extra reference sequences added with" + echo " --genomeFastaFiles at the mapping stage." + echo " - KeepAllAddedReferences ... keep all alignments to the extra reference" + echo " sequences added with --genomeFastaFiles at the mapping stage." + echo "" + echo " --outSAMmultNmax" + echo " type: integer" + echo " example: -1" + echo " max number of multiple alignments for a read that will be output to the" + echo " SAM/BAM files. Note that if this value is not equal to -1, the top" + echo " scoring alignment will be output first" + echo " - -1 ... all alignments (up to --outFilterMultimapNmax) will be output" + echo "" + echo " --outSAMtlen" + echo " type: integer" + echo " example: 1" + echo " calculation method for the TLEN field in the SAM/BAM files" + echo " - 1 ... 
leftmost base of the (+)strand mate to rightmost base of the" + echo " (-)mate. (+)sign for the (+)strand mate" + echo " - 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign" + echo " for the mate with the leftmost base. This is different from 1 for" + echo " overlapping mates with protruding ends" + echo "" + echo " --outBAMcompression" + echo " type: integer" + echo " example: 1" + echo " -1 to 10 BAM compression level, -1=default compression (6?), 0=no" + echo " compression, 10=maximum compression" + echo "" + echo " --outBAMsortingThreadN" + echo " type: integer" + echo " example: 0" + echo " >=0: number of threads for BAM sorting. 0 will default to" + echo " min(6,--runThreadN)." + echo "" + echo " --outBAMsortingBinsN" + echo " type: integer" + echo " example: 50" + echo " >0: number of genome bins for coordinate-sorting" + echo "" + echo "BAM processing:" + echo " --bamRemoveDuplicatesType" + echo " type: string" + echo " mark duplicates in the BAM file, for now only works with (i) sorted BAM" + echo " fed with inputBAMfile, and (ii) for paired-end alignments only" + echo " - - ... no duplicate removal/marking" + echo " - UniqueIdentical ... mark all multimappers, and duplicate" + echo " unique mappers. The coordinates, FLAG, CIGAR must be identical" + echo " - UniqueIdenticalNotMulti ... mark duplicate unique mappers but not" + echo " multimappers." + echo "" + echo " --bamRemoveDuplicatesMate2basesN" + echo " type: integer" + echo " example: 0" + echo " number of bases from the 5' of mate 2 to use in collapsing (e.g. for" + echo " RAMPAGE)" + echo "" + echo "Output Wiggle:" + echo " --outWigType" + echo " type: string, multiple values allowed" + echo " type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires" + echo " sorted BAM: --outSAMtype BAM SortedByCoordinate ." + echo " 1st word:" + echo " - None ... no signal output" + echo " - bedGraph ... bedGraph format" + echo " - wiggle ... 
wiggle format" + echo " 2nd word:" + echo " - read1_5p ... signal from only 5' of the 1st read, useful for" + echo " CAGE/RAMPAGE etc" + echo " - read2 ... signal from only 2nd read" + echo "" + echo " --outWigStrand" + echo " type: string" + echo " example: Stranded" + echo " strandedness of wiggle/bedGraph output" + echo " - Stranded ... separate strands, str1 and str2" + echo " - Unstranded ... collapsed strands" + echo "" + echo " --outWigReferencesPrefix" + echo " type: string" + echo " prefix matching reference names to include in the output wiggle file," + echo " e.g. \"chr\", default \"-\" - include all references" + echo "" + echo " --outWigNorm" + echo " type: string" + echo " example: RPM" + echo " type of normalization for the signal" + echo " - RPM ... reads per million of mapped reads" + echo " - None ... no normalization, \"raw\" counts" + echo "" + echo "Output Filtering:" + echo " --outFilterType" + echo " type: string" + echo " example: Normal" + echo " type of filtering" + echo " - Normal ... standard filtering using only current alignment" + echo " - BySJout ... keep only those reads that contain junctions that passed" + echo " filtering into SJ.out.tab" + echo "" + echo " --outFilterMultimapScoreRange" + echo " type: integer" + echo " example: 1" + echo " the score range below the maximum score for multimapping alignments" + echo "" + echo " --outFilterMultimapNmax" + echo " type: integer" + echo " example: 10" + echo " maximum number of loci the read is allowed to map to. Alignments (all of" + echo " them) will be output only if the read maps to no more loci than this" + echo " value." + echo " Otherwise no alignments will be output, and the read will be counted as" + echo " \"mapped to too many loci\" in the Log.final.out ." + echo "" + echo " --outFilterMismatchNmax" + echo " type: integer" + echo " example: 10" + echo " alignment will be output only if it has no more mismatches than this" + echo " value." 
+ echo "" + echo " --outFilterMismatchNoverLmax" + echo " type: double" + echo " example: 0.3" + echo " alignment will be output only if its ratio of mismatches to *mapped*" + echo " length is less than or equal to this value." + echo "" + echo " --outFilterMismatchNoverReadLmax" + echo " type: double" + echo " example: 1.0" + echo " alignment will be output only if its ratio of mismatches to *read*" + echo " length is less than or equal to this value." + echo "" + echo " --outFilterScoreMin" + echo " type: integer" + echo " example: 0" + echo " alignment will be output only if its score is higher than or equal to" + echo " this value." + echo "" + echo " --outFilterScoreMinOverLread" + echo " type: double" + echo " example: 0.66" + echo " same as outFilterScoreMin, but normalized to read length (sum of mates'" + echo " lengths for paired-end reads)" + echo "" + echo " --outFilterMatchNmin" + echo " type: integer" + echo " example: 0" + echo " alignment will be output only if the number of matched bases is higher" + echo " than or equal to this value." + echo "" + echo " --outFilterMatchNminOverLread" + echo " type: double" + echo " example: 0.66" + echo " sam as outFilterMatchNmin, but normalized to the read length (sum of" + echo " mates' lengths for paired-end reads)." + echo "" + echo " --outFilterIntronMotifs" + echo " type: string" + echo " filter alignment using their motifs" + echo " - None ... no filtering" + echo " - RemoveNoncanonical ... filter out alignments that contain" + echo " non-canonical junctions" + echo " - RemoveNoncanonicalUnannotated ... filter out alignments that contain" + echo " non-canonical unannotated junctions when using annotated splice" + echo " junctions database. The annotated non-canonical junctions will be kept." + echo "" + echo " --outFilterIntronStrands" + echo " type: string" + echo " example: RemoveInconsistentStrands" + echo " filter alignments" + echo " - RemoveInconsistentStrands ... 
remove alignments that have" + echo " junctions with inconsistent strands" + echo " - None ... no filtering" + echo "" + echo "Output splice junctions (SJ.out.tab):" + echo " --outSJtype" + echo " type: string" + echo " example: Standard" + echo " type of splice junction output" + echo " - Standard ... standard SJ.out.tab output" + echo " - None ... no splice junction output" + echo "" + echo "Output Filtering: Splice Junctions:" + echo " --outSJfilterReads" + echo " type: string" + echo " example: All" + echo " which reads to consider for collapsed splice junctions output" + echo " - All ... all reads, unique- and multi-mappers" + echo " - Unique ... uniquely mapping reads only" + echo "" + echo " --outSJfilterOverhangMin" + echo " type: integer, multiple values allowed" + echo " example: 30;12;12;12" + echo " minimum overhang length for splice junctions on both sides for: (1)" + echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterCountUniqueMin" + echo " type: integer, multiple values allowed" + echo " example: 3;1;1;1" + echo " minimum uniquely mapping read count per junction for: (1) non-canonical" + echo " motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC" + echo " and GT/AT motif. -1 means no output for that motif" + echo " Junctions are output if one of outSJfilterCountUniqueMin OR" + echo " outSJfilterCountTotalMin conditions are satisfied" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterCountTotalMin" + echo " type: integer, multiple values allowed" + echo " example: 3;1;1;1" + echo " minimum total (multi-mapping+unique) read count per junction for: (1)" + echo " non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif" + echo " Junctions are output if one of outSJfilterCountUniqueMin OR" + echo " outSJfilterCountTotalMin conditions are satisfied" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterDistToOtherSJmin" + echo " type: integer, multiple values allowed" + echo " example: 10;0;5;10" + echo " minimum allowed distance to other junctions' donor/acceptor" + echo " does not apply to annotated junctions" + echo "" + echo " --outSJfilterIntronMaxVsReadN" + echo " type: integer, multiple values allowed" + echo " example: 50000;100000;200000" + echo " maximum gap allowed for junctions supported by 1,2,3,,,N reads" + echo " i.e. by default junctions supported by 1 read can have gaps <=50000b, by" + echo " 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap" + echo " <=alignIntronMax" + echo " does not apply to annotated junctions" + echo "" + echo "Scoring:" + echo " --scoreGap" + echo " type: integer" + echo " example: 0" + echo " splice junction penalty (independent on intron motif)" + echo "" + echo " --scoreGapNoncan" + echo " type: integer" + echo " example: -8" + echo " non-canonical junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGapGCAG" + echo " type: integer" + echo " example: -4" + echo " GC/AG and CT/GC junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGapATAC" + echo " type: integer" + echo " example: -8" + echo " AT/AC and GT/AT junction penalty (in addition to scoreGap)" + echo "" + echo " --scoreGenomicLengthLog2scale" + echo " type: integer" + echo " example: 0" + echo " extra score logarithmically scaled with genomic length of the alignment:" + echo " scoreGenomicLengthLog2scale*log2(genomicLength)" + echo "" + echo " --scoreDelOpen" + echo " type: integer" + echo " example: -2" + echo " deletion open penalty" + echo "" + echo " --scoreDelBase" + echo " type: integer" + echo " example: -2" + echo " deletion extension penalty per base (in addition to 
scoreDelOpen)" + echo "" + echo " --scoreInsOpen" + echo " type: integer" + echo " example: -2" + echo " insertion open penalty" + echo "" + echo " --scoreInsBase" + echo " type: integer" + echo " example: -2" + echo " insertion extension penalty per base (in addition to scoreInsOpen)" + echo "" + echo " --scoreStitchSJshift" + echo " type: integer" + echo " example: 1" + echo " maximum score reduction while searching for SJ boundaries in the" + echo " stitching step" + echo "" + echo "Alignments and Seeding:" + echo " --seedSearchStartLmax" + echo " type: integer" + echo " example: 50" + echo " defines the search start point through the read - the read is split into" + echo " pieces no longer than this value" + echo "" + echo " --seedSearchStartLmaxOverLread" + echo " type: double" + echo " example: 1.0" + echo " seedSearchStartLmax normalized to read length (sum of mates' lengths for" + echo " paired-end reads)" + echo "" + echo " --seedSearchLmax" + echo " type: integer" + echo " example: 0" + echo " defines the maximum length of the seeds, if =0 seed length is not" + echo " limited" + echo "" + echo " --seedMultimapNmax" + echo " type: integer" + echo " example: 10000" + echo " only pieces that map fewer than this value are utilized in the stitching" + echo " procedure" + echo "" + echo " --seedPerReadNmax" + echo " type: integer" + echo " example: 1000" + echo " max number of seeds per read" + echo "" + echo " --seedPerWindowNmax" + echo " type: integer" + echo " example: 50" + echo " max number of seeds per window" + echo "" + echo " --seedNoneLociPerWindow" + echo " type: integer" + echo " example: 10" + echo " max number of one seed loci per window" + echo "" + echo " --seedSplitMin" + echo " type: integer" + echo " example: 12" + echo " min length of the seed sequences split by Ns or mate gap" + echo "" + echo " --seedMapMin" + echo " type: integer" + echo " example: 5" + echo " min length of seeds to be mapped" + echo "" + echo " --alignIntronMin" + echo 
" type: integer" + echo " example: 21" + echo " minimum intron size, genomic gap is considered intron if its" + echo " length>=alignIntronMin, otherwise it is considered Deletion" + echo "" + echo " --alignIntronMax" + echo " type: integer" + echo " example: 0" + echo " maximum intron size, if 0, max intron size will be determined by" + echo " (2^winBinNbits)*winAnchorDistNbins" + echo "" + echo " --alignMatesGapMax" + echo " type: integer" + echo " example: 0" + echo " maximum gap between two mates, if 0, max intron gap will be determined" + echo " by (2^winBinNbits)*winAnchorDistNbins" + echo "" + echo " --alignSJoverhangMin" + echo " type: integer" + echo " example: 5" + echo " minimum overhang (i.e. block size) for spliced alignments" + echo "" + echo " --alignSJstitchMismatchNmax" + echo " type: integer, multiple values allowed" + echo " example: 0;-1;0;0" + echo " maximum number of mismatches for stitching of the splice junctions (-1:" + echo " no limit)." + echo " (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC" + echo " motif, (4) AT/AC and GT/AT motif." + echo "" + echo " --alignSJDBoverhangMin" + echo " type: integer" + echo " example: 3" + echo " minimum overhang (i.e. 
block size) for annotated (sjdb) spliced" + echo " alignments" + echo "" + echo " --alignSplicedMateMapLmin" + echo " type: integer" + echo " example: 0" + echo " minimum mapped length for a read mate that is spliced" + echo "" + echo " --alignSplicedMateMapLminOverLmate" + echo " type: double" + echo " example: 0.66" + echo " alignSplicedMateMapLmin normalized to mate length" + echo "" + echo " --alignWindowsPerReadNmax" + echo " type: integer" + echo " example: 10000" + echo " max number of windows per read" + echo "" + echo " --alignTranscriptsPerWindowNmax" + echo " type: integer" + echo " example: 100" + echo " max number of transcripts per window" + echo "" + echo " --alignTranscriptsPerReadNmax" + echo " type: integer" + echo " example: 10000" + echo " max number of different alignments per read to consider" + echo "" + echo " --alignEndsType" + echo " type: string" + echo " example: Local" + echo " type of read ends alignment" + echo " - Local ... standard local alignment with soft-clipping" + echo " allowed" + echo " - EndToEnd ... force end-to-end read alignment, do not" + echo " soft-clip" + echo " - Extend5pOfRead1 ... fully extend only the 5p of the read1, all other" + echo " ends: local alignment" + echo " - Extend5pOfReads12 ... fully extend only the 5p of the both read1 and" + echo " read2, all other ends: local alignment" + echo "" + echo " --alignEndsProtrude" + echo " type: string" + echo " example: 0 ConcordantPair" + echo " allow protrusion of alignment ends, i.e. start (end) of the +strand mate" + echo " downstream of the start (end) of the -strand mate" + echo " 1st word: int: maximum number of protrusion bases allowed" + echo " 2nd word: string:" + echo " - ConcordantPair ... report alignments with non-zero" + echo " protrusion as concordant pairs" + echo " - DiscordantPair ... 
report alignments with non-zero" + echo " protrusion as discordant pairs" + echo "" + echo " --alignSoftClipAtReferenceEnds" + echo " type: string" + echo " example: Yes" + echo " allow the soft-clipping of the alignments past the end of the" + echo " chromosomes" + echo " - Yes ... allow" + echo " - No ... prohibit, useful for compatibility with Cufflinks" + echo "" + echo " --alignInsertionFlush" + echo " type: string" + echo " how to flush ambiguous insertion positions" + echo " - None ... insertions are not flushed" + echo " - Right ... insertions are flushed to the right" + echo "" + echo "Paired-End reads:" + echo " --peOverlapNbasesMin" + echo " type: integer" + echo " example: 0" + echo " minimum number of overlapping bases to trigger mates merging and" + echo " realignment. Specify >0 value to switch on the \"merginf of overlapping" + echo " mates\" algorithm." + echo "" + echo " --peOverlapMMp" + echo " type: double" + echo " example: 0.01" + echo " maximum proportion of mismatched bases in the overlap area" + echo "" + echo "Windows, Anchors, Binning:" + echo " --winAnchorMultimapNmax" + echo " type: integer" + echo " example: 50" + echo " max number of loci anchors are allowed to map to" + echo "" + echo " --winBinNbits" + echo " type: integer" + echo " example: 16" + echo " =log2(winBin), where winBin is the size of the bin for the" + echo " windows/clustering, each window will occupy an integer number of bins." 
+ echo "" + echo " --winAnchorDistNbins" + echo " type: integer" + echo " example: 9" + echo " max number of bins between two anchors that allows aggregation of" + echo " anchors into one window" + echo "" + echo " --winFlankNbins" + echo " type: integer" + echo " example: 4" + echo " log2(winFlank), where win Flank is the size of the left and right" + echo " flanking regions for each window" + echo "" + echo " --winReadCoverageRelativeMin" + echo " type: double" + echo " example: 0.5" + echo " minimum relative coverage of the read sequence by the seeds in a window," + echo " for STARlong algorithm only." + echo "" + echo " --winReadCoverageBasesMin" + echo " type: integer" + echo " example: 0" + echo " minimum number of bases covered by the seeds in a window , for STARlong" + echo " algorithm only." + echo "" + echo "Chimeric Alignments:" + echo " --chimOutType" + echo " type: string, multiple values allowed" + echo " example: Junctions" + echo " type of chimeric output" + echo " - Junctions ... Chimeric.out.junction" + echo " - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file" + echo " - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)" + echo " - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for" + echo " supplemental chimeric alignments (default if no 2nd word is present)" + echo " - WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental" + echo " chimeric alignments" + echo "" + echo " --chimSegmentMin" + echo " type: integer" + echo " example: 0" + echo " minimum length of chimeric segment length, if ==0, no chimeric output" + echo "" + echo " --chimScoreMin" + echo " type: integer" + echo " example: 0" + echo " minimum total (summed) score of the chimeric segments" + echo "" + echo " --chimScoreDropMax" + echo " type: integer" + echo " example: 20" + echo " max drop (difference) of chimeric score (the sum of scores of all" + echo " chimeric segments) from the read length" + echo "" + echo " --chimScoreSeparation" + echo " type: integer" + echo " example: 10" + echo " minimum difference (separation) between the best chimeric score and the" + echo " next one" + echo "" + echo " --chimScoreJunctionNonGTAG" + echo " type: integer" + echo " example: -1" + echo " penalty for a non-GT/AG chimeric junction" + echo "" + echo " --chimJunctionOverhangMin" + echo " type: integer" + echo " example: 20" + echo " minimum overhang for a chimeric junction" + echo "" + echo " --chimSegmentReadGapMax" + echo " type: integer" + echo " example: 0" + echo " maximum gap in the read sequence between chimeric segments" + echo "" + echo " --chimFilter" + echo " type: string, multiple values allowed" + echo " example: banGenomicN" + echo " different filters for chimeric alignments" + echo " - None ... no filtering" + echo " - banGenomicN ... Ns are not allowed in the genome sequence around the" + echo " chimeric junction" + echo "" + echo " --chimMainSegmentMultNmax" + echo " type: integer" + echo " example: 10" + echo " maximum number of multi-alignments for the main chimeric segment. =1" + echo " will prohibit multimapping main segments." + echo "" + echo " --chimMultimapNmax" + echo " type: integer" + echo " example: 0" + echo " maximum number of chimeric multi-alignments" + echo " - 0 ... 
use the old scheme for chimeric detection which only considered" + echo " unique alignments" + echo "" + echo " --chimMultimapScoreRange" + echo " type: integer" + echo " example: 1" + echo " the score range for multi-mapping chimeras below the best chimeric" + echo " score. Only works with --chimMultimapNmax > 1" + echo "" + echo " --chimNonchimScoreDropMin" + echo " type: integer" + echo " example: 20" + echo " to trigger chimeric detection, the drop in the best non-chimeric" + echo " alignment score with respect to the read length has to be greater than" + echo " this value" + echo "" + echo " --chimOutJunctionFormat" + echo " type: integer" + echo " example: 0" + echo " formatting type for the Chimeric.out.junction file" + echo " - 0 ... no comment lines/headers" + echo " - 1 ... comment lines at the end of the file: command line and Nreads:" + echo " total, unique/multi-mapping" + echo "" + echo "Quantification of Annotations:" + echo " --quantMode" + echo " type: string, multiple values allowed" + echo " types of quantification requested" + echo " - - ... none" + echo " - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a" + echo " separate file" + echo " - GeneCounts ... count reads per gene" + echo "" + echo " --quantTranscriptomeBAMcompression" + echo " type: integer" + echo " example: 1" + echo " -2 to 10 transcriptome BAM compression level" + echo " - -2 ... no BAM output" + echo " - -1 ... default compression (6?)" + echo " - 0 ... no compression" + echo " - 10 ... maximum compression" + echo "" + echo " --quantTranscriptomeBan" + echo " type: string" + echo " example: IndelSoftclipSingleend" + echo " prohibit various alignment type" + echo " - IndelSoftclipSingleend ... prohibit indels, soft clipping and" + echo " single-end alignments - compatible with RSEM" + echo " - Singleend ... prohibit single-end alignments" + echo "" + echo "2-pass Mapping:" + echo " --twopassMode" + echo " type: string" + echo " 2-pass mapping mode." 
+ echo " - None ... 1-pass mapping" + echo " - Basic ... basic 2-pass mapping, with all 1st pass junctions" + echo " inserted into the genome indices on the fly" + echo "" + echo " --twopass1readsN" + echo " type: integer" + echo " example: -1" + echo " number of reads to process for the 1st step. Use very large number (or" + echo " default -1) to map all reads in the first step." + echo "" + echo "WASP parameters:" + echo " --waspOutputMode" + echo " type: string" + echo " WASP allele-specific output type. This is re-implementation of the" + echo " original WASP mappability filtering by Bryce van de Geijn, Graham" + echo " McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original" + echo " WASP paper: Nature Methods 12, 1061-1063 (2015)," + echo " https://www.nature.com/articles/nmeth.3582 ." + echo " - SAMtag ... add WASP tags to the alignments that pass WASP" + echo " filtering" + echo "" + echo "STARsolo (single cell RNA-seq) parameters:" + echo " --soloType" + echo " type: string, multiple values allowed" + echo " type of single-cell RNA-seq" + echo " - CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of" + echo " fixed length in read2, e.g. Drop-seq and 10X Chromium." + echo " - CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI" + echo " of fixed length and one adapter sequence of fixed length are allowed in" + echo " read2 only (e.g. inDrop, ddSeq)." + echo " - CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No" + echo " UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end]" + echo " CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or" + echo " SortedByCoordinate]" + echo " - SmartSeq ... 
Smart-seq: each cell in a separate FASTQ (paired-" + echo " or single-end), barcodes are corresponding read-groups, no UMI" + echo " sequences, alignments deduplicated according to alignment start and end" + echo " (after extending soft-clipped bases)" + echo "" + echo " --soloCBwhitelist" + echo " type: string, multiple values allowed" + echo " file(s) with whitelist(s) of cell barcodes. Only --soloType" + echo " CB_UMI_Complex allows more than one whitelist file." + echo " - None ... no whitelist: all cell barcodes are allowed" + echo "" + echo " --soloCBstart" + echo " type: integer" + echo " example: 1" + echo " cell barcode start base" + echo "" + echo " --soloCBlen" + echo " type: integer" + echo " example: 16" + echo " cell barcode length" + echo "" + echo " --soloUMIstart" + echo " type: integer" + echo " example: 17" + echo " UMI start base" + echo "" + echo " --soloUMIlen" + echo " type: integer" + echo " example: 10" + echo " UMI length" + echo "" + echo " --soloBarcodeReadLength" + echo " type: integer" + echo " example: 1" + echo " length of the barcode read" + echo " - 1 ... equal to sum of soloCBlen+soloUMIlen" + echo " - 0 ... not defined, do not check" + echo "" + echo " --soloBarcodeMate" + echo " type: integer" + echo " example: 0" + echo " identifies which read mate contains the barcode (CB+UMI) sequence" + echo " - 0 ... barcode sequence is on separate read, which should always be" + echo " the last file in the --readFilesIn listed" + echo " - 1 ... barcode sequence is a part of mate 1" + echo " - 2 ... barcode sequence is a part of mate 2" + echo "" + echo " --soloCBposition" + echo " type: string, multiple values allowed" + echo " position of Cell Barcode(s) on the barcode read." + echo " Presently only works with --soloType CB_UMI_Complex, and barcodes are" + echo " assumed to be on Read2." 
+ echo " Format for each barcode: startAnchor_startPosition_endAnchor_endPosition" + echo " start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1:" + echo " read end; 2: adapter start; 3: adapter end" + echo " start(end)Position is the 0-based position with of the CB start(end)" + echo " with respect to the Anchor Base" + echo " String for different barcodes are separated by space." + echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" + echo " --soloCBposition 0_0_2_-1 3_1_3_8" + echo "" + echo " --soloUMIposition" + echo " type: string" + echo " position of the UMI on the barcode read, same as soloCBposition" + echo " Example: inDrop (Zilionis et al, Nat. Protocols, 2017):" + echo " --soloCBposition 3_9_3_14" + echo "" + echo " --soloAdapterSequence" + echo " type: string" + echo " adapter sequence to anchor barcodes. Only one adapter sequence is" + echo " allowed." + echo "" + echo " --soloAdapterMismatchesNmax" + echo " type: integer" + echo " example: 1" + echo " maximum number of mismatches allowed in adapter sequence." + echo "" + echo " --soloCBmatchWLtype" + echo " type: string" + echo " example: 1MM_multi" + echo " matching the Cell Barcodes to the WhiteList" + echo " - Exact ... only exact matches allowed" + echo " - 1MM ... only one match in whitelist with 1" + echo " mismatched base allowed. Allowed CBs have to have at least one read with" + echo " exact match." + echo " - 1MM_multi ... multiple matches in whitelist with" + echo " 1 mismatched base allowed, posterior probability calculation is used" + echo " choose one of the matches." + echo " Allowed CBs have to have at least one read with exact match. This option" + echo " matches best with CellRanger 2.2.0" + echo " - 1MM_multi_pseudocounts ... same as 1MM_Multi, but" + echo " pseudocounts of 1 are added to all whitelist barcodes." + echo " - 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts," + echo " multimatching to WL is allowed for CBs with N-bases. 
This option matches" + echo " best with CellRanger >= 3.0.0" + echo " - EditDist_2 ... allow up to edit distance of 3 fpr" + echo " each of the barcodes. May include one deletion + one insertion. Only" + echo " works with --soloType CB_UMI_Complex. Matches to multiple passlist" + echo " barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + echo "" + echo " --soloInputSAMattrBarcodeSeq" + echo " type: string, multiple values allowed" + echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" + echo " SAM attributes mark the barcode sequence (in proper order)." + echo " For instance, for 10X CellRanger or STARsolo BAMs, use" + echo " --soloInputSAMattrBarcodeSeq CR UR ." + echo " This parameter is required when running STARsolo with input from SAM." + echo "" + echo " --soloInputSAMattrBarcodeQual" + echo " type: string, multiple values allowed" + echo " when inputting reads from a SAM file (--readsFileType SAM SE/PE), these" + echo " SAM attributes mark the barcode qualities (in proper order)." + echo " For instance, for 10X CellRanger or STARsolo BAMs, use" + echo " --soloInputSAMattrBarcodeQual CY UY ." + echo " If this parameter is '-' (default), the quality 'H' will be assigned to" + echo " all bases." + echo "" + echo " --soloStrand" + echo " type: string" + echo " example: Forward" + echo " strandedness of the solo libraries:" + echo " - Unstranded ... no strand information" + echo " - Forward ... read strand same as the original RNA molecule" + echo " - Reverse ... read strand opposite to the original RNA molecule" + echo "" + echo " --soloFeatures" + echo " type: string, multiple values allowed" + echo " example: Gene" + echo " genomic features for which the UMI counts per Cell Barcode are collected" + echo " - Gene ... genes: reads match the gene transcript" + echo " - SJ ... splice junctions: reported in SJ.out.tab" + echo " - GeneFull ... 
full gene (pre-mRNA): count all reads overlapping" + echo " genes' exons and introns" + echo " - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads" + echo " overlapping genes' exons and introns: prioritize 100% overlap with exons" + echo " - GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads" + echo " overlapping genes' exons and introns: prioritize >50% overlap with" + echo " exons. Do not count reads with 100% exonic overlap in the antisense" + echo " direction." + echo "" + echo " --soloMultiMappers" + echo " type: string, multiple values allowed" + echo " example: Unique" + echo " counting method for reads mapping to multiple genes" + echo " - Unique ... count only reads that map to unique genes" + echo " - Uniform ... uniformly distribute multi-genic UMIs to all genes" + echo " - Rescue ... distribute UMIs proportionally to unique+uniform counts" + echo " (~ first iteration of EM)" + echo " - PropUnique ... distribute UMIs proportionally to unique mappers, if" + echo " present, and uniformly if not." + echo " - EM ... multi-gene UMIs are distributed using Expectation" + echo " Maximization algorithm" + echo "" + echo " --soloUMIdedup" + echo " type: string, multiple values allowed" + echo " example: 1MM_All" + echo " type of UMI deduplication (collapsing) algorithm" + echo " - 1MM_All ... all UMIs with 1 mismatch distance to" + echo " each other are collapsed (i.e. counted once)." + echo " - 1MM_Directional_UMItools ... follows the \"directional\" method from" + echo " the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017)." + echo " - 1MM_Directional ... same as 1MM_Directional_UMItools, but" + echo " with more stringent criteria for duplicate UMIs" + echo " - Exact ... only exactly matching UMIs are" + echo " collapsed." + echo " - NoDedup ... no deduplication of UMIs, count all" + echo " reads." + echo " - 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI" + echo " collapsing." 
+ echo "" + echo " --soloUMIfiltering" + echo " type: string, multiple values allowed" + echo " type of UMI filtering (for reads uniquely mapping to genes)" + echo " - - ... basic filtering: remove UMIs with N and" + echo " homopolymers (similar to CellRanger 2.2.0)." + echo " - MultiGeneUMI ... basic + remove lower-count UMIs that map to" + echo " more than one gene." + echo " - MultiGeneUMI_All ... basic + remove all UMIs that map to more than" + echo " one gene." + echo " - MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to" + echo " more than one gene, matching CellRanger > 3.0.0 ." + echo " Only works with --soloUMIdedup 1MM_CR" + echo "" + echo " --soloOutFileNames" + echo " type: string, multiple values allowed" + echo " example: Solo.out/;features.tsv;barcodes.tsv;matrix.mtx" + echo " file names for STARsolo output:" + echo " file_name_prefix gene_names barcode_sequences" + echo " cell_feature_count_matrix" + echo "" + echo " --soloCellFilter" + echo " type: string, multiple values allowed" + echo " example: CellRanger2.2;3000;0.99;10" + echo " cell filtering type and parameters" + echo " - None ... do not output filtered cells" + echo " - TopCells ... only report top cells by UMI count, followed by" + echo " the exact number of cells" + echo " - CellRanger2.2 ... simple filtering of CellRanger 2.2." + echo " Can be followed by numbers: number of expected cells, robust maximum" + echo " percentile for UMI count, maximum to minimum ratio for UMI count" + echo " The harcoded values are from CellRanger: nExpectedCells=3000;" + echo " maxPercentile=0.99; maxMinRatio=10" + echo " - EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please" + echo " cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20," + echo " 63 (2019):" + echo " " + echo "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y" + echo " Can be followed by 10 numeric parameters: nExpectedCells" + echo " maxPercentile maxMinRatio indMin indMax umiMin" + echo " umiMinFracMedian candMaxN FDR simN" + echo " The harcoded values are from CellRanger: 3000" + echo " 0.99 10 45000 90000 500 0.01" + echo " 20000 0.01 10000" + echo "" + echo " --soloOutFormatFeaturesGeneField3" + echo " type: string, multiple values allowed" + echo " example: Gene Expression" + echo " field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is" + echo " output." + echo "" + echo " --soloCellReadStats" + echo " type: string" + echo " Output reads statistics for each CB" + echo " - Standard ... standard output" +}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Verifies two things, in order:
#   1. a `docker` executable can be resolved by the shell;
#   2. `docker version` succeeds.
# On either failure a critical message is logged and the script exits with
# status 1. Takes no arguments.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # Run `command -v` directly and negate its exit status. Wrapping it in
  # `[ ... ]` would pass the words to `test` as an (invalid) expression
  # instead of executing the lookup, so a missing docker would go unnoticed.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # Remember the current shell flags and disable errexit, so that a failing
  # `docker version` can be inspected rather than aborting the script.
  save=$-; set +e
  # The captured value is not read in this function; only the exit status is.
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  # Restore errexit only if it was enabled on entry.
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  # Only the exit status matters; all output of the inspection is discarded.
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  # `docker images -q` prints image ids; non-empty output means a match.
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # Verbose: let docker print its own progress; normalise the status to 0/1.
    docker pull "$1" && return 0 || return 1
  else
    # Quiet: silence docker, with errexit suspended so the failure can be
    # turned into a warning instead of aborting the script.
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : exit status of `docker push` (0 when the push succeeded)
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # Report on the image that was actually pushed ($1) rather than on a global
  # that is only populated when the call comes through ViashDockerSetup.
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2...               : optional extra arguments, forwarded to ViashDockerBuild
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  # A failed pull is expected here, so errexit is suspended around it.
  save=$-; set +e
  ViashDockerPull "$1"
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    # Forward every argument (image id plus e.g. --no-cache) unchanged.
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +ENV STAR_VERSION 2.7.3a +ENV PACKAGES gcc g++ make wget zlib1g-dev unzip +RUN apt-get update && \ + apt-get install -y --no-install-recommends ${PACKAGES} && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ + unzip ${STAR_VERSION}.zip && \ + cd STAR-${STAR_VERSION}/source && \ + make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ + cp STAR /usr/local/bin && \ + cd / && \ + rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ + apt-get --purge autoremove -y ${PACKAGES} && \ + apt-get clean + +LABEL 
org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component mapping star_align_v273a" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-star_align_v273a-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "star_align_v273a 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readFilesIn) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesIn. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeDir) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFileNamePrefix) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--outFileNamePrefix\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFileNamePrefix. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --runRNGseed) + [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUNRNGSEED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --runRNGseed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --runRNGseed=*) + [ -n "$VIASH_PAR_RUNRNGSEED" ] && ViashError Bad arguments for option \'--runRNGseed=*\': \'$VIASH_PAR_RUNRNGSEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_RUNRNGSEED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeLoad) + [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOMELOAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeLoad. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genomeLoad=*) + [ -n "$VIASH_PAR_GENOMELOAD" ] && ViashError Bad arguments for option \'--genomeLoad=*\': \'$VIASH_PAR_GENOMELOAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOMELOAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --genomeFastaFiles) + if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_PAR_GENOMEFASTAFILES="$2" + else + VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --genomeFastaFiles=*) + if [ -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_PAR_GENOMEFASTAFILES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMEFASTAFILES="$VIASH_PAR_GENOMEFASTAFILES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --genomeFileSizes) + if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then + VIASH_PAR_GENOMEFILESIZES="$2" + else + VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFileSizes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genomeFileSizes=*) + if [ -z "$VIASH_PAR_GENOMEFILESIZES" ]; then + VIASH_PAR_GENOMEFILESIZES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMEFILESIZES="$VIASH_PAR_GENOMEFILESIZES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --genomeTransformOutput) + if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then + VIASH_PAR_GENOMETRANSFORMOUTPUT="$2" + else + VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeTransformOutput. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genomeTransformOutput=*) + if [ -z "$VIASH_PAR_GENOMETRANSFORMOUTPUT" ]; then + VIASH_PAR_GENOMETRANSFORMOUTPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMETRANSFORMOUTPUT="$VIASH_PAR_GENOMETRANSFORMOUTPUT;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --genomeChrSetMitochondrial) + if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$2" + else + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeChrSetMitochondrial. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --genomeChrSetMitochondrial=*) + if [ -z "$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL" ]; then + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOMECHRSETMITOCHONDRIAL="$VIASH_PAR_GENOMECHRSETMITOCHONDRIAL;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbFileChrStartEnd) + if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then + VIASH_PAR_SJDBFILECHRSTARTEND="$2" + else + VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbFileChrStartEnd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbFileChrStartEnd=*) + if [ -z "$VIASH_PAR_SJDBFILECHRSTARTEND" ]; then + VIASH_PAR_SJDBFILECHRSTARTEND=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBFILECHRSTARTEND="$VIASH_PAR_SJDBFILECHRSTARTEND;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbGTFfile) + [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFfile=*) + [ -n "$VIASH_PAR_SJDBGTFFILE" ] && ViashError Bad arguments for option \'--sjdbGTFfile=*\': \'$VIASH_PAR_SJDBGTFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFchrPrefix) + [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFCHRPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFchrPrefix. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFchrPrefix=*) + [ -n "$VIASH_PAR_SJDBGTFCHRPREFIX" ] && ViashError Bad arguments for option \'--sjdbGTFchrPrefix=*\': \'$VIASH_PAR_SJDBGTFCHRPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFCHRPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFfeatureExon) + [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFEATUREEXON="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfeatureExon. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFfeatureExon=*) + [ -n "$VIASH_PAR_SJDBGTFFEATUREEXON" ] && ViashError Bad arguments for option \'--sjdbGTFfeatureExon=*\': \'$VIASH_PAR_SJDBGTFFEATUREEXON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFFEATUREEXON=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentTranscript) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentTranscript. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentTranscript=*) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentTranscript=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentGene) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTGENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGene=*) + [ -n "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE" ] && ViashError Bad arguments for option \'--sjdbGTFtagExonParentGene=*\': \'$VIASH_PAR_SJDBGTFTAGEXONPARENTGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBGTFTAGEXONPARENTGENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFtagExonParentGeneName) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$2" + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneName. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGeneName=*) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbGTFtagExonParentGeneType) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$2" + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFtagExonParentGeneType. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbGTFtagExonParentGeneType=*) + if [ -z "$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE" ]; then + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE="$VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --sjdbOverhang) + [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBOVERHANG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbOverhang. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbOverhang=*) + [ -n "$VIASH_PAR_SJDBOVERHANG" ] && ViashError Bad arguments for option \'--sjdbOverhang=*\': \'$VIASH_PAR_SJDBOVERHANG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBOVERHANG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbScore) + [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBSCORE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbScore. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbScore=*) + [ -n "$VIASH_PAR_SJDBSCORE" ] && ViashError Bad arguments for option \'--sjdbScore=*\': \'$VIASH_PAR_SJDBSCORE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBSCORE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbInsertSave) + [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SJDBINSERTSAVE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbInsertSave. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sjdbInsertSave=*) + [ -n "$VIASH_PAR_SJDBINSERTSAVE" ] && ViashError Bad arguments for option \'--sjdbInsertSave=*\': \'$VIASH_PAR_SJDBINSERTSAVE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SJDBINSERTSAVE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --varVCFfile) + [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARVCFFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --varVCFfile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --varVCFfile=*) + [ -n "$VIASH_PAR_VARVCFFILE" ] && ViashError Bad arguments for option \'--varVCFfile=*\': \'$VIASH_PAR_VARVCFFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VARVCFFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesType) + [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesType=*) + [ -n "$VIASH_PAR_READFILESTYPE" ] && ViashError Bad arguments for option \'--readFilesType=*\': \'$VIASH_PAR_READFILESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_READFILESTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesSAMattrKeep) + if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then + VIASH_PAR_READFILESSAMATTRKEEP="$2" + else + VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesSAMattrKeep. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesSAMattrKeep=*) + if [ -z "$VIASH_PAR_READFILESSAMATTRKEEP" ]; then + VIASH_PAR_READFILESSAMATTRKEEP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READFILESSAMATTRKEEP="$VIASH_PAR_READFILESSAMATTRKEEP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readFilesManifest) + [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESMANIFEST="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesManifest. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesManifest=*) + [ -n "$VIASH_PAR_READFILESMANIFEST" ] && ViashError Bad arguments for option \'--readFilesManifest=*\': \'$VIASH_PAR_READFILESMANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESMANIFEST=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesPrefix) + [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesPrefix. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --readFilesPrefix=*) + [ -n "$VIASH_PAR_READFILESPREFIX" ] && ViashError Bad arguments for option \'--readFilesPrefix=*\': \'$VIASH_PAR_READFILESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READFILESPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readFilesCommand) + if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then + VIASH_PAR_READFILESCOMMAND="$2" + else + VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readFilesCommand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readFilesCommand=*) + if [ -z "$VIASH_PAR_READFILESCOMMAND" ]; then + VIASH_PAR_READFILESCOMMAND=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READFILESCOMMAND="$VIASH_PAR_READFILESCOMMAND;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readMapNumber) + [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMAPNUMBER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMapNumber. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readMapNumber=*) + [ -n "$VIASH_PAR_READMAPNUMBER" ] && ViashError Bad arguments for option \'--readMapNumber=*\': \'$VIASH_PAR_READMAPNUMBER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMAPNUMBER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readMatesLengthsIn) + [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMATESLENGTHSIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readMatesLengthsIn. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readMatesLengthsIn=*) + [ -n "$VIASH_PAR_READMATESLENGTHSIN" ] && ViashError Bad arguments for option \'--readMatesLengthsIn=*\': \'$VIASH_PAR_READMATESLENGTHSIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READMATESLENGTHSIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --readNameSeparator) + if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then + VIASH_PAR_READNAMESEPARATOR="$2" + else + VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readNameSeparator. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readNameSeparator=*) + if [ -z "$VIASH_PAR_READNAMESEPARATOR" ]; then + VIASH_PAR_READNAMESEPARATOR=$(ViashRemoveFlags "$1") + else + VIASH_PAR_READNAMESEPARATOR="$VIASH_PAR_READNAMESEPARATOR;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --readQualityScoreBase) + [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READQUALITYSCOREBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --readQualityScoreBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --readQualityScoreBase=*) + [ -n "$VIASH_PAR_READQUALITYSCOREBASE" ] && ViashError Bad arguments for option \'--readQualityScoreBase=*\': \'$VIASH_PAR_READQUALITYSCOREBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_READQUALITYSCOREBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --clipAdapterType) + [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CLIPADAPTERTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clipAdapterType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clipAdapterType=*) + [ -n "$VIASH_PAR_CLIPADAPTERTYPE" ] && ViashError Bad arguments for option \'--clipAdapterType=*\': \'$VIASH_PAR_CLIPADAPTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CLIPADAPTERTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --clip3pNbases) + if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then + VIASH_PAR_CLIP3PNBASES="$2" + else + VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pNbases=*) + if [ -z "$VIASH_PAR_CLIP3PNBASES" ]; then + VIASH_PAR_CLIP3PNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PNBASES="$VIASH_PAR_CLIP3PNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAdapterSeq) + if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then + VIASH_PAR_CLIP3PADAPTERSEQ="$2" + else + VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterSeq. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pAdapterSeq=*) + if [ -z "$VIASH_PAR_CLIP3PADAPTERSEQ" ]; then + VIASH_PAR_CLIP3PADAPTERSEQ=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PADAPTERSEQ="$VIASH_PAR_CLIP3PADAPTERSEQ;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAdapterMMp) + if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + VIASH_PAR_CLIP3PADAPTERMMP="$2" + else + VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAdapterMMp. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --clip3pAdapterMMp=*) + if [ -z "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + VIASH_PAR_CLIP3PADAPTERMMP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PADAPTERMMP="$VIASH_PAR_CLIP3PADAPTERMMP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip3pAfterAdapterNbases) + if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$2" + else + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip3pAfterAdapterNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip3pAfterAdapterNbases=*) + if [ -z "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + VIASH_PAR_CLIP3PAFTERADAPTERNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP3PAFTERADAPTERNBASES="$VIASH_PAR_CLIP3PAFTERADAPTERNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --clip5pNbases) + if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then + VIASH_PAR_CLIP5PNBASES="$2" + else + VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip5pNbases. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --clip5pNbases=*) + if [ -z "$VIASH_PAR_CLIP5PNBASES" ]; then + VIASH_PAR_CLIP5PNBASES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CLIP5PNBASES="$VIASH_PAR_CLIP5PNBASES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --limitGenomeGenerateRAM) + [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITGENOMEGENERATERAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitGenomeGenerateRAM. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --limitGenomeGenerateRAM=*) + [ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ] && ViashError Bad arguments for option \'--limitGenomeGenerateRAM=*\': \'$VIASH_PAR_LIMITGENOMEGENERATERAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITGENOMEGENERATERAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitIObufferSize) + if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + VIASH_PAR_LIMITIOBUFFERSIZE="$2" + else + VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitIObufferSize. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitIObufferSize=*) + if [ -z "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + VIASH_PAR_LIMITIOBUFFERSIZE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_LIMITIOBUFFERSIZE="$VIASH_PAR_LIMITIOBUFFERSIZE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --limitOutSAMoneReadBytes) + [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSAMONEREADBYTES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSAMoneReadBytes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSAMoneReadBytes=*) + [ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ] && ViashError Bad arguments for option \'--limitOutSAMoneReadBytes=*\': \'$VIASH_PAR_LIMITOUTSAMONEREADBYTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSAMONEREADBYTES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitOutSJoneRead) + [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LIMITOUTSJONEREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJoneRead. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSJoneRead=*) + [ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ] && ViashError Bad arguments for option \'--limitOutSJoneRead=*\': \'$VIASH_PAR_LIMITOUTSJONEREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJONEREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitOutSJcollapsed) + [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJCOLLAPSED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitOutSJcollapsed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitOutSJcollapsed=*) + [ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ] && ViashError Bad arguments for option \'--limitOutSJcollapsed=*\': \'$VIASH_PAR_LIMITOUTSJCOLLAPSED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITOUTSJCOLLAPSED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitBAMsortRAM) + [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITBAMSORTRAM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitBAMsortRAM. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitBAMsortRAM=*) + [ -n "$VIASH_PAR_LIMITBAMSORTRAM" ] && ViashError Bad arguments for option \'--limitBAMsortRAM=*\': \'$VIASH_PAR_LIMITBAMSORTRAM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LIMITBAMSORTRAM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitSjdbInsertNsj) + [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITSJDBINSERTNSJ="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitSjdbInsertNsj. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitSjdbInsertNsj=*) + [ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ] && ViashError Bad arguments for option \'--limitSjdbInsertNsj=*\': \'$VIASH_PAR_LIMITSJDBINSERTNSJ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITSJDBINSERTNSJ=$(ViashRemoveFlags "$1") + shift 1 + ;; + --limitNreadsSoft) + [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITNREADSSOFT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --limitNreadsSoft. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --limitNreadsSoft=*) + [ -n "$VIASH_PAR_LIMITNREADSSOFT" ] && ViashError Bad arguments for option \'--limitNreadsSoft=*\': \'$VIASH_PAR_LIMITNREADSSOFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LIMITNREADSSOFT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outTmpKeep) + [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTTMPKEEP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outTmpKeep. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outTmpKeep=*) + [ -n "$VIASH_PAR_OUTTMPKEEP" ] && ViashError Bad arguments for option \'--outTmpKeep=*\': \'$VIASH_PAR_OUTTMPKEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTTMPKEEP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outStd) + [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSTD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outStd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outStd=*) + [ -n "$VIASH_PAR_OUTSTD" ] && ViashError Bad arguments for option \'--outStd=*\': \'$VIASH_PAR_OUTSTD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSTD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outReadsUnmapped) + [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTREADSUNMAPPED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outReadsUnmapped. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outReadsUnmapped=*) + [ -n "$VIASH_PAR_OUTREADSUNMAPPED" ] && ViashError Bad arguments for option \'--outReadsUnmapped=*\': \'$VIASH_PAR_OUTREADSUNMAPPED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTREADSUNMAPPED=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outQSconversionAdd) + [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTQSCONVERSIONADD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outQSconversionAdd. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outQSconversionAdd=*) + [ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ] && ViashError Bad arguments for option \'--outQSconversionAdd=*\': \'$VIASH_PAR_OUTQSCONVERSIONADD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTQSCONVERSIONADD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outMultimapperOrder) + [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTMULTIMAPPERORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outMultimapperOrder. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outMultimapperOrder=*) + [ -n "$VIASH_PAR_OUTMULTIMAPPERORDER" ] && ViashError Bad arguments for option \'--outMultimapperOrder=*\': \'$VIASH_PAR_OUTMULTIMAPPERORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTMULTIMAPPERORDER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMtype) + if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then + VIASH_PAR_OUTSAMTYPE="$2" + else + VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtype. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMtype=*) + if [ -z "$VIASH_PAR_OUTSAMTYPE" ]; then + VIASH_PAR_OUTSAMTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMTYPE="$VIASH_PAR_OUTSAMTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMmode) + [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMmode=*) + [ -n "$VIASH_PAR_OUTSAMMODE" ] && ViashError Bad arguments for option \'--outSAMmode=*\': \'$VIASH_PAR_OUTSAMMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMstrandField) + [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMSTRANDFIELD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMstrandField. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMstrandField=*) + [ -n "$VIASH_PAR_OUTSAMSTRANDFIELD" ] && ViashError Bad arguments for option \'--outSAMstrandField=*\': \'$VIASH_PAR_OUTSAMSTRANDFIELD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMSTRANDFIELD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMattributes) + if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then + VIASH_PAR_OUTSAMATTRIBUTES="$2" + else + VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattributes. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMattributes=*) + if [ -z "$VIASH_PAR_OUTSAMATTRIBUTES" ]; then + VIASH_PAR_OUTSAMATTRIBUTES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMATTRIBUTES="$VIASH_PAR_OUTSAMATTRIBUTES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMattrIHstart) + [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMATTRIHSTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrIHstart. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMattrIHstart=*) + [ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ] && ViashError Bad arguments for option \'--outSAMattrIHstart=*\': \'$VIASH_PAR_OUTSAMATTRIHSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMATTRIHSTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMunmapped) + if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then + VIASH_PAR_OUTSAMUNMAPPED="$2" + else + VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMunmapped. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMunmapped=*) + if [ -z "$VIASH_PAR_OUTSAMUNMAPPED" ]; then + VIASH_PAR_OUTSAMUNMAPPED=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMUNMAPPED="$VIASH_PAR_OUTSAMUNMAPPED;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMorder) + [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMORDER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMorder. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMorder=*) + [ -n "$VIASH_PAR_OUTSAMORDER" ] && ViashError Bad arguments for option \'--outSAMorder=*\': \'$VIASH_PAR_OUTSAMORDER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMORDER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMprimaryFlag) + [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMPRIMARYFLAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMprimaryFlag. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMprimaryFlag=*) + [ -n "$VIASH_PAR_OUTSAMPRIMARYFLAG" ] && ViashError Bad arguments for option \'--outSAMprimaryFlag=*\': \'$VIASH_PAR_OUTSAMPRIMARYFLAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMPRIMARYFLAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMreadID) + [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMREADID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMreadID. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMreadID=*) + [ -n "$VIASH_PAR_OUTSAMREADID" ] && ViashError Bad arguments for option \'--outSAMreadID=*\': \'$VIASH_PAR_OUTSAMREADID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMREADID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMmapqUnique) + [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSAMMAPQUNIQUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmapqUnique. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMmapqUnique=*) + [ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ] && ViashError Bad arguments for option \'--outSAMmapqUnique=*\': \'$VIASH_PAR_OUTSAMMAPQUNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMAPQUNIQUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMflagOR) + [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagOR. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMflagOR=*) + [ -n "$VIASH_PAR_OUTSAMFLAGOR" ] && ViashError Bad arguments for option \'--outSAMflagOR=*\': \'$VIASH_PAR_OUTSAMFLAGOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMflagAND) + [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMFLAGAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMflagAND. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMflagAND=*) + [ -n "$VIASH_PAR_OUTSAMFLAGAND" ] && ViashError Bad arguments for option \'--outSAMflagAND=*\': \'$VIASH_PAR_OUTSAMFLAGAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSAMFLAGAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMattrRGline) + if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then + VIASH_PAR_OUTSAMATTRRGLINE="$2" + else + VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMattrRGline. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMattrRGline=*) + if [ -z "$VIASH_PAR_OUTSAMATTRRGLINE" ]; then + VIASH_PAR_OUTSAMATTRRGLINE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMATTRRGLINE="$VIASH_PAR_OUTSAMATTRRGLINE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderHD) + if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then + VIASH_PAR_OUTSAMHEADERHD="$2" + else + VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderHD. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMheaderHD=*) + if [ -z "$VIASH_PAR_OUTSAMHEADERHD" ]; then + VIASH_PAR_OUTSAMHEADERHD=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMHEADERHD="$VIASH_PAR_OUTSAMHEADERHD;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderPG) + if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then + VIASH_PAR_OUTSAMHEADERPG="$2" + else + VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderPG. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMheaderPG=*) + if [ -z "$VIASH_PAR_OUTSAMHEADERPG" ]; then + VIASH_PAR_OUTSAMHEADERPG=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMHEADERPG="$VIASH_PAR_OUTSAMHEADERPG;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMheaderCommentFile) + [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMHEADERCOMMENTFILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMheaderCommentFile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMheaderCommentFile=*) + [ -n "$VIASH_PAR_OUTSAMHEADERCOMMENTFILE" ] && ViashError Bad arguments for option \'--outSAMheaderCommentFile=*\': \'$VIASH_PAR_OUTSAMHEADERCOMMENTFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMHEADERCOMMENTFILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMfilter) + if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then + VIASH_PAR_OUTSAMFILTER="$2" + else + VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMfilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMfilter=*) + if [ -z "$VIASH_PAR_OUTSAMFILTER" ]; then + VIASH_PAR_OUTSAMFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSAMFILTER="$VIASH_PAR_OUTSAMFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSAMmultNmax) + [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMULTNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMmultNmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSAMmultNmax=*) + [ -n "$VIASH_PAR_OUTSAMMULTNMAX" ] && ViashError Bad arguments for option \'--outSAMmultNmax=*\': \'$VIASH_PAR_OUTSAMMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMMULTNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSAMtlen) + [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMTLEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSAMtlen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSAMtlen=*) + [ -n "$VIASH_PAR_OUTSAMTLEN" ] && ViashError Bad arguments for option \'--outSAMtlen=*\': \'$VIASH_PAR_OUTSAMTLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSAMTLEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMcompression) + [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMCOMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMcompression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMcompression=*) + [ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--outBAMcompression=*\': \'$VIASH_PAR_OUTBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMCOMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMsortingThreadN) + [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTBAMSORTINGTHREADN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingThreadN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMsortingThreadN=*) + [ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ] && ViashError Bad arguments for option \'--outBAMsortingThreadN=*\': \'$VIASH_PAR_OUTBAMSORTINGTHREADN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGTHREADN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outBAMsortingBinsN) + [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGBINSN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outBAMsortingBinsN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outBAMsortingBinsN=*) + [ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ] && ViashError Bad arguments for option \'--outBAMsortingBinsN=*\': \'$VIASH_PAR_OUTBAMSORTINGBINSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTBAMSORTINGBINSN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamRemoveDuplicatesType) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesType. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --bamRemoveDuplicatesType=*) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESTYPE" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesType=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --bamRemoveDuplicatesMate2basesN) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --bamRemoveDuplicatesMate2basesN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --bamRemoveDuplicatesMate2basesN=*) + [ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ] && ViashError Bad arguments for option \'--bamRemoveDuplicatesMate2basesN=*\': \'$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigType) + if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then + VIASH_PAR_OUTWIGTYPE="$2" + else + VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigType=*) + if [ -z "$VIASH_PAR_OUTWIGTYPE" ]; then + VIASH_PAR_OUTWIGTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTWIGTYPE="$VIASH_PAR_OUTWIGTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outWigStrand) + [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTWIGSTRAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigStrand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigStrand=*) + [ -n "$VIASH_PAR_OUTWIGSTRAND" ] && ViashError Bad arguments for option \'--outWigStrand=*\': \'$VIASH_PAR_OUTWIGSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGSTRAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigReferencesPrefix) + [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGREFERENCESPREFIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigReferencesPrefix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigReferencesPrefix=*) + [ -n "$VIASH_PAR_OUTWIGREFERENCESPREFIX" ] && ViashError Bad arguments for option \'--outWigReferencesPrefix=*\': \'$VIASH_PAR_OUTWIGREFERENCESPREFIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGREFERENCESPREFIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outWigNorm) + [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTWIGNORM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outWigNorm. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outWigNorm=*) + [ -n "$VIASH_PAR_OUTWIGNORM" ] && ViashError Bad arguments for option \'--outWigNorm=*\': \'$VIASH_PAR_OUTWIGNORM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTWIGNORM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterType) + [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterType=*) + [ -n "$VIASH_PAR_OUTFILTERTYPE" ] && ViashError Bad arguments for option \'--outFilterType=*\': \'$VIASH_PAR_OUTFILTERTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMultimapScoreRange) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMultimapScoreRange=*) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--outFilterMultimapScoreRange=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMultimapNmax) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMultimapNmax=*) + [ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--outFilterMultimapNmax=*\': \'$VIASH_PAR_OUTFILTERMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMismatchNmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNoverLmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverLmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outFilterMismatchNoverLmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMismatchNoverReadLmax) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMismatchNoverReadLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMismatchNoverReadLmax=*) + [ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ] && ViashError Bad arguments for option \'--outFilterMismatchNoverReadLmax=*\': \'$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterScoreMin) + [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterScoreMin=*) + [ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ] && ViashError Bad arguments for option \'--outFilterScoreMin=*\': \'$VIASH_PAR_OUTFILTERSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERSCOREMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterScoreMinOverLread) + [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMINOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterScoreMinOverLread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterScoreMinOverLread=*) + [ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterScoreMinOverLread=*\': \'$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERSCOREMINOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMatchNmin) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMatchNmin=*) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ] && ViashError Bad arguments for option \'--outFilterMatchNmin=*\': \'$VIASH_PAR_OUTFILTERMATCHNMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterMatchNminOverLread) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterMatchNminOverLread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterMatchNminOverLread=*) + [ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ] && ViashError Bad arguments for option \'--outFilterMatchNminOverLread=*\': \'$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterIntronMotifs) + [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONMOTIFS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronMotifs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outFilterIntronMotifs=*) + [ -n "$VIASH_PAR_OUTFILTERINTRONMOTIFS" ] && ViashError Bad arguments for option \'--outFilterIntronMotifs=*\': \'$VIASH_PAR_OUTFILTERINTRONMOTIFS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONMOTIFS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outFilterIntronStrands) + [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONSTRANDS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outFilterIntronStrands. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outFilterIntronStrands=*) + [ -n "$VIASH_PAR_OUTFILTERINTRONSTRANDS" ] && ViashError Bad arguments for option \'--outFilterIntronStrands=*\': \'$VIASH_PAR_OUTFILTERINTRONSTRANDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTFILTERINTRONSTRANDS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJtype) + [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJtype=*) + [ -n "$VIASH_PAR_OUTSJTYPE" ] && ViashError Bad arguments for option \'--outSJtype=*\': \'$VIASH_PAR_OUTSJTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJfilterReads) + [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTSJFILTERREADS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterReads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterReads=*) + [ -n "$VIASH_PAR_OUTSJFILTERREADS" ] && ViashError Bad arguments for option \'--outSJfilterReads=*\': \'$VIASH_PAR_OUTSJFILTERREADS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTSJFILTERREADS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --outSJfilterOverhangMin) + if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$2" + else + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterOverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterOverhangMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + VIASH_PAR_OUTSJFILTEROVERHANGMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTEROVERHANGMIN="$VIASH_PAR_OUTSJFILTEROVERHANGMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterCountUniqueMin) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$2" + else + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountUniqueMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterCountUniqueMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN="$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterCountTotalMin) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$2" + else + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterCountTotalMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --outSJfilterCountTotalMin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN="$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterDistToOtherSJmin) + if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$2" + else + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterDistToOtherSJmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterDistToOtherSJmin=*) + if [ -z "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN="$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --outSJfilterIntronMaxVsReadN) + if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$2" + else + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --outSJfilterIntronMaxVsReadN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --outSJfilterIntronMaxVsReadN=*) + if [ -z "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN="$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --scoreGap) + [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREGAP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGap. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGap=*) + [ -n "$VIASH_PAR_SCOREGAP" ] && ViashError Bad arguments for option \'--scoreGap=*\': \'$VIASH_PAR_SCOREGAP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapNoncan) + [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPNONCAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapNoncan. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapNoncan=*) + [ -n "$VIASH_PAR_SCOREGAPNONCAN" ] && ViashError Bad arguments for option \'--scoreGapNoncan=*\': \'$VIASH_PAR_SCOREGAPNONCAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPNONCAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapGCAG) + [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPGCAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapGCAG. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapGCAG=*) + [ -n "$VIASH_PAR_SCOREGAPGCAG" ] && ViashError Bad arguments for option \'--scoreGapGCAG=*\': \'$VIASH_PAR_SCOREGAPGCAG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREGAPGCAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGapATAC) + [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPATAC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGapATAC. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGapATAC=*) + [ -n "$VIASH_PAR_SCOREGAPATAC" ] && ViashError Bad arguments for option \'--scoreGapATAC=*\': \'$VIASH_PAR_SCOREGAPATAC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGAPATAC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreGenomicLengthLog2scale) + [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreGenomicLengthLog2scale. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreGenomicLengthLog2scale=*) + [ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ] && ViashError Bad arguments for option \'--scoreGenomicLengthLog2scale=*\': \'$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreDelOpen) + [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELOPEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelOpen. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreDelOpen=*) + [ -n "$VIASH_PAR_SCOREDELOPEN" ] && ViashError Bad arguments for option \'--scoreDelOpen=*\': \'$VIASH_PAR_SCOREDELOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELOPEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreDelBase) + [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreDelBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreDelBase=*) + [ -n "$VIASH_PAR_SCOREDELBASE" ] && ViashError Bad arguments for option \'--scoreDelBase=*\': \'$VIASH_PAR_SCOREDELBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREDELBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreInsOpen) + [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSOPEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsOpen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreInsOpen=*) + [ -n "$VIASH_PAR_SCOREINSOPEN" ] && ViashError Bad arguments for option \'--scoreInsOpen=*\': \'$VIASH_PAR_SCOREINSOPEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSOPEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreInsBase) + [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SCOREINSBASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreInsBase. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreInsBase=*) + [ -n "$VIASH_PAR_SCOREINSBASE" ] && ViashError Bad arguments for option \'--scoreInsBase=*\': \'$VIASH_PAR_SCOREINSBASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCOREINSBASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --scoreStitchSJshift) + [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCORESTITCHSJSHIFT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scoreStitchSJshift. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scoreStitchSJshift=*) + [ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ] && ViashError Bad arguments for option \'--scoreStitchSJshift=*\': \'$VIASH_PAR_SCORESTITCHSJSHIFT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCORESTITCHSJSHIFT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchStartLmax) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchStartLmax=*) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ] && ViashError Bad arguments for option \'--seedSearchStartLmax=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchStartLmaxOverLread) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchStartLmaxOverLread. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchStartLmaxOverLread=*) + [ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ] && ViashError Bad arguments for option \'--seedSearchStartLmaxOverLread=*\': \'$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSearchLmax) + [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHLMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSearchLmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSearchLmax=*) + [ -n "$VIASH_PAR_SEEDSEARCHLMAX" ] && ViashError Bad arguments for option \'--seedSearchLmax=*\': \'$VIASH_PAR_SEEDSEARCHLMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSEARCHLMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedMultimapNmax) + [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SEEDMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedMultimapNmax=*) + [ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--seedMultimapNmax=*\': \'$VIASH_PAR_SEEDMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedPerReadNmax) + [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedPerReadNmax=*) + [ -n "$VIASH_PAR_SEEDPERREADNMAX" ] && ViashError Bad arguments for option \'--seedPerReadNmax=*\': \'$VIASH_PAR_SEEDPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedPerWindowNmax) + [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDPERWINDOWNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedPerWindowNmax=*) + [ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--seedPerWindowNmax=*\': \'$VIASH_PAR_SEEDPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SEEDPERWINDOWNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedNoneLociPerWindow) + [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDNONELOCIPERWINDOW="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedNoneLociPerWindow. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedNoneLociPerWindow=*) + [ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ] && ViashError Bad arguments for option \'--seedNoneLociPerWindow=*\': \'$VIASH_PAR_SEEDNONELOCIPERWINDOW\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDNONELOCIPERWINDOW=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedSplitMin) + [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSPLITMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedSplitMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seedSplitMin=*) + [ -n "$VIASH_PAR_SEEDSPLITMIN" ] && ViashError Bad arguments for option \'--seedSplitMin=*\': \'$VIASH_PAR_SEEDSPLITMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDSPLITMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seedMapMin) + [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMAPMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seedMapMin. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --seedMapMin=*) + [ -n "$VIASH_PAR_SEEDMAPMIN" ] && ViashError Bad arguments for option \'--seedMapMin=*\': \'$VIASH_PAR_SEEDMAPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEEDMAPMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignIntronMin) + [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignIntronMin=*) + [ -n "$VIASH_PAR_ALIGNINTRONMIN" ] && ViashError Bad arguments for option \'--alignIntronMin=*\': \'$VIASH_PAR_ALIGNINTRONMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignIntronMax) + [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignIntronMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignIntronMax=*) + [ -n "$VIASH_PAR_ALIGNINTRONMAX" ] && ViashError Bad arguments for option \'--alignIntronMax=*\': \'$VIASH_PAR_ALIGNINTRONMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINTRONMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignMatesGapMax) + [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNMATESGAPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignMatesGapMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignMatesGapMax=*) + [ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ] && ViashError Bad arguments for option \'--alignMatesGapMax=*\': \'$VIASH_PAR_ALIGNMATESGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNMATESGAPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSJoverhangMin) + [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJoverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSJoverhangMin=*) + [ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJoverhangMin=*\': \'$VIASH_PAR_ALIGNSJOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSJstitchMismatchNmax) + if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$2" + else + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJstitchMismatchNmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --alignSJstitchMismatchNmax=*) + if [ -z "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX=$(ViashRemoveFlags "$1") + else + VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX="$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --alignSJDBoverhangMin) + [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJDBOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSJDBoverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSJDBoverhangMin=*) + [ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ] && ViashError Bad arguments for option \'--alignSJDBoverhangMin=*\': \'$VIASH_PAR_ALIGNSJDBOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSJDBOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSplicedMateMapLmin) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLmin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSplicedMateMapLmin=*) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLmin=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSplicedMateMapLminOverLmate) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSplicedMateMapLminOverLmate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSplicedMateMapLminOverLmate=*) + [ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ] && ViashError Bad arguments for option \'--alignSplicedMateMapLminOverLmate=*\': \'$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignWindowsPerReadNmax) + [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNWINDOWSPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignWindowsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignWindowsPerReadNmax=*) + [ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignWindowsPerReadNmax=*\': \'$VIASH_PAR_ALIGNWINDOWSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNWINDOWSPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignTranscriptsPerWindowNmax) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerWindowNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignTranscriptsPerWindowNmax=*) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerWindowNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignTranscriptsPerReadNmax) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignTranscriptsPerReadNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignTranscriptsPerReadNmax=*) + [ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ] && ViashError Bad arguments for option \'--alignTranscriptsPerReadNmax=*\': \'$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignEndsType) + [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsType. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignEndsType=*) + [ -n "$VIASH_PAR_ALIGNENDSTYPE" ] && ViashError Bad arguments for option \'--alignEndsType=*\': \'$VIASH_PAR_ALIGNENDSTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignEndsProtrude) + [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSPROTRUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignEndsProtrude. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignEndsProtrude=*) + [ -n "$VIASH_PAR_ALIGNENDSPROTRUDE" ] && ViashError Bad arguments for option \'--alignEndsProtrude=*\': \'$VIASH_PAR_ALIGNENDSPROTRUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNENDSPROTRUDE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignSoftClipAtReferenceEnds) + [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignSoftClipAtReferenceEnds. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignSoftClipAtReferenceEnds=*) + [ -n "$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS" ] && ViashError Bad arguments for option \'--alignSoftClipAtReferenceEnds=*\': \'$VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alignInsertionFlush) + [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINSERTIONFLUSH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alignInsertionFlush. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --alignInsertionFlush=*) + [ -n "$VIASH_PAR_ALIGNINSERTIONFLUSH" ] && ViashError Bad arguments for option \'--alignInsertionFlush=*\': \'$VIASH_PAR_ALIGNINSERTIONFLUSH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ALIGNINSERTIONFLUSH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --peOverlapNbasesMin) + [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPNBASESMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapNbasesMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --peOverlapNbasesMin=*) + [ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ] && ViashError Bad arguments for option \'--peOverlapNbasesMin=*\': \'$VIASH_PAR_PEOVERLAPNBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_PEOVERLAPNBASESMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --peOverlapMMp) + [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPMMP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --peOverlapMMp. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --peOverlapMMp=*) + [ -n "$VIASH_PAR_PEOVERLAPMMP" ] && ViashError Bad arguments for option \'--peOverlapMMp=*\': \'$VIASH_PAR_PEOVERLAPMMP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_PEOVERLAPMMP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winAnchorMultimapNmax) + [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorMultimapNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winAnchorMultimapNmax=*) + [ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--winAnchorMultimapNmax=*\': \'$VIASH_PAR_WINANCHORMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winBinNbits) + [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINBINNBITS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winBinNbits. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --winBinNbits=*) + [ -n "$VIASH_PAR_WINBINNBITS" ] && ViashError Bad arguments for option \'--winBinNbits=*\': \'$VIASH_PAR_WINBINNBITS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINBINNBITS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winAnchorDistNbins) + [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORDISTNBINS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winAnchorDistNbins. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winAnchorDistNbins=*) + [ -n "$VIASH_PAR_WINANCHORDISTNBINS" ] && ViashError Bad arguments for option \'--winAnchorDistNbins=*\': \'$VIASH_PAR_WINANCHORDISTNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINANCHORDISTNBINS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winFlankNbins) + [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINFLANKNBINS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winFlankNbins. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winFlankNbins=*) + [ -n "$VIASH_PAR_WINFLANKNBINS" ] && ViashError Bad arguments for option \'--winFlankNbins=*\': \'$VIASH_PAR_WINFLANKNBINS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_WINFLANKNBINS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winReadCoverageRelativeMin) + [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGERELATIVEMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageRelativeMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winReadCoverageRelativeMin=*) + [ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ] && ViashError Bad arguments for option \'--winReadCoverageRelativeMin=*\': \'$VIASH_PAR_WINREADCOVERAGERELATIVEMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGERELATIVEMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --winReadCoverageBasesMin) + [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGEBASESMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --winReadCoverageBasesMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --winReadCoverageBasesMin=*) + [ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ] && ViashError Bad arguments for option \'--winReadCoverageBasesMin=*\': \'$VIASH_PAR_WINREADCOVERAGEBASESMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WINREADCOVERAGEBASESMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimOutType) + if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then + VIASH_PAR_CHIMOUTTYPE="$2" + else + VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutType. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimOutType=*) + if [ -z "$VIASH_PAR_CHIMOUTTYPE" ]; then + VIASH_PAR_CHIMOUTTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CHIMOUTTYPE="$VIASH_PAR_CHIMOUTTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --chimSegmentMin) + [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimSegmentMin=*) + [ -n "$VIASH_PAR_CHIMSEGMENTMIN" ] && ViashError Bad arguments for option \'--chimSegmentMin=*\': \'$VIASH_PAR_CHIMSEGMENTMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreMin) + [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreMin=*) + [ -n "$VIASH_PAR_CHIMSCOREMIN" ] && ViashError Bad arguments for option \'--chimScoreMin=*\': \'$VIASH_PAR_CHIMSCOREMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreDropMax) + [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMSCOREDROPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreDropMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreDropMax=*) + [ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ] && ViashError Bad arguments for option \'--chimScoreDropMax=*\': \'$VIASH_PAR_CHIMSCOREDROPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREDROPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreSeparation) + [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCORESEPARATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreSeparation. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimScoreSeparation=*) + [ -n "$VIASH_PAR_CHIMSCORESEPARATION" ] && ViashError Bad arguments for option \'--chimScoreSeparation=*\': \'$VIASH_PAR_CHIMSCORESEPARATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCORESEPARATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimScoreJunctionNonGTAG) + [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimScoreJunctionNonGTAG. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --chimScoreJunctionNonGTAG=*) + [ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ] && ViashError Bad arguments for option \'--chimScoreJunctionNonGTAG=*\': \'$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimJunctionOverhangMin) + [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMJUNCTIONOVERHANGMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimJunctionOverhangMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimJunctionOverhangMin=*) + [ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ] && ViashError Bad arguments for option \'--chimJunctionOverhangMin=*\': \'$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMJUNCTIONOVERHANGMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimSegmentReadGapMax) + [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMSEGMENTREADGAPMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimSegmentReadGapMax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimSegmentReadGapMax=*) + [ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ] && ViashError Bad arguments for option \'--chimSegmentReadGapMax=*\': \'$VIASH_PAR_CHIMSEGMENTREADGAPMAX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMSEGMENTREADGAPMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimFilter) + if [ -z "$VIASH_PAR_CHIMFILTER" ]; then + VIASH_PAR_CHIMFILTER="$2" + else + VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimFilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimFilter=*) + if [ -z "$VIASH_PAR_CHIMFILTER" ]; then + VIASH_PAR_CHIMFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CHIMFILTER="$VIASH_PAR_CHIMFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --chimMainSegmentMultNmax) + [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMAINSEGMENTMULTNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMainSegmentMultNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimMainSegmentMultNmax=*) + [ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ] && ViashError Bad arguments for option \'--chimMainSegmentMultNmax=*\': \'$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMAINSEGMENTMULTNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimMultimapNmax) + [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapNmax. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --chimMultimapNmax=*) + [ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ] && ViashError Bad arguments for option \'--chimMultimapNmax=*\': \'$VIASH_PAR_CHIMMULTIMAPNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimMultimapScoreRange) + [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPSCORERANGE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimMultimapScoreRange. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimMultimapScoreRange=*) + [ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ] && ViashError Bad arguments for option \'--chimMultimapScoreRange=*\': \'$VIASH_PAR_CHIMMULTIMAPSCORERANGE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMMULTIMAPSCORERANGE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimNonchimScoreDropMin) + [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMNONCHIMSCOREDROPMIN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimNonchimScoreDropMin. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimNonchimScoreDropMin=*) + [ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ] && ViashError Bad arguments for option \'--chimNonchimScoreDropMin=*\': \'$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CHIMNONCHIMSCOREDROPMIN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --chimOutJunctionFormat) + [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMOUTJUNCTIONFORMAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --chimOutJunctionFormat. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --chimOutJunctionFormat=*) + [ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ] && ViashError Bad arguments for option \'--chimOutJunctionFormat=*\': \'$VIASH_PAR_CHIMOUTJUNCTIONFORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CHIMOUTJUNCTIONFORMAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quantMode) + if [ -z "$VIASH_PAR_QUANTMODE" ]; then + VIASH_PAR_QUANTMODE="$2" + else + VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quantMode=*) + if [ -z "$VIASH_PAR_QUANTMODE" ]; then + VIASH_PAR_QUANTMODE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_QUANTMODE="$VIASH_PAR_QUANTMODE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --quantTranscriptomeBAMcompression) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBAMcompression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --quantTranscriptomeBAMcompression=*) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ] && ViashError Bad arguments for option \'--quantTranscriptomeBAMcompression=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --quantTranscriptomeBan) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --quantTranscriptomeBan. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quantTranscriptomeBan=*) + [ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAN" ] && ViashError Bad arguments for option \'--quantTranscriptomeBan=*\': \'$VIASH_PAR_QUANTTRANSCRIPTOMEBAN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUANTTRANSCRIPTOMEBAN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --twopassMode) + [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASSMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopassMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --twopassMode=*) + [ -n "$VIASH_PAR_TWOPASSMODE" ] && ViashError Bad arguments for option \'--twopassMode=*\': \'$VIASH_PAR_TWOPASSMODE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_TWOPASSMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --twopass1readsN) + [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASS1READSN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --twopass1readsN. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --twopass1readsN=*) + [ -n "$VIASH_PAR_TWOPASS1READSN" ] && ViashError Bad arguments for option \'--twopass1readsN=*\': \'$VIASH_PAR_TWOPASS1READSN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TWOPASS1READSN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --waspOutputMode) + [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WASPOUTPUTMODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --waspOutputMode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --waspOutputMode=*) + [ -n "$VIASH_PAR_WASPOUTPUTMODE" ] && ViashError Bad arguments for option \'--waspOutputMode=*\': \'$VIASH_PAR_WASPOUTPUTMODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WASPOUTPUTMODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloType) + if [ -z "$VIASH_PAR_SOLOTYPE" ]; then + VIASH_PAR_SOLOTYPE="$2" + else + VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloType. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloType=*) + if [ -z "$VIASH_PAR_SOLOTYPE" ]; then + VIASH_PAR_SOLOTYPE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOTYPE="$VIASH_PAR_SOLOTYPE;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCBwhitelist) + if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then + VIASH_PAR_SOLOCBWHITELIST="$2" + else + VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBwhitelist. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBwhitelist=*) + if [ -z "$VIASH_PAR_SOLOCBWHITELIST" ]; then + VIASH_PAR_SOLOCBWHITELIST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCBWHITELIST="$VIASH_PAR_SOLOCBWHITELIST;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCBstart) + [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBSTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBstart. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBstart=*) + [ -n "$VIASH_PAR_SOLOCBSTART" ] && ViashError Bad arguments for option \'--soloCBstart=*\': \'$VIASH_PAR_SOLOCBSTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBSTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBlen) + [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBLEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBlen. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloCBlen=*) + [ -n "$VIASH_PAR_SOLOCBLEN" ] && ViashError Bad arguments for option \'--soloCBlen=*\': \'$VIASH_PAR_SOLOCBLEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBLEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloUMIstart) + [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMISTART="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIstart. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIstart=*) + [ -n "$VIASH_PAR_SOLOUMISTART" ] && ViashError Bad arguments for option \'--soloUMIstart=*\': \'$VIASH_PAR_SOLOUMISTART\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMISTART=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloUMIlen) + [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMILEN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIlen. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIlen=*) + [ -n "$VIASH_PAR_SOLOUMILEN" ] && ViashError Bad arguments for option \'--soloUMIlen=*\': \'$VIASH_PAR_SOLOUMILEN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMILEN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloBarcodeReadLength) + [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SOLOBARCODEREADLENGTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeReadLength. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloBarcodeReadLength=*) + [ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ] && ViashError Bad arguments for option \'--soloBarcodeReadLength=*\': \'$VIASH_PAR_SOLOBARCODEREADLENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEREADLENGTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloBarcodeMate) + [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEMATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloBarcodeMate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloBarcodeMate=*) + [ -n "$VIASH_PAR_SOLOBARCODEMATE" ] && ViashError Bad arguments for option \'--soloBarcodeMate=*\': \'$VIASH_PAR_SOLOBARCODEMATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOBARCODEMATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBposition) + if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then + VIASH_PAR_SOLOCBPOSITION="$2" + else + VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBposition. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloCBposition=*) + if [ -z "$VIASH_PAR_SOLOCBPOSITION" ]; then + VIASH_PAR_SOLOCBPOSITION=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCBPOSITION="$VIASH_PAR_SOLOCBPOSITION;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIposition) + [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMIPOSITION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIposition. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIposition=*) + [ -n "$VIASH_PAR_SOLOUMIPOSITION" ] && ViashError Bad arguments for option \'--soloUMIposition=*\': \'$VIASH_PAR_SOLOUMIPOSITION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOUMIPOSITION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloAdapterSequence) + [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERSEQUENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterSequence. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloAdapterSequence=*) + [ -n "$VIASH_PAR_SOLOADAPTERSEQUENCE" ] && ViashError Bad arguments for option \'--soloAdapterSequence=*\': \'$VIASH_PAR_SOLOADAPTERSEQUENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SOLOADAPTERSEQUENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloAdapterMismatchesNmax) + [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERMISMATCHESNMAX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloAdapterMismatchesNmax. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloAdapterMismatchesNmax=*) + [ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ] && ViashError Bad arguments for option \'--soloAdapterMismatchesNmax=*\': \'$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOADAPTERMISMATCHESNMAX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloCBmatchWLtype) + [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBMATCHWLTYPE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCBmatchWLtype. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCBmatchWLtype=*) + [ -n "$VIASH_PAR_SOLOCBMATCHWLTYPE" ] && ViashError Bad arguments for option \'--soloCBmatchWLtype=*\': \'$VIASH_PAR_SOLOCBMATCHWLTYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOCBMATCHWLTYPE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloInputSAMattrBarcodeSeq) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$2" + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeSeq. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloInputSAMattrBarcodeSeq=*) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ="$VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloInputSAMattrBarcodeQual) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$2" + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloInputSAMattrBarcodeQual. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloInputSAMattrBarcodeQual=*) + if [ -z "$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL" ]; then + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL="$VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloStrand) + [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOSTRAND="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloStrand. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloStrand=*) + [ -n "$VIASH_PAR_SOLOSTRAND" ] && ViashError Bad arguments for option \'--soloStrand=*\': \'$VIASH_PAR_SOLOSTRAND\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SOLOSTRAND=$(ViashRemoveFlags "$1") + shift 1 + ;; + --soloFeatures) + if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then + VIASH_PAR_SOLOFEATURES="$2" + else + VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloFeatures. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloFeatures=*) + if [ -z "$VIASH_PAR_SOLOFEATURES" ]; then + VIASH_PAR_SOLOFEATURES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOFEATURES="$VIASH_PAR_SOLOFEATURES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloMultiMappers) + if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then + VIASH_PAR_SOLOMULTIMAPPERS="$2" + else + VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloMultiMappers. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloMultiMappers=*) + if [ -z "$VIASH_PAR_SOLOMULTIMAPPERS" ]; then + VIASH_PAR_SOLOMULTIMAPPERS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOMULTIMAPPERS="$VIASH_PAR_SOLOMULTIMAPPERS;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIdedup) + if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then + VIASH_PAR_SOLOUMIDEDUP="$2" + else + VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIdedup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloUMIdedup=*) + if [ -z "$VIASH_PAR_SOLOUMIDEDUP" ]; then + VIASH_PAR_SOLOUMIDEDUP=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOUMIDEDUP="$VIASH_PAR_SOLOUMIDEDUP;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloUMIfiltering) + if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then + VIASH_PAR_SOLOUMIFILTERING="$2" + else + VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloUMIfiltering. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --soloUMIfiltering=*) + if [ -z "$VIASH_PAR_SOLOUMIFILTERING" ]; then + VIASH_PAR_SOLOUMIFILTERING=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOUMIFILTERING="$VIASH_PAR_SOLOUMIFILTERING;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloOutFileNames) + if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then + VIASH_PAR_SOLOOUTFILENAMES="$2" + else + VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFileNames. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloOutFileNames=*) + if [ -z "$VIASH_PAR_SOLOOUTFILENAMES" ]; then + VIASH_PAR_SOLOOUTFILENAMES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOOUTFILENAMES="$VIASH_PAR_SOLOOUTFILENAMES;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloCellFilter) + if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then + VIASH_PAR_SOLOCELLFILTER="$2" + else + VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellFilter. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --soloCellFilter=*) + if [ -z "$VIASH_PAR_SOLOCELLFILTER" ]; then + VIASH_PAR_SOLOCELLFILTER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_SOLOCELLFILTER="$VIASH_PAR_SOLOCELLFILTER;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --soloOutFormatFeaturesGeneField3) + if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$2" + else + VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloOutFormatFeaturesGeneField3. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        --soloOutFormatFeaturesGeneField3=*)
+            if [ -z "$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3" ]; then
+                VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3=$(ViashRemoveFlags "$1")
+            else
+                VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3="$VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3;"$(ViashRemoveFlags "$1")
+            fi
+            shift 1
+            ;;
+        --soloCellReadStats)
+            [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_SOLOCELLREADSTATS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --soloCellReadStats. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --soloCellReadStats=*)
+            [ -n "$VIASH_PAR_SOLOCELLREADSTATS" ] && ViashError Bad arguments for option \'--soloCellReadStats=*\': \'$VIASH_PAR_SOLOCELLREADSTATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_SOLOCELLREADSTATS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)  # two-word form: ---setup <strategy>
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2  # consume the flag AND its value, like every other two-word option
+            ;;
+        ---setup=*)  # one-word form: ---setup=<strategy>
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1  # the value lives inside $1; 'shift 2' fails (shifts nothing) when this is the last argument
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")  # value is embedded in $1 (was wrongly read from $2)
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters.
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE+x} ]; then + ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ] && [ ! -e "$VIASH_PAR_SJDBGTFFILE" ]; then + ViashError "Input file '$VIASH_PAR_SJDBGTFFILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ] && [ ! -e "$VIASH_PAR_READFILESMANIFEST" ]; then + ViashError "Input file '$VIASH_PAR_READFILESMANIFEST' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_RUNRNGSEED" ]]; then + if ! [[ "$VIASH_PAR_RUNRNGSEED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--runRNGseed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_GENOMEFILESIZES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_GENOMEFILESIZES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--genomeFileSizes' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_SJDBOVERHANG" ]]; then + if ! [[ "$VIASH_PAR_SJDBOVERHANG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sjdbOverhang' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SJDBSCORE" ]]; then + if ! [[ "$VIASH_PAR_SJDBSCORE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--sjdbScore' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_READMAPNUMBER" ]]; then + if ! [[ "$VIASH_PAR_READMAPNUMBER" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--readMapNumber' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_READQUALITYSCOREBASE" ]]; then + if ! [[ "$VIASH_PAR_READQUALITYSCOREBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--readQualityScoreBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_CLIP3PNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip3pNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP3PADAPTERMMP" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PADAPTERMMP; do + if ! [[ "${val}" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--clip3pAdapterMMp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP3PAFTERADAPTERNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP3PAFTERADAPTERNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip3pAfterAdapterNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_CLIP5PNBASES" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_CLIP5PNBASES; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--clip5pNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_LIMITGENOMEGENERATERAM" ]]; then + if ! [[ "$VIASH_PAR_LIMITGENOMEGENERATERAM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitGenomeGenerateRAM' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_LIMITIOBUFFERSIZE" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_LIMITIOBUFFERSIZE; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitIObufferSize' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSAMONEREADBYTES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSAMoneReadBytes' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITOUTSJONEREAD" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSJONEREAD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSJoneRead' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITOUTSJCOLLAPSED" ]]; then + if ! [[ "$VIASH_PAR_LIMITOUTSJCOLLAPSED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitOutSJcollapsed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITBAMSORTRAM" ]]; then + if ! [[ "$VIASH_PAR_LIMITBAMSORTRAM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitBAMsortRAM' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITSJDBINSERTNSJ" ]]; then + if ! [[ "$VIASH_PAR_LIMITSJDBINSERTNSJ" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitSjdbInsertNsj' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LIMITNREADSSOFT" ]]; then + if ! [[ "$VIASH_PAR_LIMITNREADSSOFT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--limitNreadsSoft' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTQSCONVERSIONADD" ]]; then + if ! 
[[ "$VIASH_PAR_OUTQSCONVERSIONADD" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outQSconversionAdd' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMATTRIHSTART" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMATTRIHSTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMattrIHstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMMAPQUNIQUE" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMMAPQUNIQUE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMmapqUnique' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMFLAGOR" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMFLAGOR" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMflagOR' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMFLAGAND" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMFLAGAND" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMflagAND' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMMULTNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMmultNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTSAMTLEN" ]]; then + if ! [[ "$VIASH_PAR_OUTSAMTLEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSAMtlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMCOMPRESSION" ]]; then + if ! [[ "$VIASH_PAR_OUTBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMSORTINGTHREADN" ]]; then + if ! 
[[ "$VIASH_PAR_OUTBAMSORTINGTHREADN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMsortingThreadN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTBAMSORTINGBINSN" ]]; then + if ! [[ "$VIASH_PAR_OUTBAMSORTINGBINSN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outBAMsortingBinsN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" ]]; then + if ! [[ "$VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--bamRemoveDuplicatesMate2basesN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMismatchNoverLmax' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" ]]; then + if ! 
[[ "$VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMismatchNoverReadLmax' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERSCOREMIN" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterScoreMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERSCOREMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterScoreMinOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMIN" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outFilterMatchNmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" ]]; then + if ! [[ "$VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--outFilterMatchNminOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_OUTSJFILTEROVERHANGMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTEROVERHANGMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterCountUniqueMin' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterCountTotalMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterDistToOtherSJmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [ -n "$VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--outSJfilterIntronMaxVsReadN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_SCOREGAP" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAP" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGap' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPNONCAN" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPNONCAN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapNoncan' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPGCAG" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPGCAG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapGCAG' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGAPATAC" ]]; then + if ! [[ "$VIASH_PAR_SCOREGAPATAC" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGapATAC' has to be an integer. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" ]]; then + if ! [[ "$VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreGenomicLengthLog2scale' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREDELOPEN" ]]; then + if ! [[ "$VIASH_PAR_SCOREDELOPEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreDelOpen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREDELBASE" ]]; then + if ! [[ "$VIASH_PAR_SCOREDELBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreDelBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREINSOPEN" ]]; then + if ! [[ "$VIASH_PAR_SCOREINSOPEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreInsOpen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCOREINSBASE" ]]; then + if ! [[ "$VIASH_PAR_SCOREINSBASE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreInsBase' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SCORESTITCHSJSHIFT" ]]; then + if ! [[ "$VIASH_PAR_SCORESTITCHSJSHIFT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--scoreStitchSJshift' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDSEARCHSTARTLMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSearchStartLmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" ]]; then + if ! 
[[ "$VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--seedSearchStartLmaxOverLread' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSEARCHLMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDSEARCHLMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSearchLmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDPERWINDOWNMAX" ]]; then + if ! [[ "$VIASH_PAR_SEEDPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDNONELOCIPERWINDOW" ]]; then + if ! [[ "$VIASH_PAR_SEEDNONELOCIPERWINDOW" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedNoneLociPerWindow' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDSPLITMIN" ]]; then + if ! [[ "$VIASH_PAR_SEEDSPLITMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedSplitMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEEDMAPMIN" ]]; then + if ! [[ "$VIASH_PAR_SEEDMAPMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seedMapMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNINTRONMIN" ]]; then + if ! 
[[ "$VIASH_PAR_ALIGNINTRONMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignIntronMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNINTRONMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNINTRONMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignIntronMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNMATESGAPMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNMATESGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignMatesGapMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSJOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSJOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX" ]; then + IFS=';' + set -f + for val in $VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJstitchMismatchNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSJDBOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSJDBoverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" ]]; then + if ! [[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignSplicedMateMapLmin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" ]]; then + if ! 
[[ "$VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--alignSplicedMateMapLminOverLmate' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNWINDOWSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignWindowsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignTranscriptsPerWindowNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" ]]; then + if ! [[ "$VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--alignTranscriptsPerReadNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PEOVERLAPNBASESMIN" ]]; then + if ! [[ "$VIASH_PAR_PEOVERLAPNBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--peOverlapNbasesMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_PEOVERLAPMMP" ]]; then + if ! [[ "$VIASH_PAR_PEOVERLAPMMP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--peOverlapMMp' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINANCHORMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_WINANCHORMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winAnchorMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINBINNBITS" ]]; then + if ! 
[[ "$VIASH_PAR_WINBINNBITS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winBinNbits' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINANCHORDISTNBINS" ]]; then + if ! [[ "$VIASH_PAR_WINANCHORDISTNBINS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winAnchorDistNbins' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINFLANKNBINS" ]]; then + if ! [[ "$VIASH_PAR_WINFLANKNBINS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winFlankNbins' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" ]]; then + if ! [[ "$VIASH_PAR_WINREADCOVERAGERELATIVEMIN" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--winReadCoverageRelativeMin' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WINREADCOVERAGEBASESMIN" ]]; then + if ! [[ "$VIASH_PAR_WINREADCOVERAGEBASESMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--winReadCoverageBasesMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSEGMENTMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMSEGMENTMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimSegmentMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREDROPMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREDROPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreDropMax' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCORESEPARATION" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCORESEPARATION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreSeparation' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" ]]; then + if ! [[ "$VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimScoreJunctionNonGTAG' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" ]]; then + if ! [[ "$VIASH_PAR_CHIMJUNCTIONOVERHANGMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimJunctionOverhangMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMSEGMENTREADGAPMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimSegmentReadGapMax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMMAINSEGMENTMULTNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMainSegmentMultNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMULTIMAPNMAX" ]]; then + if ! [[ "$VIASH_PAR_CHIMMULTIMAPNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMultimapNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" ]]; then + if ! [[ "$VIASH_PAR_CHIMMULTIMAPSCORERANGE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimMultimapScoreRange' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" ]]; then + if ! 
[[ "$VIASH_PAR_CHIMNONCHIMSCOREDROPMIN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimNonchimScoreDropMin' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" ]]; then + if ! [[ "$VIASH_PAR_CHIMOUTJUNCTIONFORMAT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--chimOutJunctionFormat' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" ]]; then + if ! [[ "$VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--quantTranscriptomeBAMcompression' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_TWOPASS1READSN" ]]; then + if ! [[ "$VIASH_PAR_TWOPASS1READSN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--twopass1readsN' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOCBSTART" ]]; then + if ! [[ "$VIASH_PAR_SOLOCBSTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloCBstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOCBLEN" ]]; then + if ! [[ "$VIASH_PAR_SOLOCBLEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloCBlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOUMISTART" ]]; then + if ! [[ "$VIASH_PAR_SOLOUMISTART" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloUMIstart' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOUMILEN" ]]; then + if ! [[ "$VIASH_PAR_SOLOUMILEN" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloUMIlen' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOBARCODEREADLENGTH" ]]; then + if ! 
[[ "$VIASH_PAR_SOLOBARCODEREADLENGTH" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloBarcodeReadLength' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOBARCODEMATE" ]]; then + if ! [[ "$VIASH_PAR_SOLOBARCODEMATE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloBarcodeMate' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" ]]; then + if ! [[ "$VIASH_PAR_SOLOADAPTERMISMATCHESNMAX" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--soloAdapterMismatchesNmax' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_REFERENCE")" ) + VIASH_PAR_REFERENCE=$(ViashAutodetectMount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + VIASH_TEST_GENOMEFASTAFILES=() + IFS=';' + for var in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_GENOMEFASTAFILES+=( "$var" ) + done + VIASH_PAR_GENOMEFASTAFILES=$(IFS=';' ; echo "${VIASH_TEST_GENOMEFASTAFILES[*]}") +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_SJDBGTFFILE")" ) + VIASH_PAR_SJDBGTFFILE=$(ViashAutodetectMount "$VIASH_PAR_SJDBGTFFILE") +fi +if [ ! 
-z "$VIASH_PAR_READFILESMANIFEST" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_READFILESMANIFEST")" )
+  VIASH_PAR_READFILESMANIFEST=$(ViashAutodetectMount "$VIASH_PAR_READFILESMANIFEST")
+fi
+if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" )
+  VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR")
+fi
+if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" )
+  VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE")
+fi
+if [ ! -z "$VIASH_META_CONFIG" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" )
+  VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG")
+fi
+if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
+  VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" )
+  VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR")
+fi
+
+# get unique mounts
+VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u))
+
+# change file ownership
+# Runs chown inside the component image so that the paths collected in
+# VIASH_CHOWN_VARS end up owned by the invoking user ("$(id -u):$(id -g)").
+# Registered below as an EXIT trap, so it runs whenever this wrapper exits.
+function ViashPerformChown {
+  if (( ${#VIASH_CHOWN_VARS[@]} )); then
+    # best effort: a failing chown must not abort the exit handler
+    set +e
+    # The command line is re-parsed by eval, so the output paths are
+    # shell-escaped with printf %q first; otherwise a path containing
+    # whitespace or shell metacharacters would be split or interpreted.
+    eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0 "$(id -u):$(id -g)" --silent --recursive "$(printf '%q ' "${VIASH_CHOWN_VARS[@]}")"
+    set -e
+  fi
+}
+trap ViashPerformChown EXIT
+
+# helper function for filling in extra docker args
+VIASH_EXTRA_DOCKER_ARGS=""
+if [ ! -z "$VIASH_META_MEMORY_MB" ]; then
+  VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m"
+fi
+if [ !
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_align_v273a:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-star_align_v273a-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import re +import tempfile +import subprocess +from pathlib import Path +import tarfile +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'genomeFileSizes': $( if [ ! 
-z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'genomeTransformOutput': $( if [ ! -z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'sjdbOverhang': $( if [ ! 
-z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'readQualityScoreBase': $( if [ ! 
-z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitOutSJcollapsed': $( if [ ! 
-z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMattrIHstart': $( if [ ! 
-z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSAMfilter': $( if [ ! 
-z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterType': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outFilterIntronStrands': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGapATAC': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedPerWindowNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\'/\'\"\'\"r\'}'.split(';')))"; else echo None; fi ), + 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSplicedMateMapLmin': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignSplicedMateMapLminOverLmate': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winAnchorMultimapNmax': $( if [ ! -z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winBinNbits': $( if [ ! 
-z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimSegmentReadGapMax': $( if [ ! 
-z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'waspOutputMode': $( if [ ! 
-z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloAdapterMismatchesNmax': $( if [ ! 
-z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloOutFormatFeaturesGeneField3': $( if [ ! 
-z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), + 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
########################
### Helper functions ###
########################

# Regex for matching read (R1/R2/...) FASTQ files, optionally gzipped.
# Capture groups: (1) prefix, (2) read identifier such as R1 or R2,
# (3) optional numeric suffix such as _001, (4) optional .gz extension.
# Examples that match:
#  - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz
#  - tinygex_S1_L001_R1_001.fastq.gz
# Index reads such as tinygex_S1_L001_I1_001.fastq.gz do NOT match, because
# the pattern requires an 'R' followed by digits.
fastqgz_regex = r'(.+)_(R\d+)(_\d+)?\.fastq(\.gz)?'


def is_gz_file(path: Path) -> bool:
    """Return True when the file at path starts with the gzip magic bytes."""
    with open(path, 'rb') as file:
        return file.read(2) == b'\x1f\x8b'


def search_fastqs(path: Path) -> list[Path]:
    """Return the FASTQ files represented by path.

    If path is a directory, its direct children whose names match
    fastqgz_regex are returned in sorted order, so that the result does not
    depend on the arbitrary order in which the file system lists entries.
    Any other path is returned unchanged as a single-element list.
    """
    if path.is_dir():
        print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True)
        return sorted(file for file in path.iterdir() if re.match(fastqgz_regex, file.name))
    return [path]


def _members_without_root_dir(members):
    """Strip a shared top-level directory from tar member names.

    The directory is only stripped when the archive has exactly one root
    directory AND every other member lives inside it; otherwise the members
    are returned untouched so that no top-level file is silently dropped.
    Member names are kept as plain strings, because tarfile's extraction
    filters call str methods on them.
    """
    root_dirs = [member.name
                 for member in members
                 if member.isdir() and member.name != '.' and '/' not in member.name]
    if len(root_dirs) != 1:
        return members

    root_dir = root_dirs[0]
    parts_per_member = [Path(member.name).parts for member in members]
    # an empty tuple corresponds to a '.' entry, which is neutral
    if any(parts and parts[0] != root_dir for parts in parts_per_member):
        return members

    stripped = []
    for member, parts in zip(members, parts_per_member):
        # the root directory itself (and '.' entries) have nothing left to extract
        if len(parts) > 1:
            member.name = '/'.join(parts[1:])  # tar member names always use '/'
            stripped.append(member)
    return stripped


def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path:
    """Unpack par_value into temp_dir_path when it is a tar or gzip file.

    Args:
        par_value: path provided by the user (file or directory).
        temp_dir_path: directory in which extracted content is placed.

    Returns:
        The directory a tar archive was extracted to, the file a gzip file
        was decompressed to, or par_value itself when it is neither.
    """
    if par_value.is_file() and tarfile.is_tarfile(par_value):
        # Remove two extensions (if they exist), e.g. 'ref.tar.gz' -> 'ref'
        extraction_dir_name = Path(par_value.stem).stem
        unpacked_path = temp_dir_path / extraction_dir_name
        print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True)

        with tarfile.open(par_value, 'r') as open_tar:
            members_to_move = _members_without_root_dir(open_tar.getmembers())
            open_tar.extractall(unpacked_path, members=members_to_move)
        return unpacked_path

    if par_value.is_file() and is_gz_file(par_value):
        # Remove extension (if it exists), e.g. 'reads.fastq.gz' -> 'reads.fastq'
        extraction_file_name = Path(par_value.stem)
        unpacked_path = temp_dir_path / extraction_file_name
        print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True)

        with gzip.open(par_value, 'rb') as f_in:
            with open(unpacked_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        return unpacked_path

    return par_value

########################
###    Main code     ###
########################

# rename keys and convert path strings to Path
# note: only list file arguments here.
def process_par(orig_par, to_rename):
    """Yield (key, value) pairs of orig_par with file arguments renamed.

    Keys listed in to_rename are replaced by their mapped name and their
    values are converted to Path objects (element-wise for lists). All
    other pairs are passed through untouched.

    Args:
        orig_par: Mapping of argument name to value, as generated by viash.
        to_rename: Mapping of original argument name to the STAR name.

    Yields:
        Tuples of (possibly renamed key, possibly converted value), in the
        iteration order of orig_par.
    """
    for key, value in orig_par.items():
        if key not in to_rename:
            yield key, value
            continue

        new_key = to_rename[key]
        if value is None:
            # Unset arguments are None in the generated par dict; Path(None)
            # would raise a TypeError, so pass the missing value through.
            yield new_key, None
        elif isinstance(value, list):
            yield new_key, [Path(val) for val in value]
        else:
            yield new_key, Path(value)
input_grouped: + input_grouped[key] = [] + input_grouped[key].append(str(path)) + par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ] + print("", flush=True) + + print(">> Constructing command", flush=True) + par["runMode"] = "alignReads" + par["outTmpDir"] = temp_dir_path / "run" + if 'cpus' in meta and meta['cpus']: + par["runThreadN"] = meta["cpus"] + # make sure there is a trailing / + par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/" + + cmd_args = [ "STAR" ] + for name, value in par.items(): + if value is not None: + if isinstance(value, list): + cmd_args.extend(["--" + name] + [str(x) for x in value]) + else: + cmd_args.extend(["--" + name, str(value)]) + print("", flush=True) + + print(">> Running STAR with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + print("", flush=True) + + subprocess.run( + cmd_args, + check=True + ) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=';' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ]; then + VIASH_PAR_REFERENCE=$(ViashStripAutomount "$VIASH_PAR_REFERENCE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_PAR_GENOMEFASTAFILES" ]; then + unset VIASH_TEST_GENOMEFASTAFILES + IFS=';' + for var in $VIASH_PAR_GENOMEFASTAFILES; do + unset IFS + if [ -z "$VIASH_TEST_GENOMEFASTAFILES" ]; then + VIASH_TEST_GENOMEFASTAFILES="$(ViashStripAutomount "$var")" + else + VIASH_TEST_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES;""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_GENOMEFASTAFILES="$VIASH_TEST_GENOMEFASTAFILES" +fi +if [ ! -z "$VIASH_PAR_SJDBGTFFILE" ]; then + VIASH_PAR_SJDBGTFFILE=$(ViashStripAutomount "$VIASH_PAR_SJDBGTFFILE") +fi +if [ ! -z "$VIASH_PAR_READFILESMANIFEST" ]; then + VIASH_PAR_READFILESMANIFEST=$(ViashStripAutomount "$VIASH_PAR_READFILESMANIFEST") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/mapping/star_build_reference/.config.vsh.yaml b/target/docker/mapping/star_build_reference/.config.vsh.yaml new file mode 100644 index 00000000000..a13c0be97f5 --- /dev/null +++ b/target/docker/mapping/star_build_reference/.config.vsh.yaml @@ -0,0 +1,190 @@ +functionality: + name: "star_build_reference" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Input/Output" + arguments: + - type: "file" + name: "--genome_fasta" + alternatives: + - "--genomeFastaFiles" + description: "The fasta files to be included in the reference. Corresponds to\ + \ the --genomeFastaFiles argument in the STAR command." + info: null + example: + - "chr1.fasta" + - "chr2.fasta" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: " " + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + alternatives: + - "--sjdbGTFfile" + description: "Specifies the path to the file with annotated transcripts in the\ + \ standard GTF\nformat. STAR will extract splice junctions from this file\ + \ and use them to greatly improve\naccuracy of the mapping. Corresponds to\ + \ the --sjdbGTFfile argument in the STAR command.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--genomeDir" + description: "Path to output directory. Corresponds to the --genomeDir argument\ + \ in the STAR command." 
+ info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome indexing arguments" + arguments: + - type: "integer" + name: "--genomeSAindexNbases" + description: "Length (bases) of the SA pre-indexing string. Typically between\ + \ 10 and 15.\nLonger strings will use much more memory, but allow faster searches.\ + \ For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down\ + \ to\nmin(14, log2(GenomeLength)/2 - 1).\n" + info: null + default: + - 14 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Create a reference for STAR from a set of fasta files." + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "../../../resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "docker" + env: + - "STAR_VERSION 2.7.10b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic 
CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/config.vsh.yml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_build_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/mapping/star_build_reference/star_build_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/mapping/star_build_reference/star_build_reference b/target/docker/mapping/star_build_reference/star_build_reference new file mode 100755 index 00000000000..698f440b73e 
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   Viashquote --foo=bar  # returns --foo='bar'
function ViashQuote {
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # flag with a value: quote everything after the first '='
    printf '%s\n' "${1%%=*}='${1#*=}'"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: printf is used instead of echo because echo would swallow
    # flags it recognises as its own options (-n, -e, -E)
    printf '%s\n' "$1"
  else
    # plain value
    printf '%s\n' "'$1'"
  fi
}
# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] '.
function ViashLog {
  local threshold="$1" label="$2"
  shift 2
  # stay silent unless the current verbosity reaches the requested level
  if ! [ $VIASH_VERBOSITY -ge $threshold ]; then
    return 0
  fi
  echo "[$label]" "$@" >&2
}
+function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="star_build_reference" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "star_build_reference 0.12.4" + echo "" + echo "Create a reference for STAR from a set of fasta files." + echo "" + echo "Input/Output:" + echo " --genomeFastaFiles, --genome_fasta" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: chr1.fasta chr2.fasta" + echo " The fasta files to be included in the reference. Corresponds to the" + echo " --genomeFastaFiles argument in the STAR command." 
# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits with status 1 when the docker executable cannot be found or when
# 'docker version' fails (e.g. the daemon is not reachable).
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # 'command -v' has to be run as a command; wrapping it in '[ ... ]' turns it
  # into a malformed test expression that never detects a missing binary.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so the exit code can be inspected
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}
# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was pushed
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  # temporarily disable errexit so a failed push can be reported
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # report on the image that was actually pushed ($1) rather than on the
  # global VSHD_ID, which is only set by ViashDockerSetup
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}
# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  VSHD_ID="$1"
  VSHD_STRAT="$2"
  case "$VSHD_STRAT" in
    alwaysbuild|build|b)
      ViashDockerBuild $VSHD_ID --no-cache
      ;;
    alwayspull|pull|p)
      ViashDockerPull $VSHD_ID
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild $VSHD_ID --no-cache
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild $VSHD_ID
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild $VSHD_ID
      ;;
    ifneedbe*)
      # only act when the image is not available locally yet
      save=$-; set +e
      ViashDockerLocalTagCheck $VSHD_ID
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $VSHD_ID already exists"
      else
        case "$VSHD_STRAT" in
          ifneedbebuild)
            ViashDockerBuild $VSHD_ID --no-cache
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild $VSHD_ID
            ;;
          ifneedbepull)
            ViashDockerPull $VSHD_ID
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild $VSHD_ID --no-cache
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild $VSHD_ID
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$VSHD_ID"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      # only push when the remote does not have the image yet
      save=$-; set +e
      ViashDockerRemoteTagCheck $VSHD_ID
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$VSHD_ID' exists, doing nothing."
      else
        ViashNotice "Container '$VSHD_ID' does not yet exist."
        ViashDockerPush "$VSHD_ID"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
      ;;
  esac
}
# ViashDockerBuild: build a docker container
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : optional extra arguments for 'docker build' (e.g. --no-cache)
# exit code $?        : whether or not the image was built
function ViashDockerBuild {
  # create temporary directory to store dockerfile & optional resources in
  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-star_build_reference-XXXXXX")
  dockerfile="$tmpdir/Dockerfile"
  function clean_up {
    rm -rf "$tmpdir"
  }
  trap clean_up EXIT

  # store dockerfile and resources
  # (paths are quoted throughout: the temp and resources directories may contain spaces)
  ViashDockerfile > "$dockerfile"

  # Build the container
  ViashNotice "Building container '$1' with Dockerfile"
  ViashInfo "Running 'docker build -t $* $VIASH_META_RESOURCES_DIR -f $dockerfile'"
  # temporarily disable errexit so a failed build can be reported
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile"
  else
    docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log"
  fi
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashError "Error occurred while building container '$1'"
    # the build output was captured instead of shown, so replay it now
    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
      ViashError "Transcript: --------------------------------"
      cat "$tmpdir/docker_build.log"
      ViashError "End of transcript --------------------------"
    fi
    exit 1
  fi
  ViashDockerCheckCommands "$1" 'ps' 'bash'
}
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "star_build_reference 0.12.4" + exit + ;; + --genome_fasta) + if [ -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_PAR_GENOME_FASTA="$2" + else + VIASH_PAR_GENOME_FASTA="$VIASH_PAR_GENOME_FASTA ""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_fasta=*) + if [ -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") + else + VIASH_PAR_GENOME_FASTA="$VIASH_PAR_GENOME_FASTA "$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --genomeFastaFiles) + if [ -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_PAR_GENOME_FASTA="$2" + else + VIASH_PAR_GENOME_FASTA="$VIASH_PAR_GENOME_FASTA ""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeFastaFiles. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --transcriptome_gtf) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_gtf=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sjdbGTFfile) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--sjdbGTFfile\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sjdbGTFfile. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --genomeDir)
+            [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--genomeDir\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeDir. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --genomeSAindexNbases)
+            [ -n "$VIASH_PAR_GENOMESAINDEXNBASES" ] && ViashError Bad arguments for option \'--genomeSAindexNbases\': \'$VIASH_PAR_GENOMESAINDEXNBASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GENOMESAINDEXNBASES="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --genomeSAindexNbases. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --genomeSAindexNbases=*)
+            [ -n "$VIASH_PAR_GENOMESAINDEXNBASES" ] && ViashError Bad arguments for option \'--genomeSAindexNbases=*\': \'$VIASH_PAR_GENOMESAINDEXNBASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_GENOMESAINDEXNBASES=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            # '---setup <strategy>' occupies two words (flag + value), so both
+            # must be consumed; otherwise the strategy is re-parsed as a
+            # positional argument on the next loop iteration.
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2
+            ;;
+        ---setup=*)
+            # '---setup=<strategy>' is a single word: shift exactly once.
+            # (Shifting twice fails when it is the last argument and would
+            # otherwise silently swallow the argument that follows.)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)
+            # NOTE: '---v' is already matched by the '---v|---verbose' arm
+            # earlier in this case statement, so only '---volume' reaches here.
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            # The value is embedded in the current word ($1), not the next one.
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus.
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # (pass the variable NAME to unset: expanding the empty value would hand
+    # unset no argument at all, leaving the variable set-but-empty so that the
+    # later ${VIASH_META_MEMORY_B+x} checks still treat it as provided)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # same reasoning as above: unset takes the name, not the (empty) value
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then
+  ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+
+# filling in defaults
+if [ -z ${VIASH_PAR_GENOMESAINDEXNBASES+x} ]; then
+  VIASH_PAR_GENOMESAINDEXNBASES="14"
+fi
+
+# check whether required files exist
+if [ !
-z "$VIASH_PAR_GENOME_FASTA" ]; then + IFS=' ' + set -f + for file in $VIASH_PAR_GENOME_FASTA; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_GENOMESAINDEXNBASES" ]]; then + if ! [[ "$VIASH_PAR_GENOMESAINDEXNBASES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--genomeSAindexNbases' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_TEST_GENOME_FASTA=() + IFS=' ' + for var in $VIASH_PAR_GENOME_FASTA; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_GENOME_FASTA+=( "$var" ) + done + VIASH_PAR_GENOME_FASTA=$(IFS=' ' ; echo "${VIASH_TEST_GENOME_FASTA[*]}") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/mapping_star_build_reference:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-star_build_reference-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import re +import tempfile +import subprocess +from pathlib import Path +import tarfile +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'genome_fasta': $( if [ ! 
-z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "r'${VIASH_PAR_GENOME_FASTA//\'/\'\"\'\"r\'}'.split(' ')"; else echo None; fi ), + 'transcriptome_gtf': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_GTF//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'genomeSAindexNbases': $( if [ ! -z ${VIASH_PAR_GENOMESAINDEXNBASES+x} ]; then echo "int(r'${VIASH_PAR_GENOMESAINDEXNBASES//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + +# helper function for cheching whether something is a gzip +def is_gz_file(path: Path) -> bool: + with open(path, 'rb') as file: + return file.read(2) == b'\\x1f\\x8b' + +# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) + + with tarfile.open(par_value, 'r') as open_tar: + members = open_tar.getmembers() + root_dirs = [member + for member in members + if member.isdir() and member.name != '.' 
and '/' not in member.name] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path('.')] + open_tar.extractall(unpacked_path, members=members_to_move) + return unpacked_path + + elif par_value.is_file() and is_gz_file(par_value): + # Remove extension (if it exists) + extaction_file_name = Path(par_value.stem) + unpacked_path = temp_dir_path / extaction_file_name + print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) + + with gzip.open(par_value, 'rb') as f_in: + with open(unpacked_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + return unpacked_path + + else: + return par_value + +######################## +### Main code ### +######################## + +# rename keys and convert path strings to Path +# note: only list file arguments here. 
if non-file arguments also need to be renamed, +# the \`processPar()\` generator needs to be adapted +to_rename = {'genome_fasta': 'genomeFastaFiles', 'output': 'genomeDir', 'transcriptome_gtf': 'sjdbGTFfile'} + +def process_par(orig_par, to_rename): + for key, value in orig_par.items(): + # rename the key in par based on the \`to_rename\` dict + if key in to_rename.keys(): + new_key = to_rename[key] + + # also turn value into a Path + if isinstance(value, list): + new_value = [Path(val) for val in value] + else: + new_value = Path(value) + else: + new_key = key + new_value = value + yield new_key, new_value +par = dict(process_par(par, to_rename)) + +# create output dir if need be +par["genomeDir"].mkdir(parents=True, exist_ok=True) + +with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"]) as temp_dir: + + # checking for compressed files, ungzip files if need be + temp_dir_path = Path(temp_dir) + for par_name in ["genomeFastaFiles", "sjdbGTFfile"]: + par_values = par[par_name] + if par_values: + # turn value into list + is_multiple = isinstance(par_values, list) + if not is_multiple: + par_values = [ par_values ] + + # output list + new_values = [] + for par_value in par_values: + print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) + new_value = extract_if_need_be(par_value, temp_dir_path) + new_values.append(new_value) + + # unlist if need be + if not is_multiple: + new_values = new_values[0] + + # replace value + par[par_name] = new_values + # end ungzipping + print("", flush=True) + + print(">> Constructing command", flush=True) + par["runMode"] = "genomeGenerate" + par["outTmpDir"] = temp_dir_path / "run" + if 'cpus' in meta and meta['cpus']: + par["runThreadN"] = meta["cpus"] + + + cmd_args = [ "STAR" ] + for name, value in par.items(): + if value is not None: + if isinstance(value, list): + cmd_args.extend(["--" + name] + [str(x) for x in value]) + else: + cmd_args.extend(["--" + name, str(value)]) + print("", 
flush=True) + + print(">> Running STAR with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + print("", flush=True) + + subprocess.run( + cmd_args, + check=True + ) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + unset VIASH_TEST_GENOME_FASTA + IFS=' ' + for var in $VIASH_PAR_GENOME_FASTA; do + unset IFS + if [ -z "$VIASH_TEST_GENOME_FASTA" ]; then + VIASH_TEST_GENOME_FASTA="$(ViashStripAutomount "$var")" + else + VIASH_TEST_GENOME_FASTA="$VIASH_TEST_GENOME_FASTA ""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_GENOME_FASTA="$VIASH_TEST_GENOME_FASTA" +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/metadata/add_id/.config.vsh.yaml b/target/docker/metadata/add_id/.config.vsh.yaml new file mode 100644 index 00000000000..6ba77619407 --- /dev/null +++ b/target/docker/metadata/add_id/.config.vsh.yaml @@ -0,0 +1,197 @@ +functionality: + name: "add_id" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_id" + description: "The input id." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_output" + description: "Name of the .obs column where to store the id." + info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--make_observation_keys_unique" + description: "Join the id to the .obs index (.obs_names)." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Add id of .obs. Also allows to make .obs_names (the .obs index) unique\ + \ \nby prefixing the values with an unique id per .h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory 
= 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/add_id" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/add_id/add_id" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/metadata/add_id/add_id b/target/docker/metadata/add_id/add_id new file mode 100755 index 00000000000..8e74aaf6664 --- /dev/null +++ b/target/docker/metadata/add_id/add_id @@ -0,0 +1,1064 @@ +#!/usr/bin/env bash + +# add_id 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="add_id" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "add_id 0.12.4" + echo "" + echo "Add id of .obs. Also allows to make .obs_names (the .obs index) unique" + echo "by prefixing the values with an unique id per .h5mu file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: sample_path" + echo " Path to the input .h5mu." + echo "" + echo " --input_id" + echo " type: string, required parameter" + echo " The input id." + echo "" + echo " --obs_output" + echo " type: string" + echo " default: sample_id" + echo " Name of the .obs column where to store the id." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --make_observation_keys_unique" + echo " type: boolean_true" + echo " Join the id to the .obs index (.obs_names)." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component metadata add_id" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-add_id-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "add_id 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id) + [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id=*) + [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id=*\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_output) + [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBS_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_output=*) + [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output=*\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --make_observation_keys_unique) + [ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ] && ViashError Bad arguments for option \'--make_observation_keys_unique\': \'$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_add_id:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; 
+ mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then + ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then + VIASH_PAR_OBS_OUTPUT="sample_id" +fi +if [ -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then + VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ]]; then + if ! [[ "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--make_observation_keys_unique' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_add_id:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_add_id:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_add_id:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-add_id-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +from mudata import read_h5mu, MuData + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_output': $( if [ ! -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'make_observation_keys_unique': $( if [ ! -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then echo "r'${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: + """ + Make the observation keys unique across all samples. At input, + the observation keys are unique within a sample. By adding the sample name + (unique for a sample) to each observation key, the observation key is made + unique across all samples as well. 
+ """ + logger.info('Making observation keys unique across all samples.') + sample.obs.index = f"{sample_id}_" + sample.obs.index + make_observation_keys_unique_per_mod(sample_id, sample) + + +def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: + """ + Updating MuData.obs_names is not allowed (it is read-only). + So the observation keys for each modality has to be updated manually. + """ + for mod in sample.mod.values(): + mod.obs_names = f"{sample_id}_" + mod.obs_names + +def main(): + input_data = read_h5mu(par["input"]) + input_data.obs[par["obs_output"]] = par["input_id"] + for mod_data in input_data.mod.values(): + mod_data.obs[par["obs_output"]] = par["input_id"] + if par["make_observation_keys_unique"]: + make_observation_keys_unique(par["input_id"], input_data) + logger.info("Writing out data to '%s'.", par["output"]) + input_data.write_h5mu(par["output"], compression=par["output_compression"]) + +if __name__ == '__main__': + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/metadata/add_id/setup_logger.py b/target/docker/metadata/add_id/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/metadata/add_id/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/metadata/grep_annotation_column/.config.vsh.yaml b/target/docker/metadata/grep_annotation_column/.config.vsh.yaml new file mode 100644 index 00000000000..72ca34538cf --- /dev/null +++ b/target/docker/metadata/grep_annotation_column/.config.vsh.yaml @@ -0,0 +1,244 @@ +functionality: + name: "grep_annotation_column" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + description: "Arguments related to the input dataset." + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_column" + description: "Column to query. 
If not specified, use .var_names or .obs_names,\ + \ depending on the value of --matrix" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to get the annotation matrix from.\n" + info: null + example: + - "rna" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--matrix" + description: "Matrix to fetch the column from that will be searched." + info: null + example: + - "var" + required: false + choices: + - "var" + - "obs" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + description: "Arguments related to how the output will be written." + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_match_column" + description: "Name of the column to write the result to." 
+ info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_fraction_column" + description: "For the opposite axis, name of the column to write the fraction\ + \ of \nobservations that matches to the pattern.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Query options" + description: "Options related to the query" + arguments: + - type: "string" + name: "--regex_pattern" + description: "Regex to use to match with the input column." + info: null + example: + - "^[mM][tT]-" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Perform a regex lookup on a column from the annotation matrices .obs\ + \ or .var.\nThe annotation matrix can originate from either a modality, or all\ + \ modalities (global .var or .obs).\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: 
"native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/grep_annotation_column" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/grep_annotation_column/grep_annotation_column" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/metadata/grep_annotation_column/grep_annotation_column b/target/docker/metadata/grep_annotation_column/grep_annotation_column new file mode 100755 index 00000000000..05d31ccc9fd --- /dev/null +++ b/target/docker/metadata/grep_annotation_column/grep_annotation_column @@ -0,0 +1,1148 @@ +#!/usr/bin/env bash + +# grep_annotation_column 0.12.4 +# +# This wrapper script is 
auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" 
>/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="grep_annotation_column" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "grep_annotation_column 0.12.4" + echo "" + echo "Perform a regex lookup on a column from the annotation matrices .obs or .var." + echo "The annotation matrix can originate from either a modality, or all modalities" + echo "(global .var or .obs)." + echo "" + echo "Inputs:" + echo " Arguments related to the input dataset." + echo "" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: sample_path" + echo " Path to the input .h5mu." + echo "" + echo " --input_column" + echo " type: string" + echo " Column to query. If not specified, use .var_names or .obs_names," + echo " depending on the value of --matrix" + echo "" + echo " --modality" + echo " type: string, required parameter" + echo " example: rna" + echo " Which modality to get the annotation matrix from." 
+ echo "" + echo " --matrix" + echo " type: string" + echo " example: var" + echo " choices: [ var, obs ]" + echo " Matrix to fetch the column from that will be searched." + echo "" + echo "Outputs:" + echo " Arguments related to how the output will be written." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --output_match_column" + echo " type: string, required parameter" + echo " Name of the column to write the result to." + echo "" + echo " --output_fraction_column" + echo " type: string" + echo " For the opposite axis, name of the column to write the fraction of" + echo " observations that matches to the pattern." + echo "" + echo "Query options:" + echo " Options related to the query" + echo "" + echo " --regex_pattern" + echo " type: string, required parameter" + echo " example: ^[mM][tT]-" + echo " Regex to use to match with the input column." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# exits the script with status 1 when the docker command is not found or 'docker version' fails +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # run 'command -v' directly: inside '[ ]' it would be parsed as a (malformed) test expression instead of being executed + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component metadata grep_annotation_column" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + 
+VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : 0 when the image was built; a failed build exits the whole script with status 1 +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-grep_annotation_column-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > "$dockerfile" # quoted: an unquoted redirect target containing spaces is an 'ambiguous redirect' + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" # "$@" = image id plus optional extra flags such as --no-cache + else + docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log" # keep the transcript so it can be shown on failure + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "grep_annotation_column 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_column) + [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_COLUMN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_column. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_column=*) + [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column=*\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_COLUMN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --matrix) + [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MATRIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --matrix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --matrix=*) + [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix=*\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MATRIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_match_column) + [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MATCH_COLUMN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_match_column. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_match_column=*) + [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column=*\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MATCH_COLUMN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_fraction_column) + [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FRACTION_COLUMN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fraction_column. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_fraction_column=*) + [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column=*\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FRACTION_COLUMN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --regex_pattern) + [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REGEX_PATTERN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --regex_pattern. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --regex_pattern=*) + [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern=*\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REGEX_PATTERN=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + ViashError '--modality' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then + ViashError '--output_match_column' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then + ViashError '--regex_pattern' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_MATRIX" ]; then + VIASH_PAR_MATRIX_CHOICES=("var:obs") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_MATRIX_CHOICES[*]}:" =~ ":$VIASH_PAR_MATRIX:" ]]; then + ViashError '--matrix' specified value of \'$VIASH_PAR_MATRIX\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_grep_annotation_column:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-grep_annotation_column-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +from pathlib import Path +from operator import attrgetter +import re +import numpy as np + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_column': $( if [ ! -z ${VIASH_PAR_INPUT_COLUMN+x} ]; then echo "r'${VIASH_PAR_INPUT_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'matrix': $( if [ ! -z ${VIASH_PAR_MATRIX+x} ]; then echo "r'${VIASH_PAR_MATRIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_match_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MATCH_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_fraction_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FRACTION_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'regex_pattern': $( if [ ! -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then echo "r'${VIASH_PAR_REGEX_PATTERN//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(par): + input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] + try: + compiled_regex = re.compile(par["regex_pattern"]) + except (TypeError, re.error) as e: + raise ValueError(f"{par['regex_pattern']} is not a valid regular expression pattern.") from e + else: + if compiled_regex.groups: + raise NotImplementedError("Using match groups is not supported by this component.") + logger.info('Reading input file %s, modality %s.', input_file, mod_name) + + mudata = mu.read_h5mu(input_file) + modality_data = mudata[mod_name] + annotation_matrix = getattr(modality_data, par['matrix']) + default_column = { + "var": attrgetter("var_names"), + "obs": attrgetter("obs_names") + } + if par["input_column"]: + try: + annotation_column = annotation_matrix[par["input_column"]] + except KeyError as e: + raise ValueError(f"Column {par['input_column']} could not be found for modality " + f"{par['modality']}. 
Available columns: {','.join(annotation_matrix.columns.to_list())}") from e + else: + annotation_column = default_column[par['matrix']](modality_data) + grep_result = annotation_column.str.contains(par["regex_pattern"], regex=True) + + other_axis_attribute = { + "var": "obs", + "obs": "var" + } + if par['output_fraction_column']: + pct_matching = np.ravel(np.sum(modality_data[:, grep_result].X, axis=1) / np.sum(modality_data.X, axis=1)) + getattr(modality_data, other_axis_attribute[par['matrix']])[par['output_fraction_column']] = pct_matching + getattr(modality_data, par['matrix'])[par["output_match_column"]] = grep_result + mudata.write(output_file, compression=par["output_compression"]) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/metadata/join_csv/.config.vsh.yaml b/target/docker/metadata/join_csv/.config.vsh.yaml new file mode 100644 index 00000000000..54de10b4438 --- /dev/null +++ b/target/docker/metadata/join_csv/.config.vsh.yaml @@ -0,0 +1,229 @@ +functionality: + name: "join_csv" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "MuData Input" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_key" + description: "Obs column name where the sample id can be found for each observation\ + \ to join on.\nUseful when adding metadata to concatenated samples.\nMutually\ + \ exclusive with `--var_key`.\"\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_key" + description: "Var column name where the sample id can be found for each variable\ + \ to join on.\nMutually exclusive with `--obs_key`.\"\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "MuData Output" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Metadata Input" + arguments: + - type: "file" + name: "--input_csv" + description: ".csv file containing metadata" + info: null + example: + - "metadata.csv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--csv_key" + description: "column of the the csv that corresponds to the sample id." + info: null + default: + - "id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Join a csv containing metadata to the .obs or .var field of a mudata\ + \ file." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/config.vsh.yml" + platform: "docker" + output: 
"/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_csv" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_csv/join_csv" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/metadata/join_csv/join_csv b/target/docker/metadata/join_csv/join_csv new file mode 100755 index 00000000000..79307c1ff13 --- /dev/null +++ b/target/docker/metadata/join_csv/join_csv @@ -0,0 +1,1119 @@ +#!/usr/bin/env bash + +# join_csv 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
#
# Component authors:
#  * Dries Schaumont (author)

set -e

# pick the first temp-dir variable that is set, falling back to /tmp
if [ -z "$VIASH_TEMP" ]; then
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
  VIASH_TEMP=${VIASH_TEMP:-/tmp}
fi

# define helper functions
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
function ViashQuote {
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    echo "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    echo "$1"
  else
    echo "'$1'"
  fi
}
# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# return : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}
# ViashSourceDir: return the path of a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute path of the directory containing the bash file
#           (printed on stdout). Note that the function itself 'cd's, so
#           call it inside a command substitution.
function ViashSourceDir {
  # locals, so resolving the path never clobbers a SOURCE / DIR variable
  # belonging to the caller or the environment
  local source="$1"
  local dir
  while [ -h "$source" ]; do
    dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )"
    source="$(readlink "$source")"
    # a relative link target is relative to the directory holding the link
    [[ $source != /* ]] && source="$dir/$source"
  done
  cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd
}
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE

# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] '.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  # quoted so an empty VIASH_VERBOSITY yields a clear 'integer expression
  # expected' error instead of a malformed test
  if [ "$VIASH_VERBOSITY" -ge "$required_level" ]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog $VIASH_LOGCODE_ERROR error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so a script path containing spaces is passed as a single argument)
VIASH_META_RESOURCES_DIR=`ViashSourceDir "${BASH_SOURCE[0]}"`

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="join_csv"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  echo "join_csv 0.12.4"
  echo ""
  echo "Join a csv containing metadata to the .obs or .var field of a mudata file."
  echo ""
  echo "MuData Input:"
  echo " -i, --input"
  echo " type: file, required parameter, file must exist"
  echo " example: input.h5mu"
  echo " Input h5mu file"
  echo ""
  echo " --modality"
  echo " type: string"
  echo " default: rna"
  echo ""
  echo " --obs_key"
  echo " type: string"
  echo " Obs column name where the sample id can be found for each observation to"
  echo " join on."
  echo " Useful when adding metadata to concatenated samples."
  echo " Mutually exclusive with \`--var_key\`.\""
  echo ""
  echo " --var_key"
  echo " type: string"
  echo " Var column name where the sample id can be found for each variable to"
  echo " join on."
  echo " Mutually exclusive with \`--obs_key\`.\""
  echo ""
  echo "MuData Output:"
  echo " -o, --output"
  echo " type: file, required parameter, output, file must exist"
  echo " example: output.h5mu"
  echo " Output h5mu file."
  echo ""
  echo " --output_compression"
  echo " type: string"
  echo " example: gzip"
  echo " choices: [ gzip, lzf ]"
  echo " The compression format to be used on the output h5mu object."
  echo ""
  echo "Metadata Input:"
  echo " --input_csv"
  echo " type: file, required parameter, file must exist"
  echo " example: metadata.csv"
  echo " .csv file containing metadata"
  echo ""
  echo " --csv_key"
  echo " type: string"
  echo " default: id"
  echo " column of the the csv that corresponds to the sample id."
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 (after logging a critical message) when the
# 'docker' command cannot be found or 'docker version' fails.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # Test the exit status of 'command -v' directly. The former
  # '[ ! command -v docker ]' handed those words to 'test' as a malformed
  # expression, which always failed, so a missing docker was never reported.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so a failing 'docker version' can be reported
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1 : image identifier with format `[registry/]image[:tag]`
# exit code $? : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $? # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $? # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect $1 > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1 : image identifier with format `[registry/]image[:tag]`
# exit code $?
: whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." 
+ fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component metadata join_csv" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-join_csv-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "join_csv 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_key) + [ -n "$VIASH_PAR_OBS_KEY" ] && ViashError Bad arguments for option \'--obs_key\': \'$VIASH_PAR_OBS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBS_KEY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_key. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_key=*) + [ -n "$VIASH_PAR_OBS_KEY" ] && ViashError Bad arguments for option \'--obs_key=*\': \'$VIASH_PAR_OBS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_KEY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_key) + [ -n "$VIASH_PAR_VAR_KEY" ] && ViashError Bad arguments for option \'--var_key\': \'$VIASH_PAR_VAR_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_KEY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_key. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_key=*) + [ -n "$VIASH_PAR_VAR_KEY" ] && ViashError Bad arguments for option \'--var_key=*\': \'$VIASH_PAR_VAR_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_KEY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_csv) + [ -n "$VIASH_PAR_INPUT_CSV" ] && ViashError Bad arguments for option \'--input_csv\': \'$VIASH_PAR_INPUT_CSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_CSV="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_csv. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_csv=*) + [ -n "$VIASH_PAR_INPUT_CSV" ] && ViashError Bad arguments for option \'--input_csv=*\': \'$VIASH_PAR_INPUT_CSV\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_CSV=$(ViashRemoveFlags "$1") + shift 1 + ;; + --csv_key) + [ -n "$VIASH_PAR_CSV_KEY" ] && ViashError Bad arguments for option \'--csv_key\': \'$VIASH_PAR_CSV_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CSV_KEY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --csv_key. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --csv_key=*) + [ -n "$VIASH_PAR_CSV_KEY" ] && ViashError Bad arguments for option \'--csv_key=*\': \'$VIASH_PAR_CSV_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CSV_KEY=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + # two-word form: the strategy is the next argument, so both words are consumed + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup=*) + # one-word form: the strategy is embedded in $1, so only one word is consumed + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---volume. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---volume=*) + # the value is part of $1 itself; $2 is the next, unrelated argument + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # unset takes the variable NAME; passing the expanded (empty) value unsets nothing + # and leaves the variable set-but-empty, which the later ${VAR+x} checks treat as defined + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # pass the name, not the value, so the variable is really removed + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_CSV+x} ]; then + ViashError '--input_csv' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_CSV_KEY+x} ]; then + VIASH_PAR_CSV_KEY="id" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_CSV" ] && [ ! -e "$VIASH_PAR_INPUT_CSV" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_CSV' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_INPUT_CSV" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT_CSV")" ) + VIASH_PAR_INPUT_CSV=$(ViashAutodetectMount "$VIASH_PAR_INPUT_CSV") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_csv:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-join_csv-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import pandas as pd +from mudata import read_h5mu + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_key': $( if [ ! -z ${VIASH_PAR_OBS_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_key': $( if [ ! -z ${VIASH_PAR_VAR_KEY+x} ]; then echo "r'${VIASH_PAR_VAR_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_csv': $( if [ ! -z ${VIASH_PAR_INPUT_CSV+x} ]; then echo "r'${VIASH_PAR_INPUT_CSV//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'csv_key': $( if [ ! -z ${VIASH_PAR_CSV_KEY+x} ]; then echo "r'${VIASH_PAR_CSV_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +if par["obs_key"] and par["var_key"]: + raise ValueError("--obs_key can not be used in conjuction with --var_key.") +if not (par["obs_key"] or par["var_key"]): + raise ValueError("Must define either --obs_key or --var_key") + +logger.info("Read metadata csv from file") +metadata = pd.read_csv(par['input_csv'], sep=",", header=0, index_col=par["csv_key"]) +metadata.fillna('', inplace=True) + +logger.info("Read mudata from file") +mdata = read_h5mu(par['input']) +mod_data = mdata.mod[par['modality']] + +logger.info("Joining csv to mudata") +matrix 
= 'var' if par["var_key"] else 'obs' +matrix_sample_column_name = par["var_key"] if par["var_key"] else par["obs_key"] +original_matrix = getattr(mod_data, matrix) +sample_ids = original_matrix[matrix_sample_column_name] + +try: + new_columns = metadata.loc[sample_ids.tolist()] +except KeyError as e: + raise KeyError(f"Not all sample IDs selected from {matrix} " + "(using the column selected with --var_key or --obs_key) were found in " + "the csv file.") from e +new_matrix = pd.concat([original_matrix.reset_index(drop=True), + new_columns.reset_index(drop=True)], axis=1)\\ + .set_axis(original_matrix.index) +setattr(mod_data, matrix, new_matrix) + +logger.info("Write output to mudata file") +mdata.write_h5mu(par['output'], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_PAR_INPUT_CSV" ]; then + VIASH_PAR_INPUT_CSV=$(ViashStripAutomount "$VIASH_PAR_INPUT_CSV") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/metadata/join_csv/setup_logger.py b/target/docker/metadata/join_csv/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/metadata/join_csv/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/metadata/join_uns_to_obs/.config.vsh.yaml b/target/docker/metadata/join_uns_to_obs/.config.vsh.yaml new file mode 100644 index 00000000000..be91ea1b603 --- /dev/null +++ b/target/docker/metadata/join_uns_to_obs/.config.vsh.yaml @@ -0,0 +1,171 @@ +functionality: + name: "join_uns_to_obs" + namespace: "metadata" + version: "0.12.4" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_key" + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Join a data frame of length 1 (1 row index value) in .uns containing\ + \ metadata to the .obs of a mudata file." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 
256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/config.vsh.yml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_uns_to_obs" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/join_uns_to_obs/join_uns_to_obs" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/metadata/join_uns_to_obs/join_uns_to_obs b/target/docker/metadata/join_uns_to_obs/join_uns_to_obs new file mode 100755 index 00000000000..f0a3260d1d2 --- /dev/null +++ b/target/docker/metadata/join_uns_to_obs/join_uns_to_obs @@ -0,0 +1,1035 @@ +#!/usr/bin/env bash + +# join_uns_to_obs 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+ +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! 
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] ' (printed only when $VIASH_VERBOSITY >= $1).
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="join_uns_to_obs"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "join_uns_to_obs 0.12.4"
+  echo ""
+  echo "Join a data frame of length 1 (1 row index value) in .uns containing metadata to"
+  echo "the .obs of a mudata file."
+  echo ""
+  echo "Arguments:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " example: input.h5mu"
+  echo " Input h5mu file"
+  echo ""
+  echo " --modality"
+  echo " type: string"
+  echo " default: rna"
+  echo ""
+  echo " --uns_key"
+  echo " type: string, required parameter"
+  echo ""
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " example: output.h5mu"
+  echo " Output h5mu file."
+  echo ""
+  echo " --output_compression"
+  echo " type: string"
+  echo " example: gzip"
+  echo " choices: [ gzip, lzf ]"
+  echo " The compression format to be used on the output h5mu object."
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# Exits with status 1 (after a ViashCritical message) when the docker CLI is not found or `docker version` fails.
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  # run `command -v` directly: wrapped in [ ... ] it is never executed and the branch can never be taken
+  if ! command -v docker &> /dev/null; then
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  # temporarily disable errexit so a failing `docker version` can be reported instead of aborting
+  save=$-; set +e
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $? # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $? # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $? # returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q $1)" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+#   ViashDockerPull python:latest
+#   echo $? # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $?
# returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.description="Companion container for running component metadata join_uns_to_obs" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 
: image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-join_uns_to_obs-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "join_uns_to_obs 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --uns_key) + [ -n "$VIASH_PAR_UNS_KEY" ] && ViashError Bad arguments for option \'--uns_key\': \'$VIASH_PAR_UNS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_UNS_KEY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_key. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --uns_key=*) + [ -n "$VIASH_PAR_UNS_KEY" ] && ViashError Bad arguments for option \'--uns_key=*\': \'$VIASH_PAR_UNS_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_KEY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        --output_compression=*)
+            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            shift 2 # space-separated form: consume the flag and its value, like every other option in this loop
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1 # '=' form: flag and value are one word; shift 2 would swallow the next argument or fail under set -e
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the value is part of $1 in the '=' form
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters.
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units (each larger unit is rounded up)
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset by NAME: `unset $VAR` expands the empty value and leaves the variable set-but-empty
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty (by name, so later ${VIASH_META_CPUS+x} checks see it as undefined)
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_UNS_KEY+x} ]; then
+  ViashError '--uns_key' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_join_uns_to_obs:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-join_uns_to_obs-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import pandas as pd +from mudata import read_h5mu + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'uns_key': $( if [ ! -z ${VIASH_PAR_UNS_KEY+x} ]; then echo "r'${VIASH_PAR_UNS_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Read mudata from file") +mdata = read_h5mu(par['input']) +mod_data = mdata.mod[par['modality']] + +logger.info("Joining uns to obs") +# get data frame +uns_df = mod_data.uns[par['uns_key']] + +# check for overlapping colnames +intersect_keys = uns_df.keys().intersection(mod_data.obs.keys()) +obs_drop = mod_data.obs.drop(intersect_keys, axis=1) + +# create data frame to join +uns_df_rep = uns_df.loc[uns_df.index.repeat(mod_data.n_obs)] +uns_df_rep.index = mod_data.obs_names + +# create new obs +mod_data.obs = pd.concat([obs_drop, uns_df_rep], axis=1) + +logger.info("Write output to mudata file") +mdata.write_h5mu(par['output'], compression=par["output_compression"]) + + +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/metadata/join_uns_to_obs/setup_logger.py b/target/docker/metadata/join_uns_to_obs/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/metadata/join_uns_to_obs/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml b/target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml new file mode 100644 index 00000000000..fc8f7351b44 --- /dev/null +++ b/target/docker/metadata/move_obsm_to_obs/.config.vsh.yaml @@ -0,0 +1,192 @@ +functionality: + name: "move_obsm_to_obs" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: 
"dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "MuData Input" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_key" + description: "Key of a data structure to move from `.obsm` to `.obs`." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "MuData Output" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Move a matrix from .obsm to .obs. 
Newly created columns in .obs will\ + \ \nbe created from the .obsm key suffixed with an underscore and the name of\ + \ the columns\nof the specified .obsm matrix.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + 
debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/move_obsm_to_obs" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs b/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs new file mode 100755 index 00000000000..9e3bfd667a7 --- /dev/null +++ b/target/docker/metadata/move_obsm_to_obs/move_obsm_to_obs @@ -0,0 +1,1054 @@ +#!/usr/bin/env bash + +# move_obsm_to_obs 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="move_obsm_to_obs" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "move_obsm_to_obs 0.12.4" + echo "" + echo "Move a matrix from .obsm to .obs. Newly created columns in .obs will" + echo "be created from the .obsm key suffixed with an underscore and the name of the" + echo "columns" + echo "of the specified .obsm matrix." + echo "" + echo "MuData Input:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --obsm_key" + echo " type: string, required parameter" + echo " Key of a data structure to move from \`.obsm\` to \`.obs\`." + echo "" + echo "MuData Output:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object."
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether the docker executable is on the PATH and the daemon responds; exits 1 otherwise +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component metadata move_obsm_to_obs" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-move_obsm_to_obs-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "move_obsm_to_obs 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_key) + [ -n "$VIASH_PAR_OBSM_KEY" ] && ViashError Bad arguments for option \'--obsm_key\': \'$VIASH_PAR_OBSM_KEY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSM_KEY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_key. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_key=*) + [ -n "$VIASH_PAR_OBSM_KEY" ] && ViashError Bad arguments for option \'--obsm_key=*\': \'$VIASH_PAR_OBSM_KEY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_KEY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + # the strategy is the NEXT argument: consume both the flag and its value + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup=*) + # the strategy is embedded in this argument: consume exactly one argument + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + # note: ---v is already matched by the earlier ---v|---verbose branch, so only ---volume reaches this branch + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the mount specification is part of this argument itself, not the next one + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + # quote the value so it reaches ViashMemoryAsBytes as one word (the helper strips whitespace itself) + VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + # each unit is the previous one divided by 1024, rounded up + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + # unset takes the variable NAME; passing the expanded (empty) value would unset nothing + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + # same as above: pass the name so a set-but-empty value really becomes unset + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OBSM_KEY+x} ]; then + ViashError '--obsm_key' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/metadata_move_obsm_to_obs:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-move_obsm_to_obs-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +from functools import partial +from pandas.errors import MergeError +from mudata import read_h5mu + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_key': $( if [ ! -z ${VIASH_PAR_OBSM_KEY+x} ]; then echo "r'${VIASH_PAR_OBSM_KEY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Read mudata from file") +mdata = read_h5mu(par['input']) +# Look up the requested modality; a missing modality is reported as a ValueError. +try: + mod_data = mdata.mod[par['modality']] +except KeyError: + raise ValueError(f"Modality {par['modality']} does not exist.") + +logger.info("Moving .obsm key %s", par["obsm_key"]) +# Work on a copy so the renaming below does not touch the object still stored in .obsm. +try: + obsm_matrix = mod_data.obsm[par["obsm_key"]].copy() +except KeyError: + raise ValueError(f".obsm key {par['obsm_key']} was not found in " + f".obsm slot for modality {par['modality']}.") + + +# Prefix every column name with "<obsm_key>_" before the columns are joined onto .obs. +obsm_matrix.rename(partial("{key}_{}".format, key=par["obsm_key"]), + axis="columns", copy=False, inplace=True) + +try: + logger.info(f".obs names: {mod_data.obs_names}") + logger.info(f".obsm index: 
{obsm_matrix.index}") + mod_data.obs = mod_data.obs.merge(obsm_matrix, how="left", + validate="one_to_one", + left_index=True, right_index=True) +except MergeError as e: + raise ValueError(f"Could not join .obsm matrix at {par['obsm_key']} to .obs because there " + "are some observation that are not overlapping between the two matrices " + "(indexes should overlap). This is either a bug or your mudata file is corrupt.") +del mod_data.obsm[par["obsm_key"]] + +logger.info("Write output to mudata file") +mdata.write_h5mu(par['output'], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/metadata/move_obsm_to_obs/setup_logger.py b/target/docker/metadata/move_obsm_to_obs/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/metadata/move_obsm_to_obs/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/neighbors/bbknn/.config.vsh.yaml b/target/docker/neighbors/bbknn/.config.vsh.yaml new file mode 100644 index 00000000000..db545e6970d --- /dev/null +++ b/target/docker/neighbors/bbknn/.config.vsh.yaml @@ -0,0 +1,289 @@ +functionality: + name: "bbknn" + namespace: "neighbors" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + 
direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "The dimensionality reduction in `.obsm` to use for neighbour detection.\ + \ Defaults to X_pca." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: ".obs column name discriminating between your batches." + info: null + default: + - "batch" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output .h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_output" + description: "Mandatory .uns slot to store various neighbor output objects." + info: null + default: + - "neighbors" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_distances" + description: "In which .obsp slot to store the distance matrix between the resulting\ + \ neighbors." + info: null + default: + - "distances" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_connectivities" + description: "In which .obsp slot to store the connectivities matrix between the\ + \ resulting neighbors." 
+ info: null + default: + - "connectivities" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_neighbors_within_batch" + description: "How many top neighbours to report for each batch; total number of\ + \ neighbours in the initial k-nearest-neighbours computation will be this number\ + \ times the number of batches." + info: null + default: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_pcs" + description: "How many dimensions (in case of PCA, principal components) to use\ + \ in the analysis." + info: null + default: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_trim" + description: "Trim the neighbours of each cell to these many top connectivities.\ + \ May help with population independence and improve the tidiness of clustering.\ + \ The lower the value the more independent the individual populations, at the\ + \ cost of more conserved batch effect. If `None` (default), sets the parameter\ + \ value automatically to 10 times `neighbors_within_batch` times the number\ + \ of batches. Set to 0 to skip." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "BBKNN network generation\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "bbknn" + - "scikit-learn~=1.2.2" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: 
"memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/bbknn" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/bbknn/bbknn" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/neighbors/bbknn/bbknn b/target/docker/neighbors/bbknn/bbknn new file mode 100755 index 00000000000..ba17ae94b90 --- /dev/null +++ b/target/docker/neighbors/bbknn/bbknn @@ -0,0 +1,1184 @@ +#!/usr/bin/env bash + +# bbknn 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (author) +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the 
verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="bbknn" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bbknn 0.12.4" + echo "" + echo "BBKNN network generation" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --obsm_input" + echo " type: string" + echo " default: X_pca" + echo " The dimensionality reduction in \`.obsm\` to use for neighbour detection." + echo " Defaults to X_pca." + echo "" + echo " --obs_batch" + echo " type: string" + echo " default: batch" + echo " .obs column name discriminating between your batches." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output .h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --uns_output" + echo " type: string" + echo " default: neighbors" + echo " Mandatory .uns slot to store various neighbor output objects." 
+ echo "" + echo " --obsp_distances" + echo " type: string" + echo " default: distances" + echo " In which .obsp slot to store the distance matrix between the resulting" + echo " neighbors." + echo "" + echo " --obsp_connectivities" + echo " type: string" + echo " default: connectivities" + echo " In which .obsp slot to store the connectivities matrix between the" + echo " resulting neighbors." + echo "" + echo " --n_neighbors_within_batch" + echo " type: integer" + echo " default: 3" + echo " How many top neighbours to report for each batch; total number of" + echo " neighbours in the initial k-nearest-neighbours computation will be this" + echo " number times the number of batches." + echo "" + echo " --n_pcs" + echo " type: integer" + echo " default: 50" + echo " How many dimensions (in case of PCA, principal components) to use in the" + echo " analysis." + echo "" + echo " --n_trim" + echo " type: integer" + echo " Trim the neighbours of each cell to these many top connectivities. May" + echo " help with population independence and improve the tidiness of" + echo " clustering. The lower the value the more independent the individual" + echo " populations, at the cost of more conserved batch effect. If \`None\`" + echo " (default), sets the parameter value automatically to 10 times" + echo " \`neighbors_within_batch\` times the number of batches. Set to 0 to skip." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
+ exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps build-essential && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "bbknn" "scikit-learn~=1.2.2" + +LABEL org.opencontainers.image.authors="Dries De Maeyer, Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component neighbors bbknn" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL 
org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-bbknn-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) 
continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "bbknn 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_input) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSM_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_input=*) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_batch) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_batch=*) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --uns_output) + [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --uns_output=*) + [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output=*\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsp_distances) + [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_DISTANCES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_distances. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsp_distances=*) + [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances=*\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_DISTANCES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsp_connectivities) + [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_CONNECTIVITIES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_connectivities. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsp_connectivities=*) + [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities=*\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_CONNECTIVITIES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_neighbors_within_batch) + [ -n "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" ] && ViashError Bad arguments for option \'--n_neighbors_within_batch\': \'$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors_within_batch. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_neighbors_within_batch=*) + [ -n "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" ] && ViashError Bad arguments for option \'--n_neighbors_within_batch=*\': \'$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --n_pcs)
+            [ -n "$VIASH_PAR_N_PCS" ] && ViashError Bad arguments for option \'--n_pcs\': \'$VIASH_PAR_N_PCS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_N_PCS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_pcs. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --n_pcs=*)
+            [ -n "$VIASH_PAR_N_PCS" ] && ViashError Bad arguments for option \'--n_pcs=*\': \'$VIASH_PAR_N_PCS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_N_PCS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        --n_trim)
+            [ -n "$VIASH_PAR_N_TRIM" ] && ViashError Bad arguments for option \'--n_trim\': \'$VIASH_PAR_N_TRIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_N_TRIM="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_trim. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --n_trim=*)
+            [ -n "$VIASH_PAR_N_TRIM" ] && ViashError Bad arguments for option \'--n_trim=*\': \'$VIASH_PAR_N_TRIM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_N_TRIM=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            shift 2 # the strategy came from "$2": consume both the flag and its value
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1 # flag and value are a single word here: consume only "$1"
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the mount spec is embedded in "$1", not in the next argument
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    unset VIASH_META_MEMORY_B # unset takes the variable NAME; expanding the empty value made this a no-op
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS # unset by name, for the same reason as VIASH_META_MEMORY_B above
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then + VIASH_PAR_OBSM_INPUT="X_pca" +fi +if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then + VIASH_PAR_OBS_BATCH="batch" +fi +if [ -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then + VIASH_PAR_UNS_OUTPUT="neighbors" +fi +if [ -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then + VIASH_PAR_OBSP_DISTANCES="distances" +fi +if [ -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then + VIASH_PAR_OBSP_CONNECTIVITIES="connectivities" +fi +if [ -z ${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH+x} ]; then + VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH="3" +fi +if [ -z ${VIASH_PAR_N_PCS+x} ]; then + VIASH_PAR_N_PCS="50" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" ]]; then + if ! [[ "$VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_neighbors_within_batch' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_PCS" ]]; then + if ! [[ "$VIASH_PAR_N_PCS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_pcs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_TRIM" ]]; then + if ! [[ "$VIASH_PAR_N_TRIM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_trim' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether the value belongs to the set of allowed choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ !
-z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_bbknn:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bbknn-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from mudata import read_h5mu +import bbknn + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsp_connectivities': $( if [ ! 
-z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'n_neighbors_within_batch': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_pcs': $( if [ ! -z ${VIASH_PAR_N_PCS+x} ]; then echo "int(r'${VIASH_PAR_N_PCS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_trim': $( if [ ! -z ${VIASH_PAR_N_TRIM+x} ]; then echo "int(r'${VIASH_PAR_N_TRIM//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +mudata = read_h5mu(par["input"]) +adata = mudata.mod[par["modality"]] + +# copy data +tmp_adata = adata.copy() +bbknn.bbknn( + tmp_adata, + use_rep=par["obsm_input"], + batch_key = par["obs_batch"], + neighbors_within_batch=par["n_neighbors_within_batch"], + n_pcs=par["n_pcs"], + trim=par["n_trim"] +) + +# store output +adata.obsp[par["obsp_connectivities"]] = tmp_adata.obsp["connectivities"] +adata.obsp[par["obsp_distances"]] = tmp_adata.obsp["distances"] +adata.uns[par["uns_output"]] = tmp_adata.uns["neighbors"] +adata.uns[par["uns_output"]]["distances_key"] = par["obsp_distances"] +adata.uns[par["uns_output"]]["connectivities_key"] = par["obsp_connectivities"] + +# write to file +mudata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/neighbors/find_neighbors/.config.vsh.yaml b/target/docker/neighbors/find_neighbors/.config.vsh.yaml new file mode 100644 index 00000000000..6419d2b4187 --- /dev/null +++ b/target/docker/neighbors/find_neighbors/.config.vsh.yaml @@ -0,0 +1,309 @@ +functionality: + name: "find_neighbors" + namespace: "neighbors" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "Which .obsm slot to use as a starting PCA embedding." 
+ info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file containing the found neighbors." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_output" + description: "Mandatory .uns slot to store various neighbor output objects." + info: null + default: + - "neighbors" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_distances" + description: "In which .obsp slot to store the distance matrix between the resulting\ + \ neighbors." + info: null + default: + - "distances" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_connectivities" + description: "In which .obsp slot to store the connectivities matrix between the\ + \ resulting neighbors." + info: null + default: + - "connectivities" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--metric" + description: "The distance metric to be used in the generation of the nearest\ + \ neighborhood network." 
+ info: null + default: + - "euclidean" + required: false + choices: + - "cityblock" + - "cosine" + - "euclidean" + - "l1" + - "l2" + - "manhattan" + - "braycurtis" + - "canberra" + - "chebyshev" + - "correlation" + - "dice" + - "hamming" + - "jaccard" + - "kulsinski" + - "mahalanobis" + - "minkowski" + - "rogerstanimoto" + - "russellrao" + - "seuclidean" + - "sokalmichener" + - "sokalsneath" + - "sqeuclidean" + - "yule" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_neighbors" + description: "The size of local neighborhood (in terms of number of neighboring\ + \ data points) used for manifold approximation. Larger values result in more\ + \ global views of the manifold, while smaller values result in more local data\ + \ being preserved. In general values should be in the range 2 to 100. If knn\ + \ is True, number of nearest neighbors to be searched. If knn is False, a Gaussian\ + \ kernel width is set to the distance of the n_neighbors neighbor." + info: null + default: + - 15 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seed" + description: "A random seed." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor\ + \ search efficiency of this heavily relies on UMAP [McInnes18], which also provides\ + \ a method for estimating connectivities of data points - the connectivity of\ + \ the manifold (method=='umap'). 
If method=='gauss', connectivities are computed\ + \ according to [Coifman05], in the adaption of [Haghverdi16].\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" 
+ cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/find_neighbors" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/neighbors/find_neighbors/find_neighbors" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/neighbors/find_neighbors/find_neighbors b/target/docker/neighbors/find_neighbors/find_neighbors new file mode 100755 index 00000000000..e915895639e --- /dev/null +++ b/target/docker/neighbors/find_neighbors/find_neighbors @@ -0,0 +1,1208 @@ +#!/usr/bin/env bash + +# find_neighbors 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (maintainer) +# * Robrecht Cannoodt (contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# ViashQuote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory containing a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory that contains the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the
verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] ', printed only when $VIASH_VERBOSITY >= $1. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="find_neighbors" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "find_neighbors 0.12.4" + echo "" + echo "Compute a neighborhood graph of observations [McInnes18]." + echo "" + echo "The neighbor search efficiency of this heavily relies on UMAP [McInnes18], which" + echo "also provides a method for estimating connectivities of data points - the" + echo "connectivity of the manifold (method=='umap'). If method=='gauss'," + echo "connectivities are computed according to [Coifman05], in the adaption of" + echo "[Haghverdi16]." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --obsm_input" + echo " type: string" + echo " default: X_pca" + echo " Which .obsm slot to use as a starting PCA embedding." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file containing the found neighbors." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." 
+ echo "" + echo " --uns_output" + echo " type: string" + echo " default: neighbors" + echo " Mandatory .uns slot to store various neighbor output objects." + echo "" + echo " --obsp_distances" + echo " type: string" + echo " default: distances" + echo " In which .obsp slot to store the distance matrix between the resulting" + echo " neighbors." + echo "" + echo " --obsp_connectivities" + echo " type: string" + echo " default: connectivities" + echo " In which .obsp slot to store the connectivities matrix between the" + echo " resulting neighbors." + echo "" + echo " --metric" + echo " type: string" + echo " default: euclidean" + echo " choices: [ cityblock, cosine, euclidean, l1, l2, manhattan, braycurtis," + echo "canberra, chebyshev, correlation, dice, hamming, jaccard, kulsinski," + echo "mahalanobis, minkowski, rogerstanimoto, russellrao, seuclidean, sokalmichener," + echo "sokalsneath, sqeuclidean, yule ]" + echo " The distance metric to be used in the generation of the nearest" + echo " neighborhood network." + echo "" + echo " --num_neighbors" + echo " type: integer" + echo " default: 15" + echo " The size of local neighborhood (in terms of number of neighboring data" + echo " points) used for manifold approximation. Larger values result in more" + echo " global views of the manifold, while smaller values result in more local" + echo " data being preserved. In general values should be in the range 2 to 100." + echo " If knn is True, number of nearest neighbors to be searched. If knn is" + echo " False, a Gaussian kernel width is set to the distance of the n_neighbors" + echo " neighbor." + echo "" + echo " --seed" + echo " type: integer" + echo " default: 0" + echo " A random seed." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# (exits with status 1 when the docker command is missing or 'docker version' fails) +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect "$1" > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q "$1")" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull "$1" && return 0 || return 1 + else + save=$-; set +e + docker pull "$1" 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the push succeeded (exit status of 'docker push') +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push "$1" + out=$? + else + docker push "$1" 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$?
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component neighbors find_neighbors" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-find_neighbors-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "find_neighbors 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_input) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSM_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_input=*) + [ -n "$VIASH_PAR_OBSM_INPUT" ] && ViashError Bad arguments for option \'--obsm_input=*\': \'$VIASH_PAR_OBSM_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --uns_output) + [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --uns_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --uns_output=*) + [ -n "$VIASH_PAR_UNS_OUTPUT" ] && ViashError Bad arguments for option \'--uns_output=*\': \'$VIASH_PAR_UNS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_UNS_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsp_distances) + [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_DISTANCES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_distances. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsp_distances=*) + [ -n "$VIASH_PAR_OBSP_DISTANCES" ] && ViashError Bad arguments for option \'--obsp_distances=*\': \'$VIASH_PAR_OBSP_DISTANCES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_DISTANCES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsp_connectivities) + [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OBSP_CONNECTIVITIES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsp_connectivities. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsp_connectivities=*) + [ -n "$VIASH_PAR_OBSP_CONNECTIVITIES" ] && ViashError Bad arguments for option \'--obsp_connectivities=*\': \'$VIASH_PAR_OBSP_CONNECTIVITIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSP_CONNECTIVITIES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --metric) + [ -n "$VIASH_PAR_METRIC" ] && ViashError Bad arguments for option \'--metric\': \'$VIASH_PAR_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_METRIC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --metric. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --metric=*) + [ -n "$VIASH_PAR_METRIC" ] && ViashError Bad arguments for option \'--metric=*\': \'$VIASH_PAR_METRIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_METRIC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --num_neighbors) + [ -n "$VIASH_PAR_NUM_NEIGHBORS" ] && ViashError Bad arguments for option \'--num_neighbors\': \'$VIASH_PAR_NUM_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NUM_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --num_neighbors. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --num_neighbors=*) + [ -n "$VIASH_PAR_NUM_NEIGHBORS" ] && ViashError Bad arguments for option \'--num_neighbors=*\': \'$VIASH_PAR_NUM_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_NUM_NEIGHBORS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --seed) + [ -n "$VIASH_PAR_SEED" ] && ViashError Bad arguments for option \'--seed\': \'$VIASH_PAR_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEED="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --seed. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --seed=*) + [ -n "$VIASH_PAR_SEED" ] && ViashError Bad arguments for option \'--seed=*\': \'$VIASH_PAR_SEED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SEED=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1; shift 2 # consume both the flag and its separate value word + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # the value is embedded in the flag word, so only one word is consumed + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + 
kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_OBSM_INPUT+x} ]; then + VIASH_PAR_OBSM_INPUT="X_pca" +fi +if [ -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then + VIASH_PAR_UNS_OUTPUT="neighbors" +fi +if [ -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then + VIASH_PAR_OBSP_DISTANCES="distances" +fi +if [ -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then + VIASH_PAR_OBSP_CONNECTIVITIES="connectivities" +fi +if [ -z ${VIASH_PAR_METRIC+x} ]; then + VIASH_PAR_METRIC="euclidean" +fi +if [ -z ${VIASH_PAR_NUM_NEIGHBORS+x} ]; then + VIASH_PAR_NUM_NEIGHBORS="15" +fi +if [ -z ${VIASH_PAR_SEED+x} ]; then + VIASH_PAR_SEED="0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_NUM_NEIGHBORS" ]]; then + if ! [[ "$VIASH_PAR_NUM_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--num_neighbors' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_SEED" ]]; then + if ! [[ "$VIASH_PAR_SEED" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--seed' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_METRIC" ]; then + VIASH_PAR_METRIC_CHOICES=("cityblock:cosine:euclidean:l1:l2:manhattan:braycurtis:canberra:chebyshev:correlation:dice:hamming:jaccard:kulsinski:mahalanobis:minkowski:rogerstanimoto:russellrao:seuclidean:sokalmichener:sokalsneath:sqeuclidean:yule") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_METRIC_CHOICES[*]}:" =~ ":$VIASH_PAR_METRIC:" ]]; then + ViashError '--metric' specified value of \'$VIASH_PAR_METRIC\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/neighbors_find_neighbors:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-find_neighbors-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +import scanpy as sc +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'metric': $( if [ ! -z ${VIASH_PAR_METRIC+x} ]; then echo "r'${VIASH_PAR_METRIC//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'num_neighbors': $( if [ ! -z ${VIASH_PAR_NUM_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_NUM_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Computing a neighborhood graph on modality %s", mod) +adata = mdata.mod[mod] +neighbors = sc.Neighbors(adata) +neighbors.compute_neighbors( + n_neighbors=par["num_neighbors"], + 
use_rep=par["obsm_input"], + metric=par["metric"], + random_state=par["seed"], + method="umap" +) + +adata.uns[par["uns_output"]] = { + 'connectivities_key': par["obsp_connectivities"], + 'distances_key': par["obsp_distances"], + 'params': { + 'n_neighbors': neighbors.n_neighbors, + 'method': "umap", + 'random_state': par["seed"], + 'metric': par["metric"], + 'use_rep': par["obsm_input"] + } +} + +adata.obsp[par["obsp_distances"]] = neighbors.distances +adata.obsp[par["obsp_connectivities"]] = neighbors.connectivities + +logger.info("Writing to %s", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/neighbors/find_neighbors/setup_logger.py b/target/docker/neighbors/find_neighbors/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/neighbors/find_neighbors/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml b/target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml new file mode 100644 index 00000000000..0eb1d0d237a --- /dev/null +++ b/target/docker/process_10xh5/filter_10xh5/.config.vsh.yaml @@ -0,0 +1,195 @@ +functionality: + name: "filter_10xh5" + namespace: "process_10xh5" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "An h5 file from the 10x genomics website." + info: null + example: + - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5 file." 
+ info: null + example: + - "pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_library_size" + description: "Minimum library size." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells_per_gene" + description: "Minimum number of cells per gene." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--keep_feature_types" + description: "Specify which feature types will never be filtered out" + info: null + example: + - "Antibody Capture" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--verbose" + description: "Increase verbosity" + info: null + direction: "input" + dest: "par" + resources: + - type: "r_script" + path: "script.R" + is_executable: true + description: "Filter a 10x h5 dataset.\n" + usage: "filter_10xh5 \\\n --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\ + \n --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\\n --min_library_size\ + \ 1000 --min_cells_per_gene 300\n" + test_resources: + - type: "r_script" + path: "run_test.R" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "eddelbuettel/r2u:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libhdf5-dev python3-pip python3-dev" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + - type: "r" + cran: + - "testthat" + - "anndata" + - "hdf5r" + bioc_force_install: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/process_10xh5/filter_10xh5" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/process_10xh5/filter_10xh5/filter_10xh5" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git 
a/target/docker/process_10xh5/filter_10xh5/filter_10xh5 b/target/docker/process_10xh5/filter_10xh5/filter_10xh5 new file mode 100755 index 00000000000..91c73c35138 --- /dev/null +++ b/target/docker/process_10xh5/filter_10xh5/filter_10xh5 @@ -0,0 +1,1089 @@ +#!/usr/bin/env bash + +# filter_10xh5 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always 
be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] ', only if $VIASH_VERBOSITY >= $1. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '.
+function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="filter_10xh5" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "filter_10xh5 0.12.4" + echo "" + echo "Filter a 10x h5 dataset." + echo "" + echo "Usage:" + echo "filter_10xh5 \\" + echo " --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\" + echo " --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\" + echo " --min_library_size 1000 --min_cells_per_gene 300" + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + echo " An h5 file from the 10x genomics website."
+ echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" + echo " Output h5 file." + echo "" + echo " --min_library_size" + echo " type: integer" + echo " default: 0" + echo " Minimum library size." + echo "" + echo " --min_cells_per_gene" + echo " type: integer" + echo " default: 0" + echo " Minimum number of cells per gene." + echo "" + echo " --keep_feature_types" + echo " type: string, multiple values allowed" + echo " example: Antibody Capture" + echo " Specify which feature types will never be filtered out" + echo "" + echo " --verbose" + echo " type: boolean_true" + echo " Increase verbosity" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# Logs a critical message and exits with status 1 when the docker binary is missing or 'docker version' fails. +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $?
: whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? 
+ fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM eddelbuettel/r2u:22.04 + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev python3-pip python3-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ + Rscript -e 'remotes::install_cran(c("testthat", "anndata", "hdf5r"), repos = "https://cran.rstudio.com")' + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component process_10xh5 filter_10xh5" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-filter_10xh5-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "filter_10xh5 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_library_size) + [ -n "$VIASH_PAR_MIN_LIBRARY_SIZE" ] && ViashError Bad arguments for option \'--min_library_size\': \'$VIASH_PAR_MIN_LIBRARY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_LIBRARY_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_library_size. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --min_library_size=*) + [ -n "$VIASH_PAR_MIN_LIBRARY_SIZE" ] && ViashError Bad arguments for option \'--min_library_size=*\': \'$VIASH_PAR_MIN_LIBRARY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_LIBRARY_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells_per_gene) + [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_PER_GENE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_per_gene. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells_per_gene=*) + [ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ] && ViashError Bad arguments for option \'--min_cells_per_gene=*\': \'$VIASH_PAR_MIN_CELLS_PER_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_PER_GENE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --keep_feature_types) + if [ -z "$VIASH_PAR_KEEP_FEATURE_TYPES" ]; then + VIASH_PAR_KEEP_FEATURE_TYPES="$2" + else + VIASH_PAR_KEEP_FEATURE_TYPES="$VIASH_PAR_KEEP_FEATURE_TYPES:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --keep_feature_types. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --keep_feature_types=*) + if [ -z "$VIASH_PAR_KEEP_FEATURE_TYPES" ]; then + VIASH_PAR_KEEP_FEATURE_TYPES=$(ViashRemoveFlags "$1") + else + VIASH_PAR_KEEP_FEATURE_TYPES="$VIASH_PAR_KEEP_FEATURE_TYPES:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --verbose) + [ -n "$VIASH_PAR_VERBOSE" ] && ViashError Bad arguments for option \'--verbose\': \'$VIASH_PAR_VERBOSE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_VERBOSE=true + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1; shift 2 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---volume) # '---v' is claimed by the earlier ---v|---verbose pattern + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory_b (by name, not by its empty value) if the string is empty + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus (by name, not by its empty value) if the string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MIN_LIBRARY_SIZE+x} ]; then + VIASH_PAR_MIN_LIBRARY_SIZE="0" +fi +if [ -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then + VIASH_PAR_MIN_CELLS_PER_GENE="0" +fi +if [ -z ${VIASH_PAR_VERBOSE+x} ]; then + VIASH_PAR_VERBOSE="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MIN_LIBRARY_SIZE" ]]; then + if ! [[ "$VIASH_PAR_MIN_LIBRARY_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_library_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS_PER_GENE" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS_PER_GENE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells_per_gene' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_VERBOSE" ]]; then + if ! [[ "$VIASH_PAR_VERBOSE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--verbose' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/process_10xh5_filter_10xh5:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-filter_10xh5-XXXXXX").R +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "input" = $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "min_library_size" = $( if [ ! -z ${VIASH_PAR_MIN_LIBRARY_SIZE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_LIBRARY_SIZE" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "min_cells_per_gene" = $( if [ ! -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_CELLS_PER_GENE" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "keep_feature_types" = $( if [ ! -z ${VIASH_PAR_KEEP_FEATURE_TYPES+x} ]; then echo -n "strsplit('"; echo -n "$VIASH_PAR_KEEP_FEATURE_TYPES" | sed "s#['\\]#\\\\&#g"; echo "', split = ':')[[1]]"; else echo NULL; fi ), + "verbose" = $( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo -n "as.logical(toupper('"; echo -n "$VIASH_PAR_VERBOSE" | sed "s#['\\]#\\\\&#g"; echo "'))"; else echo NULL; fi ) +) +meta <- list( + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +if (par\$verbose) cat("Loading dependencies\\n") +requireNamespace("hdf5r", quietly = TRUE) + +if (par\$verbose) cat("Opening h5 file\\n") +h5 <- hdf5r::H5File\$new(par\$input, mode = "r") + +if (par\$verbose) cat("Reading data in memory\\n") +features__all_tag_keys <- h5[["matrix/features/_all_tag_keys"]][] + +features <- data.frame( + feature_type = h5[["matrix/features/feature_type"]][], + genome = h5[["matrix/features/genome"]][], + id = h5[["matrix/features/id"]][], + name = h5[["matrix/features/name"]][] +) + +mat <- Matrix::sparseMatrix( + i = h5[["matrix/indices"]][], + p = h5[["matrix/indptr"]][], + x = h5[["matrix/data"]][], + dims = h5[["matrix/shape"]][], + index1 = FALSE, + dimnames = list( + features\$id, + h5[["matrix/barcodes"]][] + ) +) + +if (par\$verbose) cat("Filtering out cells with library size < ", par\$min_library_size, "\\n", sep = "") +library_size <- Matrix::colSums(mat) +mat2 <- mat[, library_size >= par\$min_library_size, drop = FALSE] + +if (par\$verbose) cat("Filtering genes with num cells < ", par\$min_cells_per_gene, "\\n", sep = "") +num_cells <- Matrix::rowSums(mat2 > 0) +mat3 <- mat2[num_cells >= par\$min_cells_per_gene | features\$feature_type %in% par\$keep_feature_types, , drop = FALSE] +features2 <- features[match(rownames(mat3), features\$id), , drop = FALSE] + +# helper fun +set_with_type <- function(path, value) { + orig_dtype <- h5[[path]]\$get_type() + orig_chunk <- h5[[path]]\$chunk_dims + if (is.na(orig_chunk)) orig_chunk <- "auto" + h5new\$create_dataset(path, value, dtype = orig_dtype, chunk_dims = orig_chunk) +} + +# create new file +if (par\$verbose) cat("Saving h5 file at '", par\$output, "'\\n", sep = "") +h5new <- hdf5r::H5File\$new(par\$output, mode 
= "w") +zz <- h5new\$create_group("matrix") +zz <- h5new\$create_group("matrix/features") + +set_with_type("matrix/features/feature_type", features2\$feature_type) +set_with_type("matrix/features/genome", features2\$genome) +set_with_type("matrix/features/id", features2\$id) +set_with_type("matrix/features/name", features2\$name) +set_with_type("matrix/features/_all_tag_keys", features__all_tag_keys) +set_with_type("matrix/indices", mat3@i) +set_with_type("matrix/indptr", mat3@p) +set_with_type("matrix/data", as.integer(mat3@x)) +set_with_type("matrix/shape", dim(mat3)) +set_with_type("matrix/barcodes", colnames(mat3)) + +for (attname in hdf5r::h5attr_names(h5)) { + h5new\$create_attr(attname, hdf5r::h5attr(h5, attname)) +} +h5new\$close_all() +h5\$close_all() +VIASHMAIN +Rscript "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/qc/calculate_qc_metrics/.config.vsh.yaml b/target/docker/qc/calculate_qc_metrics/.config.vsh.yaml new file mode 100644 index 00000000000..a8a836e46d5 --- /dev/null +++ b/target/docker/qc/calculate_qc_metrics/.config.vsh.yaml @@ -0,0 +1,235 @@ +functionality: + name: "calculate_qc_metrics" + namespace: "qc" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + info: null + example: + - "raw_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_qc_metrics" + description: "Keys to select a boolean (containing only True or False) column\ + \ from .var.\nFor each cell, calculate the proportion of total values for\ + \ genes which are labeled 'True', \ncompared to the total sum of the values\ + \ for all genes.\n" + info: null + example: + - "ercc,highly_variable,mitochondrial" + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "boolean" + name: "--var_qc_metrics_fill_na_value" + description: "Fill any 'NA' values found in the columns specified with --var_qc_metrics\ + \ to 'True' or 'False'.\nas False.\n" + 
info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--top_n_vars" + description: "Number of top vars to be used to calculate cumulative proportions.\n\ + If not specified, proportions are not calculated. `--top_n_vars 20,50` finds\n\ + cumulative proportion to the 20th and 50th most expressed vars.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Add basic quality control metrics to an .h5mu file.\n\nThe metrics\ + \ are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they\ + \ have slightly different names:\n\nVar metrics (name in this component -> name\ + \ in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs\ + \ -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts\ + \ -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n\ + \ - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics}\ + \ -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars ->\ + \ pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n\ + \ \n" + test_resources: + - type: "python_script" + 
path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scikit-learn~=1.2.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + - "scanpy~=1.9.5" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: 
"/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/calculate_qc_metrics" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics b/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics new file mode 100755 index 00000000000..893b303ce13 --- /dev/null +++ b/target/docker/qc/calculate_qc_metrics/calculate_qc_metrics @@ -0,0 +1,1211 @@ +#!/usr/bin/env bash + +# calculate_qc_metrics 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="calculate_qc_metrics" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "calculate_qc_metrics 0.12.4" + echo "" + echo "Add basic quality control metrics to an .h5mu file." + echo "" + echo "The metrics are comparable to what scanpy.pp.calculate_qc_metrics output," + echo "although they have slightly different names:" + echo "" + echo "Var metrics (name in this component -> name in scanpy):" + echo " - pct_dropout -> pct_dropout_by_{expr_type}" + echo " - num_nonzero_obs -> n_cells_by_{expr_type}" + echo " - obs_mean -> mean_{expr_type}" + echo " - total_counts -> total_{expr_type}" + echo "" + echo "Obs metrics:" + echo " - num_nonzero_vars -> n_genes_by_{expr_type}" + echo " - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}" + echo " - total_counts_{var_qc_metrics} -> total_{expr_type}_{qc_var}" + echo " - pct_of_counts_in_top_{top_n_vars}_vars ->" + echo "pct_{expr_type}_in_top_{n}_{var_type}" + echo " - total_counts -> total_{expr_type}" + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --layer" + echo " type: string" + echo " example: raw_counts" + echo "" + echo " --var_qc_metrics" + echo " type: string, 
multiple values allowed"
+  echo " example: ercc,highly_variable,mitochondrial"
+  echo " Keys to select a boolean (containing only True or False) column from"
+  echo " .var."
+  echo " For each cell, calculate the proportion of total values for genes which"
+  echo " are labeled 'True',"
+  echo " compared to the total sum of the values for all genes."
+  echo ""
+  echo " --var_qc_metrics_fill_na_value"
+  echo " type: boolean"
+  echo " Fill any 'NA' values found in the columns specified with"
+  echo " --var_qc_metrics to 'True' or 'False'."
+  echo " as False."
+  echo ""
+  echo " --top_n_vars"
+  echo " type: integer, multiple values allowed"
+  echo " Number of top vars to be used to calculate cumulative proportions."
+  echo " If not specified, proportions are not calculated. \`--top_n_vars 20,50\`"
+  echo " finds"
+  echo " cumulative proportion to the 20th and 50th most expressed vars."
+  echo ""
+  echo "Outputs:"
+  echo " --output"
+  echo " type: file, output, file must exist"
+  echo " example: output.h5mu"
+  echo " Output h5mu file."
+  echo ""
+  echo " --output_compression"
+  echo " type: string"
+  echo " example: gzip"
+  echo " choices: [ gzip, lzf ]"
+  echo " The compression format to be used on the output h5mu object."
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check that the docker CLI is on the PATH and that the Docker daemon responds; exits 1 otherwise
+# NOTE: 'command -v' has to run as a command; wrapped in '[ ... ]' its words are parsed as test operands and the check never fires
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scikit-learn~=1.2.0" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component qc calculate_qc_metrics" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-calculate_qc_metrics-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "calculate_qc_metrics 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --layer=*) + [ -n "$VIASH_PAR_LAYER" ] && ViashError Bad arguments for option \'--layer=*\': \'$VIASH_PAR_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_qc_metrics) + if [ -z "$VIASH_PAR_VAR_QC_METRICS" ]; then + VIASH_PAR_VAR_QC_METRICS="$2" + else + VIASH_PAR_VAR_QC_METRICS="$VIASH_PAR_VAR_QC_METRICS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_qc_metrics. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_qc_metrics=*) + if [ -z "$VIASH_PAR_VAR_QC_METRICS" ]; then + VIASH_PAR_VAR_QC_METRICS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_VAR_QC_METRICS="$VIASH_PAR_VAR_QC_METRICS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --var_qc_metrics_fill_na_value) + [ -n "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" ] && ViashError Bad arguments for option \'--var_qc_metrics_fill_na_value\': \'$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_qc_metrics_fill_na_value. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_qc_metrics_fill_na_value=*) + [ -n "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" ] && ViashError Bad arguments for option \'--var_qc_metrics_fill_na_value=*\': \'$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --top_n_vars) + if [ -z "$VIASH_PAR_TOP_N_VARS" ]; then + VIASH_PAR_TOP_N_VARS="$2" + else + VIASH_PAR_TOP_N_VARS="$VIASH_PAR_TOP_N_VARS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --top_n_vars. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --top_n_vars=*) + if [ -z "$VIASH_PAR_TOP_N_VARS" ]; then + VIASH_PAR_TOP_N_VARS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_TOP_N_VARS="$VIASH_PAR_TOP_N_VARS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + # the strategy is a separate word: drop both the flag and its value + shift 2 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + # flag and value share one word: drop only that word + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the value is embedded in the current word ($1), not in the next one + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty; unset takes the variable NAME, + # expanding the (empty) value would make this a no-op + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty (pass the name, not the expanded value) +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" ]]; then + if ! [[ "$VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--var_qc_metrics_fill_na_value' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [ -n "$VIASH_PAR_TOP_N_VARS" ]; then + IFS=',' + set -f + for val in $VIASH_PAR_TOP_N_VARS; do + if ! [[ "${val}" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--top_n_vars' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi + done + set +f + unset IFS +fi + +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_calculate_qc_metrics:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-calculate_qc_metrics-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +from mudata import read_h5mu +from scipy.sparse import issparse, isspmatrix_coo, csr_matrix +from sklearn.utils.sparsefuncs import mean_variance_axis +import numpy as np + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_qc_metrics': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'var_qc_metrics_fill_na_value': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'top_n_vars': $( if [ ! -z ${VIASH_PAR_TOP_N_VARS+x} ]; then echo "list(map(int, r'${VIASH_PAR_TOP_N_VARS//\'/\'\"\'\"r\'}'.split(',')))"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(): + input_data = read_h5mu(par["input"]) + modality_data = input_data.mod[par["modality"]] + var = modality_data.var + layer = modality_data.X if not par['layer'] else modality_data.layers[par['layer']] + if not issparse(layer): + raise NotImplementedError("Expected layer to be in sparse format.") + if isspmatrix_coo(layer): + layer = csr_matrix(layer) + layer.eliminate_zeros() + + # var statistics + num_nonzero_obs = layer.getnnz(axis=0) + obs_mean, _ = mean_variance_axis(layer, axis=0) + pct_dropout = (1 - num_nonzero_obs / layer.shape[0]) * 100 + total_counts_obs = np.ravel(layer.sum(axis=0)) + + # obs statistics + num_nonzero_vars = layer.getnnz(axis=1) + total_counts_var = np.ravel(layer.sum(axis=1)) + + top_metrics = {} + if par["top_n_vars"]: + par["top_n_vars"] = sorted(par["top_n_vars"]) + distributions = get_top_from_csr_matrix(layer, par["top_n_vars"]) + top_metrics = {distribution_size: distribution * 100 + for distribution_size, distribution + in zip(par["top_n_vars"], distributions.T)} + + total_expr_qc = {} + pct_expr_qc = {} + if par["var_qc_metrics"]: + for qc_metric in par["var_qc_metrics"]: + if not qc_metric in var: + raise ValueError(f"Value for --var_qc_metrics, {qc_metric} " + f"not found in .var 
for modality {par['modality']}") + qc_column = var[qc_metric] + if qc_column.isna().any(): + if par["var_qc_metrics_fill_na_value"] is None: + raise ValueError(f"The .var column '{qc_metric}', selected by '--var_qc_metrics', contains NA values. " + "It is ambiguous whether or not to include these values in the static calulation. " + "You can explicitly map the NA values to 'False' or 'True using '--var_qc_metrics_fill_na_value'") + else: + qc_column = qc_column.fillna(par['var_qc_metrics_fill_na_value'], inplace=False) + qc_column = qc_column.values + if set(np.unique(qc_column)) - {True, False}: + raise ValueError(f"Column {qc_metric} in .var for modality {par['modality']} " + f"must only contain boolean values") + + total_expr_qc[qc_metric] = np.ravel(layer[:, qc_column].sum(axis=1)) + pct_expr_qc[qc_metric] = total_expr_qc[qc_metric] / total_counts_var * 100 + + # Write all of the calculated statistics + modality_data.var = modality_data.var.assign( + **{"pct_dropout": pct_dropout, + "num_nonzero_obs": num_nonzero_obs, + "obs_mean": obs_mean, + "total_counts": total_counts_obs}) + + modality_data.obs = modality_data.obs.assign( + **({"num_nonzero_vars": num_nonzero_vars, + "total_counts": total_counts_var} | \\ + {f"pct_{qc_metric}": col for qc_metric, col in pct_expr_qc.items()} | \\ + {f"total_counts_{qc_metrix}": col for qc_metrix, col in total_expr_qc.items()}) | \\ + {f"pct_of_counts_in_top_{n_top}_vars": col for n_top, col in top_metrics.items()}) + + input_data.write(par["output"], compression=par["output_compression"]) + +def get_top_from_csr_matrix(matrix, top_n_genes): + # csr matrices stores a 3D matrix in a format such that data for individual cells + # are stored in 1 array. Another array (indptr) here stores the ranges of indices + # to select from the data-array (.e.g. data[indptr[0]:indptr[1]] for row 0) for each row. 
+ # Another array 'indices' maps each element of data to a column + # (data and indices arrays have the same length) + top_n_genes = np.array(top_n_genes).astype(np.int64) + assert np.all(top_n_genes[:-1] <= top_n_genes[1:]), "top_n_genes must be sorted" + row_indices, data = matrix.indptr, matrix.data + number_of_rows, max_genes_to_parse = row_indices.size-1, top_n_genes[-1] + top_data = np.zeros((number_of_rows, max_genes_to_parse), + dtype=data.dtype) + # Loop over each row to create a dense matrix without the 0 counts, + # but not for the whole matrix, only store the genes up until + # the largest number of top n genes. + for row_number in range(number_of_rows): + row_start_index, row_end_index = row_indices[row_number], row_indices[row_number+1] + row_data = data[row_start_index:row_end_index] # all non-zero counts for an row + try: + # There are less genes with counts in the row than the + # maximum number of genes we would like to select + # all these genes are in the top genes, just store them + top_data[row_number, :row_end_index-row_start_index] = row_data + except ValueError: + # Store the counts for the top genes + top_data[row_number, :] = np.partition(row_data, -max_genes_to_parse)[-max_genes_to_parse:] + + # Partition works from smallest to largest, but we want largest + # so do smallest to largest first (but with reversed indices) + top_data = np.partition(top_data, max_genes_to_parse - top_n_genes) + # And then switch the order around + top_data = np.flip(top_data, axis=1) + + cumulative = top_data.cumsum(axis=1, dtype=np.float64)[:,top_n_genes-1] + return cumulative / np.array(matrix.sum(axis=1)) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/qc/calculate_qc_metrics/setup_logger.py b/target/docker/qc/calculate_qc_metrics/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/qc/calculate_qc_metrics/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/qc/fastqc/.config.vsh.yaml b/target/docker/qc/fastqc/.config.vsh.yaml new file mode 100644 index 00000000000..9c3584841ef --- /dev/null +++ b/target/docker/qc/fastqc/.config.vsh.yaml @@ -0,0 +1,156 @@ +functionality: + name: "fastqc" + namespace: "qc" + version: "0.12.4" + arguments: + - type: "string" + name: "--mode" + alternatives: + - "-m" + description: "The mode in which the component works. Can be either files or dir." 
+ info: null + default: + - "files" + required: false + choices: + - "files" + - "dir" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Directory containing input fastq files." + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory to write reports to." + info: null + example: + - "qc" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--threads" + alternatives: + - "-t" + description: "Specifies the number of files which can be processed simultaneously.\ + \ Each thread will be allocated 250MB of\nmemory.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.\ + \ This component can take one or more files (by means of shell globbing) or a\ + \ complete directory.\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + 
- "fastqc" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/fastqc" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/fastqc/fastqc" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/qc/fastqc/fastqc b/target/docker/qc/fastqc/fastqc new file mode 100755 index 00000000000..886de003bf4 --- /dev/null +++ b/target/docker/qc/fastqc/fastqc @@ -0,0 +1,994 @@ +#!/usr/bin/env bash + +# fastqc 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. 
+# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 
+VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. 
+function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="fastqc" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "fastqc 0.12.4" + echo "" + echo "Fastqc component, please see" + echo "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can" + echo "take one or more files (by means of shell globbing) or a complete directory." + echo "" + echo "Arguments:" + echo " -m, --mode" + echo " type: string" + echo " default: files" + echo " choices: [ files, dir ]" + echo " The mode in which the component works. Can be either files or dir." + echo "" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: fastq_dir" + echo " Directory containing input fastq files." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: qc" + echo " Output directory to write reports to." + echo "" + echo " -t, --threads" + echo " type: integer" + echo " Specifies the number of files which can be processed simultaneously." + echo " Each thread will be allocated 250MB of" + echo " memory." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y fastqc && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.description="Companion container for running component qc fastqc" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-fastqc-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname 
"$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "fastqc 0.12.4" + exit + ;; + --mode) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --mode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --mode=*) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'--mode=*\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -m) + [ -n "$VIASH_PAR_MODE" ] && ViashError Bad arguments for option \'-m\': \'$VIASH_PAR_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -m. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --threads) + [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_THREADS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --threads. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --threads=*) + [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'--threads=*\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_THREADS=$(ViashRemoveFlags "$1") + shift 1 + ;; + -t) + [ -n "$VIASH_PAR_THREADS" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_THREADS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_THREADS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 || shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_fastqc:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODE+x} ]; then + VIASH_PAR_MODE="files" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_THREADS" ]]; then + if ! [[ "$VIASH_PAR_THREADS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--threads' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_MODE" ]; then + VIASH_PAR_MODE_CHOICES=("files:dir") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_MODE:" ]]; then + ViashError '--mode' specified value of \'$VIASH_PAR_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/qc_fastqc:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_fastqc:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_fastqc:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-fastqc-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "${VIASH_PAR_MODE}" | sed "s#'#'\"'\"'#g;s#.*#par_mode='&'#" ; else echo "# par_mode="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\"'\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p "\$par_output" + +if [ "\$par_mode" == "dir" ]; then + par_input="\$par_input/*.fastq.gz" +fi + +eval fastqc \${par_threads:+--threads \$par_threads} -o "\$par_output" "\$par_input" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/qc/multiqc/.config.vsh.yaml b/target/docker/qc/multiqc/.config.vsh.yaml new file mode 100644 index 00000000000..783bf195eab --- /dev/null +++ b/target/docker/qc/multiqc/.config.vsh.yaml @@ -0,0 +1,140 @@ +functionality: + name: "multiqc" + namespace: "qc" + version: "0.12.4" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Inputs for MultiQC." 
+ info: null + example: + - "input.txt" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Create report in the specified output directory." + info: null + example: + - "report" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "MultiQC aggregates results from bioinformatics analyses across many\ + \ samples into a single report.\nIt searches a given directory for analysis logs\ + \ and compiles a HTML report. It's a general use tool, perfect for summarising\ + \ the output from numerous bioinformatics tools.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/10x_5k_anticmv/fastqc/" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "multiqc" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 
2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/multiqc" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/qc/multiqc/multiqc" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/qc/multiqc/multiqc b/target/docker/qc/multiqc/multiqc new file mode 100755 index 00000000000..042737333e5 --- /dev/null +++ b/target/docker/qc/multiqc/multiqc @@ -0,0 +1,959 @@ +#!/usr/bin/env bash + +# multiqc 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+
+set -e  # exit on the first unhandled command failure
+
+if [ -z "$VIASH_TEMP" ]; then  # no explicit temp dir: take the first non-empty candidate below, else /tmp
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1 : unquoted string
+# return : possibly quoted string (printed on stdout)
+# examples:
+# ViashQuote --foo # returns --foo
+# ViashQuote bar # returns 'bar'
+# ViashQuote --foo=bar # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then  # --flag=value: quote only the value part
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then  # bare flag: leave untouched
+    echo "$1"
+  else  # plain value: wrap the whole string in single quotes
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1 : string with a possible leading flag
+# return : string without possible leading flag (printed on stdout)
+# examples:
+# ViashRemoveFlags --foo=bar # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory containing a bash file, following symlinks
+# usage : ViashSourceDir ${BASH_SOURCE[0]}
+# $1 : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory that holds the bash file (printed on stdout)
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do  # keep resolving while SOURCE is itself a symlink
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"  # relative link target: resolve it against the link's own directory
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE  # default verbosity: notice and anything more severe
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then  # emit only when the configured verbosity reaches this level
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component (quoted so a path containing spaces stays one argument)
+VIASH_META_RESOURCES_DIR=`ViashSourceDir "${BASH_SOURCE[0]}"`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="multiqc"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "multiqc 0.12.4"
+  echo ""
+  echo "MultiQC aggregates results from bioinformatics analyses across many samples into"
+  echo "a single report."
+  echo "It searches a given directory for analysis logs and compiles a HTML report. It's"
+  echo "a general use tool, perfect for summarising the output from numerous"
+  echo "bioinformatics tools."
+  echo ""
+  echo "Arguments:"
+  echo " -i, --input"
+  echo " type: file, required parameter, multiple values allowed, file must exist"
+  echo " example: input.txt"
+  echo " Inputs for MultiQC."
+  echo ""
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " example: report"
+  echo " Create report in the specified output directory."
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# Exits the script with status 1 when the docker client is missing or 'docker version' fails.
+# examples:
+# ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then  # the lookup must be executed; wrapped in '[ ]' its words are parsed as a test expression
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  local save=$-; set +e  # remember the shell flags, then let 'docker version' fail without aborting
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e  # re-enable errexit only if it was on before
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# ViashDockerRemoteTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerRemoteTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# docker pull python:latest
+# ViashDockerLocalTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerLocalTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q $1)" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# ViashDockerPull python:latest
+# echo $? # returns '0'
+# ViashDockerPull sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then  # verbose: show docker's own output
+    docker pull $1 && return 0 || return 1
+  else
+    local save=$-; set +e  # local: must not overwrite the flags saved by the calling helper
+    docker pull $1 2> /dev/null > /dev/null
+    out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was pushed
+# examples:
+# ViashDockerPush python:latest
+# echo $? # returns '0'
+# ViashDockerPush sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  local save=$-; set +e  # remember the shell flags, then let 'docker push' fail without aborting
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push $1
+    out=$?
+  else
+    docker push $1 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+# ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  local save=$-; set +e  # local: ViashDockerPull saves its own flags, ours must survive the call
+  ViashDockerPull $1
+  out=$?
+  [[ $save =~ e ]] && set -e  # errexit is back on before a possible build
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild $@
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# $2 : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "multiqc" + +LABEL org.opencontainers.image.description="Companion container for running component qc multiqc" +LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format 
`[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-multiqc-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "multiqc 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/qc_multiqc:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + IFS=':' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() + IFS=':' + for var in $VIASH_PAR_INPUT; do + unset IFS + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$var")" ) + var=$(ViashAutodetectMount "$var") + VIASH_TEST_INPUT+=( "$var" ) + done + VIASH_PAR_INPUT=$(IFS=':' ; echo "${VIASH_TEST_INPUT[*]}") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/qc_multiqc:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_multiqc:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/qc_multiqc:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-multiqc-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import subprocess + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +# Run MultiQC +subprocess.run(["multiqc", "-o", par["output"]] + par["input"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT + IFS=':' + for var in $VIASH_PAR_INPUT; do + unset IFS + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashStripAutomount "$var")" + else + VIASH_TEST_INPUT="$VIASH_TEST_INPUT:""$(ViashStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/query/cellxgene_census/.config.vsh.yaml b/target/docker/query/cellxgene_census/.config.vsh.yaml new file mode 100644 index 00000000000..f09ceac9ce5 --- /dev/null +++ b/target/docker/query/cellxgene_census/.config.vsh.yaml @@ -0,0 +1,260 @@ +functionality: + name: "cellxgene_census" + namespace: "query" + version: "0.12.4" + authors: + - name: "Matthias Beyens" + info: + role: "Contributor" + links: + github: "MatthiasBeyens" + orcid: "0000-0003-3304-0706" + email: "matthias.beyens@gmail.com" + linkedin: "mbeyens" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Inputs" + description: "Arguments related to the input (aka query) dataset." + arguments: + - type: "string" + name: "--input_database" + description: "Full input database S3 prefix URL. Default: CellxGene Census" + info: null + example: + - "s3://" + default: + - "CellxGene" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to store the output in." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cellxgene_release" + description: "CellxGene Census release date. 
More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html" + info: null + default: + - "2023-05-15" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Query" + description: "Arguments related to the query." + arguments: + - type: "string" + name: "--species" + description: "Specie(s) of interest. If not specified, Homo Sapiens will be\ + \ queried." + info: null + example: + - "homo_sapiens" + default: + - "homo_sapiens" + required: false + choices: + - "homo_sapiens" + - "mus_musculus" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cell_query" + description: "The query for selecting the cells as defined by the cellxgene\ + \ census schema." + info: null + example: + - "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136',\ + \ 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cells_filter_columns" + description: "The query for selecting the cells as defined by the cellxgene\ + \ census schema." + info: null + example: + - "dataset_id" + - "tissue" + - "assay" + - "disease" + - "cell_type" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_cells_filter_columns" + description: "Minimum of amount of summed cells_filter_columns cells" + info: null + example: + - 100.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + description: "Output arguments." + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Query CellxGene Census or user-specified TileDBSoma object, and eventually\ + \ fetch cell and gene metadata or/and expression counts." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "cellxgene-census~=1.2.0" + - "obonet~=1.0.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: 
"memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/query/cellxgene_census" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/query/cellxgene_census/cellxgene_census" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/query/cellxgene_census/cellxgene_census b/target/docker/query/cellxgene_census/cellxgene_census new file mode 100755 index 00000000000..996cd214d1b --- /dev/null +++ b/target/docker/query/cellxgene_census/cellxgene_census @@ -0,0 +1,1223 @@ +#!/usr/bin/env bash + +# cellxgene_census 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+#  * Matthias Beyens
+#  * Dries De Maeyer (author)
+
+# Abort the wrapper as soon as a command exits with a non-zero status.
+set -e
+
+# Resolve the temporary directory. When VIASH_TEMP is empty, take the first
+# non-empty value among the variables below, falling back to /tmp.
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1     : unquoted string
+# return : possibly quoted string
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  # "--flag=value": wrap only the value part in single quotes
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  # bare flag: printed as-is
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  # anything else is treated as a value and quoted whole
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the path of a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory containing the bash file
+# NOTE: SOURCE and DIR are assigned without `local`, and the final `cd`
+# runs in whatever shell executes the function, so call it from a command
+# substitution to keep the caller's working directory unchanged.
+function ViashSourceDir {
+  SOURCE="$1"
+  # resolve the chain of symlinks one hop at a time
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    # a relative link target is resolved against the directory of the link
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+# default verbosity: messages up to and including 'notice' are shown
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+#   usage: ViashLog 1 alert Oh no something went wrong!
+#   $1: required verbosity level
+#   $2: display tag
+#   $3+: messages to display
+#   stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  # print only when the current verbosity is at least the required level
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+#   usage: ViashEmergency Oh no something went wrong.
+#   stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+#   usage: ViashAlert Oh no something went wrong.
+#   stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+#   usage: ViashCritical Oh no something went wrong.
+#   stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+#   usage: ViashError Oh no something went wrong.
+#   stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+#   usage: ViashWarning Something may have gone wrong.
+#   stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+#   usage: ViashNotice This just happened.
+#   stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+#   usage: ViashInfo This just happened.
+#   stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="cellxgene_census" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "cellxgene_census 0.12.4" + echo "" + echo "Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch" + echo "cell and gene metadata or/and expression counts." + echo "" + echo "Inputs:" + echo " Arguments related to the input (aka query) dataset." + echo "" + echo " --input_database" + echo " type: string" + echo " default: CellxGene" + echo " example: s3://" + echo " Full input database S3 prefix URL. Default: CellxGene Census" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " Which modality to store the output in." + echo "" + echo " --cellxgene_release" + echo " type: string" + echo " default: 2023-05-15" + echo " CellxGene Census release date. More information:" + echo " " + echo "https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html" + echo "" + echo "Query:" + echo " Arguments related to the query." + echo "" + echo " --species" + echo " type: string" + echo " default: homo_sapiens" + echo " example: homo_sapiens" + echo " choices: [ homo_sapiens, mus_musculus ]" + echo " Specie(s) of interest. If not specified, Homo Sapiens will be queried." 
+ echo "" + echo " --cell_query" + echo " type: string" + echo " example: is_primary_data == True and cell_type_ontology_term_id in" + echo "['CL:0000136', 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" + echo " The query for selecting the cells as defined by the cellxgene census" + echo " schema." + echo "" + echo " --cells_filter_columns" + echo " type: string, multiple values allowed" + echo " example: dataset_id:tissue:assay:disease:cell_type" + echo " The query for selecting the cells as defined by the cellxgene census" + echo " schema." + echo "" + echo " --min_cells_filter_columns" + echo " type: double" + echo " example: 100.0" + echo " Minimum of amount of summed cells_filter_columns cells" + echo "" + echo "Outputs:" + echo " Output arguments." + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found
+#   (the value returned is the exit status of `docker push`)
+# NOTE: the success/failure messages print the global $VSHD_ID, which is
+# assigned by ViashDockerSetup, rather than $1.
+# examples:
+#   ViashDockerPush python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  # disable errexit while pushing so the status can be reported below
+  save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push $1
+    out=$?
+  else
+    docker push $1 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$VSHD_ID' push succeeded."
+  else
+    ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $2+                 : extra arguments, forwarded to ViashDockerBuild only
+# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  save=$-; set +e
+  ViashDockerPull $1
+  out=$?
+  [[ $save =~ e ]] && set -e
+  # fall back to a local build only when the pull failed
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild $@
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $2                  : docker setup strategy, see DockerSetupStrategy.scala
+# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# Sets the globals VSHD_ID and VSHD_STRAT; exits with status 1 when the
+# strategy is not recognised.
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  VSHD_ID="$1"
+  VSHD_STRAT="$2"
+  # unconditional strategies: build and/or pull regardless of local state
+  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
+    ViashDockerBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
+    ViashDockerPull $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID --no-cache
+  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild $VSHD_ID
+  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
+    ViashDockerBuild $VSHD_ID
+  # "ifneedbe*" strategies: do nothing when the image already exists locally
+  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
+    save=$-; set +e
+    ViashDockerLocalTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $VSHD_ID already exists"
+    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
+      ViashDockerBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
+      ViashDockerPull $VSHD_ID
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID --no-cache
+    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild $VSHD_ID
+    else
+      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+      exit 1
+    fi
+  # push strategies
+  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
+    ViashDockerPush "$VSHD_ID"
+  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
+    # push only when the tag is not yet available on the remote
+    save=$-; set +e
+    ViashDockerRemoteTagCheck $VSHD_ID
+    outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
+    else
+      ViashNotice "Container '$VSHD_ID' does not yet exist."
+      ViashDockerPush "$VSHD_ID"
+    fi
+  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $@                  : commands to verify being present
+# Exits with status 1 and names the first missing command when the check
+# inside the container fails.
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  tag=$1
+  shift 1
+  commands="$@"
+  save=$-; set +e
+  # the loop runs inside the container; \$-escaped variables are expanded there
+  missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$tag' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# return : dockerfile required to run this component
+# The heredoc delimiter is quoted, so the text is emitted verbatim without
+# shell expansion.
+# examples:
+#   ViashDockerfile
+function ViashDockerfile {
+  cat << 'VIASHDOCKER'
+FROM python:3.9
+
+ENTRYPOINT []
+
+
+RUN pip install --upgrade pip && \
+  pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "cellxgene-census~=1.2.0" "obonet~=1.0.0"
+
+LABEL org.opencontainers.image.authors="Matthias Beyens, Dries De Maeyer"
+LABEL org.opencontainers.image.description="Companion container for running component query cellxgene_census"
+LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
+LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
+LABEL org.opencontainers.image.version="0.12.4"
+
+VIASHDOCKER
+}
+
+# ViashDockerBuild: build a docker container
+# $1                  : image
identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-cellxgene_census-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "cellxgene_census 0.12.4" + exit + ;; + --input_database) + [ -n "$VIASH_PAR_INPUT_DATABASE" ] && ViashError Bad arguments for option \'--input_database\': \'$VIASH_PAR_INPUT_DATABASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_DATABASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_database. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_database=*) + [ -n "$VIASH_PAR_INPUT_DATABASE" ] && ViashError Bad arguments for option \'--input_database=*\': \'$VIASH_PAR_INPUT_DATABASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_DATABASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --cellxgene_release) + [ -n "$VIASH_PAR_CELLXGENE_RELEASE" ] && ViashError Bad arguments for option \'--cellxgene_release\': \'$VIASH_PAR_CELLXGENE_RELEASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CELLXGENE_RELEASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --cellxgene_release. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --cellxgene_release=*) + [ -n "$VIASH_PAR_CELLXGENE_RELEASE" ] && ViashError Bad arguments for option \'--cellxgene_release=*\': \'$VIASH_PAR_CELLXGENE_RELEASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CELLXGENE_RELEASE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --species) + [ -n "$VIASH_PAR_SPECIES" ] && ViashError Bad arguments for option \'--species\': \'$VIASH_PAR_SPECIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPECIES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --species. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --species=*) + [ -n "$VIASH_PAR_SPECIES" ] && ViashError Bad arguments for option \'--species=*\': \'$VIASH_PAR_SPECIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPECIES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --cell_query) + [ -n "$VIASH_PAR_CELL_QUERY" ] && ViashError Bad arguments for option \'--cell_query\': \'$VIASH_PAR_CELL_QUERY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CELL_QUERY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --cell_query. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --cell_query=*) + [ -n "$VIASH_PAR_CELL_QUERY" ] && ViashError Bad arguments for option \'--cell_query=*\': \'$VIASH_PAR_CELL_QUERY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CELL_QUERY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --cells_filter_columns) + if [ -z "$VIASH_PAR_CELLS_FILTER_COLUMNS" ]; then + VIASH_PAR_CELLS_FILTER_COLUMNS="$2" + else + VIASH_PAR_CELLS_FILTER_COLUMNS="$VIASH_PAR_CELLS_FILTER_COLUMNS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --cells_filter_columns. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --cells_filter_columns=*) + if [ -z "$VIASH_PAR_CELLS_FILTER_COLUMNS" ]; then + VIASH_PAR_CELLS_FILTER_COLUMNS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_CELLS_FILTER_COLUMNS="$VIASH_PAR_CELLS_FILTER_COLUMNS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --min_cells_filter_columns) + [ -n "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" ] && ViashError Bad arguments for option \'--min_cells_filter_columns\': \'$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_FILTER_COLUMNS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_filter_columns. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells_filter_columns=*) + [ -n "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" ] && ViashError Bad arguments for option \'--min_cells_filter_columns=*\': \'$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_FILTER_COLUMNS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number 
* 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_INPUT_DATABASE+x} ]; then + VIASH_PAR_INPUT_DATABASE="CellxGene" +fi +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_CELLXGENE_RELEASE+x} ]; then + VIASH_PAR_CELLXGENE_RELEASE="2023-05-15" +fi +if [ -z ${VIASH_PAR_SPECIES+x} ]; then + VIASH_PAR_SPECIES="homo_sapiens" +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS_FILTER_COLUMNS" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--min_cells_filter_columns' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_SPECIES" ]; then + VIASH_PAR_SPECIES_CHOICES=("homo_sapiens:mus_musculus") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_SPECIES_CHOICES[*]}:" =~ ":$VIASH_PAR_SPECIES:" ]]; then + ViashError '--species' specified value of \'$VIASH_PAR_SPECIES\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/query_cellxgene_census:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-cellxgene_census-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import os +import cellxgene_census +import mudata as mu +import anndata as ad + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_database': $( if [ ! -z ${VIASH_PAR_INPUT_DATABASE+x} ]; then echo "r'${VIASH_PAR_INPUT_DATABASE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cellxgene_release': $( if [ ! -z ${VIASH_PAR_CELLXGENE_RELEASE+x} ]; then echo "r'${VIASH_PAR_CELLXGENE_RELEASE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'species': $( if [ ! -z ${VIASH_PAR_SPECIES+x} ]; then echo "r'${VIASH_PAR_SPECIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cell_query': $( if [ ! -z ${VIASH_PAR_CELL_QUERY+x} ]; then echo "r'${VIASH_PAR_CELL_QUERY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cells_filter_columns': $( if [ ! -z ${VIASH_PAR_CELLS_FILTER_COLUMNS+x} ]; then echo "r'${VIASH_PAR_CELLS_FILTER_COLUMNS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'min_cells_filter_columns': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS+x} ]; then echo "float(r'${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def connect_census(input_database, release): + """ + Connect to CellxGene Census or user-provided TileDBSoma object + """ + if input_database != "CellxGene": + raise NotImplementedError( + "Custom census database is not implemented yet!" + ) + + logger.info( + "Initializing %s release %s", + input_database, release + ) + return cellxgene_census.open_soma( + census_version = release + ) + + +def get_anndata(census_connection, cell_query, species): + logger.info( + "Getting gene expression data based on %s query.", + cell_query + ) + return cellxgene_census.get_anndata( + census = census_connection, + obs_value_filter = cell_query, + organism = species + ) + + +def add_cellcensus_metadata_obs(census_connection, query_data): + logger.info( + "Adding extented metadata to gene expression data." 
+ ) + census_datasets = census_connection["census_info"]["datasets"].read().concat().to_pandas() + + query_data.obs.dataset_id = query_data.obs.dataset_id.astype("category") + + dataset_info = census_datasets[census_datasets.dataset_id.isin(query_data.obs.dataset_id.cat.categories)]\\ + [['collection_id', 'collection_name', 'collection_doi', 'dataset_id', 'dataset_title']]\\ + .reset_index(drop=True)\\ + .apply(lambda x: x.astype('category')) + + return query_data.obs.merge( + dataset_info, on='dataset_id', how = 'left' + ) + + +def cellcensus_cell_filter(query_data, cells_filter_columns, min_cells_filter_columns): + t0 = query_data.shape + query_data = query_data[ + query_data.obs.groupby(cells_filter_columns)["soma_joinid"].transform('count') >= min_cells_filter_columns + ] + t1 = query_data.shape + logger.info( + 'Removed %s cells based on %s min_cells_filter_columns of %s cells_filter_columns.' + % ((t0[0] - t1[0]), min_cells_filter_columns, cells_filter_columns) + ) + return query_data + + +def write_mudata(mdata, output_location, compression): + logger.info("Writing %s", output_location) + mdata.write_h5mu( + output_location, + compression=compression + ) + + +def main(): + + # start dev + logger.info('cells_filter_columns: %s' % par["cells_filter_columns"]) + logger.info('min_cells_filter_columns: %s' % par["min_cells_filter_columns"]) + # end dev + + census_connection = connect_census( + par["input_database"], + par["cellxgene_release"] + ) + + query_data = get_anndata( + census_connection, + par["cell_query"], + par["species"] + ) + + query_data.obs = add_cellcensus_metadata_obs( + census_connection, + query_data + ) + + census_connection.close() + del census_connection + + if par["cells_filter_columns"]: + if not par["min_cells_filter_columns"]: + raise NotImplementedError( + "You specified cells_filter_columns, thus add min_cells_filter_columns!" 
+ ) + query_data = cellcensus_cell_filter( + query_data, + par["cells_filter_columns"], + par["min_cells_filter_columns"] + ) + + query_data.var_names = query_data.var["feature_id"] + query_data.var["gene_symbol"] = query_data.var["feature_name"] + + # Create empty mudata file + mdata = mu.MuData({par["modality"]: ad.AnnData()}) + + write_mudata( + mdata, + par["output"], + par["output_compression"] + ) + + mu.write_h5ad(par["output"], data=query_data, mod=par["modality"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/query/cellxgene_census/setup_logger.py b/target/docker/query/cellxgene_census/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/query/cellxgene_census/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/reference/build_bdrhap_reference/.config.vsh.yaml b/target/docker/reference/build_bdrhap_reference/.config.vsh.yaml new file mode 100644 index 00000000000..69818247350 --- /dev/null +++ b/target/docker/reference/build_bdrhap_reference/.config.vsh.yaml @@ -0,0 +1,186 @@ +functionality: + name: "build_bdrhap_reference" + namespace: "reference" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--genome_fasta" + description: "Reference genome fasta." 
+ info: null + example: + - "genome_sequence.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + description: "Reference transcriptome annotation." + info: null + example: + - "transcriptome_annotation.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Star index" + info: null + example: + - "star_index.tar.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Compile a reference into a STAR index compatible with the BD Rhapsody\ + \ pipeline." + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "bdgenomics/rhapsody:1.10.1" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "pigz" + interactive: false + test_setup: + - type: "docker" + env: + - "GOPATH /root/go" + - "GOBIN /root/go/bin" + - "PATH \"${PATH}:/root/go/bin\"" + - type: "apt" + packages: + - "golang" + interactive: false + - type: "docker" + run: + - "go get golang.org/dl/go1.20.6 && go1.20.6 download && \\\ngit clone --branch\ + \ v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/ &&\ + \ go1.20.6 build && cp 
seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_bdrhap_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference b/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference new file mode 100755 index 00000000000..0b546f14ddd --- /dev/null +++ b/target/docker/reference/build_bdrhap_reference/build_bdrhap_reference @@ -0,0 +1,972 @@ +#!/usr/bin/env bash + +# 
build_bdrhap_reference 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + 
SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="build_bdrhap_reference" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "build_bdrhap_reference 0.12.4" + echo "" + echo "Compile a reference into a STAR index compatible with the BD Rhapsody pipeline." + echo "" + echo "Arguments:" + echo " --genome_fasta" + echo " type: file, required parameter, file must exist" + echo " example: genome_sequence.fa.gz" + echo " Reference genome fasta." + echo "" + echo " --transcriptome_gtf" + echo " type: file, required parameter, file must exist" + echo " example: transcriptome_annotation.gtf.gz" + echo " Reference transcriptome annotation." 
+ echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: star_index.tar.gz" + echo " Star index" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # run the lookup itself; inside [ ] it was never executed + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM bdgenomics/rhapsody:1.10.1 + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y pigz && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component reference build_bdrhap_reference" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-build_bdrhap_reference-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "build_bdrhap_reference 0.12.4" + exit + ;; + --genome_fasta) + [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_fasta=*) + [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta=*\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_gtf) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_gtf=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1; shift 2 # drop the flag and its strategy value + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # flag and value are a single word here + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the mount spec is embedded in $1 + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units 
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset VIASH_META_MEMORY_B # unset takes the variable name, not its (empty) value + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS # pass the name so a set-but-empty value is really removed +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then + ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then + ViashError '--transcriptome_gtf' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ] && [ ! -e "$VIASH_PAR_GENOME_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_GENOME_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GENOME_FASTA")" ) + VIASH_PAR_GENOME_FASTA=$(ViashAutodetectMount "$VIASH_PAR_GENOME_FASTA") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_bdrhap_reference:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-build_bdrhap_reference-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) +$( if [ ! 
-z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\$tmpdir" +} +trap clean_up EXIT + +meta_cpus="\${meta_cpus:-1}" + +# process params +extra_params=( ) + +if [ ! -z "\$meta_cpus" ]; then + extra_params+=( "--runThreadN \$meta_cpus" ) +fi + +echo "> Unzipping input files" +unpigz -c "\$par_genome_fasta" > "\$tmpdir/genome.fa" +unpigz -c "\$par_transcriptome_gtf" > "\$tmpdir/transcriptome.gtf" + +echo "> Building star index" +mkdir "\$tmpdir/out" +STAR \\ + --runMode genomeGenerate \\ + --genomeDir "\$tmpdir/out" \\ + --genomeFastaFiles "\$tmpdir/genome.fa" \\ + --sjdbGTFfile "\$tmpdir/transcriptome.gtf" \\ + --sjdbOverhang 100 \\ + --genomeSAindexNbases 11 \\ + "\${extra_params[@]}" + +echo "> Creating archive" +tar --use-compress-program="pigz -k " -cf "\$par_output" -C "\$tmpdir/out" . +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_PAR_GENOME_FASTA=$(ViashStripAutomount "$VIASH_PAR_GENOME_FASTA") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/reference/build_cellranger_reference/.config.vsh.yaml b/target/docker/reference/build_cellranger_reference/.config.vsh.yaml new file mode 100644 index 00000000000..01c85c3b5f3 --- /dev/null +++ b/target/docker/reference/build_cellranger_reference/.config.vsh.yaml @@ -0,0 +1,187 @@ +functionality: + name: "build_cellranger_reference" + namespace: "reference" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--genome_fasta" + description: "Reference genome fasta." 
+ info: null + example: + - "genome_sequence.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + description: "Reference transcriptome annotation." + info: null + example: + - "transcriptome_annotation.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output folder" + info: null + example: + - "cellranger_reference" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Build a Cell Ranger-compatible reference folder from user-supplied\ + \ genome FASTA and gene GTF files. Creates a new folder named after the genome." + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/cellranger:7.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "pigz" + interactive: false + test_setup: + - type: "docker" + env: + - "GOPATH /root/go" + - "GOBIN /root/go/bin" + - "PATH \"${PATH}:/root/go/bin\"" + - type: "apt" + packages: + - "golang" + - "git" + interactive: false + - type: "docker" + run: + - "go install golang.org/dl/go1.20.6@latest && go1.20.6 download && \\\ngit clone\ + \ --branch 
v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/\ + \ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_cellranger_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/build_cellranger_reference/build_cellranger_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/reference/build_cellranger_reference/build_cellranger_reference b/target/docker/reference/build_cellranger_reference/build_cellranger_reference new file mode 100755 index 00000000000..894c0d9411a --- 
/dev/null +++ b/target/docker/reference/build_cellranger_reference/build_cellranger_reference @@ -0,0 +1,977 @@ +#!/usr/bin/env bash + +# build_cellranger_reference 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function 
ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. 
+function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="build_cellranger_reference" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "build_cellranger_reference 0.12.4" + echo "" + echo "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA" + echo "and gene GTF files. Creates a new folder named after the genome." + echo "" + echo "Arguments:" + echo " --genome_fasta" + echo " type: file, required parameter, file must exist" + echo " example: genome_sequence.fa.gz" + echo " Reference genome fasta." + echo "" + echo " --transcriptome_gtf" + echo " type: file, required parameter, file must exist" + echo " example: transcriptome_annotation.gtf.gz" + echo " Reference transcriptome annotation." 
+ echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: cellranger_reference" + echo " Output folder" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ghcr.io/data-intuitive/cellranger:7.0 + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y pigz && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component reference build_cellranger_reference" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-build_cellranger_reference-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "build_cellranger_reference 0.12.4" + exit + ;; + --genome_fasta) + [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_fasta=*) + [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta=*\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_gtf) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_gtf=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in 
different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then + ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then + ViashError '--transcriptome_gtf' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ] && [ ! -e "$VIASH_PAR_GENOME_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_GENOME_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GENOME_FASTA")" ) + VIASH_PAR_GENOME_FASTA=$(ViashAutodetectMount "$VIASH_PAR_GENOME_FASTA") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_build_cellranger_reference:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-build_cellranger_reference-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) +$( if [ ! 
-z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\$tmpdir" +} +trap clean_up EXIT + +# just to make sure +par_genome_fasta=\`realpath \$par_genome_fasta\` +par_transcriptome_gtf=\`realpath \$par_transcriptome_gtf\` +par_output=\`realpath \$par_output\` + +# process params +extra_params=( ) + +if [ ! -z "\$meta_cpus" ]; then + extra_params+=( "--nthreads=\$meta_cpus" ) +fi +if [ ! -z "\$meta_memory_gb" ]; then + # always keep 2gb for the OS itself + memory_gb=\`python -c "print(int('\$meta_memory_gb') - 2)"\` + extra_params+=( "--memgb=\$memory_gb" ) +fi + +echo "> Unzipping input files" +unpigz -c "\$par_genome_fasta" > "\$tmpdir/genome.fa" + +echo "> Building star index" +cd "\$tmpdir" +cellranger mkref \\ + --fasta "\$tmpdir/genome.fa" \\ + --genes "\$par_transcriptome_gtf" \\ + --genome output \\ + "\${extra_params[@]}" + +echo "> Creating archive" +tar --use-compress-program="pigz -k " -cf "\$par_output" -C "\$tmpdir/output" . +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_PAR_GENOME_FASTA=$(ViashStripAutomount "$VIASH_PAR_GENOME_FASTA") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/reference/make_reference/.config.vsh.yaml b/target/docker/reference/make_reference/.config.vsh.yaml new file mode 100644 index 00000000000..7efc898dde8 --- /dev/null +++ b/target/docker/reference/make_reference/.config.vsh.yaml @@ -0,0 +1,212 @@ +functionality: + name: "make_reference" + namespace: "reference" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--genome_fasta" + description: "Reference genome fasta. 
Example: " + info: null + example: + - "genome_fasta.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + description: "Reference transcriptome annotation." + info: null + example: + - "transcriptome.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--ercc" + description: "ERCC sequence and annotation file." + info: null + example: + - "ercc.zip" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--subset_regex" + description: "Will subset the reference chromosomes using the given regex." + info: null + example: + - "(ERCC-00002|chr1)" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_fasta" + description: "Output genome sequence fasta." + info: null + example: + - "genome_sequence.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_gtf" + description: "Output transcriptome annotation gtf." 
+ info: null + example: + - "transcriptome_annotation.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Preprocess and build a transcriptome reference.\n\nExample input files\ + \ are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n\ + \ - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n\ + \ - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "pigz" + - "seqkit" + - "curl" + - "wget" + - "unzip" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + 
mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/make_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/reference/make_reference/make_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/reference/make_reference/make_reference b/target/docker/reference/make_reference/make_reference new file mode 100755 index 00000000000..856d22f21fb --- /dev/null +++ b/target/docker/reference/make_reference/make_reference @@ -0,0 +1,1076 @@ +#!/usr/bin/env bash + +# make_reference 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Angela Oliveira Pisco (author) +# * Robrecht Cannoodt (author, maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events 
depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="make_reference" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "make_reference 0.12.4" + echo "" + echo "Preprocess and build a transcriptome reference." + echo "" + echo "Example input files are:" + echo " - \`genome_fasta\`:" + echo "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz" + echo " - \`transcriptome_gtf\`:" + echo "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz" + echo " - \`ercc\`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip" + echo "" + echo "Arguments:" + echo " --genome_fasta" + echo " type: file, required parameter, file must exist" + echo " example: genome_fasta.fa.gz" + echo " Reference genome fasta. Example:" + echo "" + echo " --transcriptome_gtf" + echo " type: file, required parameter, file must exist" + echo " example: transcriptome.gtf.gz" + echo " Reference transcriptome annotation." + echo "" + echo " --ercc" + echo " type: file, file must exist" + echo " example: ercc.zip" + echo " ERCC sequence and annotation file." + echo "" + echo " --subset_regex" + echo " type: string" + echo " example: (ERCC-00002|chr1)" + echo " Will subset the reference chromosomes using the given regex." 
+ echo "" + echo " --output_fasta" + echo " type: file, required parameter, output, file must exist" + echo " example: genome_sequence.fa.gz" + echo " Output genome sequence fasta." + echo "" + echo " --output_gtf" + echo " type: file, required parameter, output, file must exist" + echo " example: transcriptome_annotation.gtf.gz" + echo " Output transcriptome annotation gtf." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. 
You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y pigz seqkit curl wget unzip && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.authors="Angela Oliveira Pisco, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component reference make_reference" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-make_reference-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "make_reference 0.12.4" + exit + ;; + --genome_fasta) + [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --genome_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --genome_fasta=*) + [ -n "$VIASH_PAR_GENOME_FASTA" ] && ViashError Bad arguments for option \'--genome_fasta=*\': \'$VIASH_PAR_GENOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_GENOME_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --transcriptome_gtf) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_gtf. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome_gtf=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && ViashError Bad arguments for option \'--transcriptome_gtf=*\': \'$VIASH_PAR_TRANSCRIPTOME_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --ercc) + [ -n "$VIASH_PAR_ERCC" ] && ViashError Bad arguments for option \'--ercc\': \'$VIASH_PAR_ERCC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ERCC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --ercc. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --ercc=*) + [ -n "$VIASH_PAR_ERCC" ] && ViashError Bad arguments for option \'--ercc=*\': \'$VIASH_PAR_ERCC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ERCC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --subset_regex) + [ -n "$VIASH_PAR_SUBSET_REGEX" ] && ViashError Bad arguments for option \'--subset_regex\': \'$VIASH_PAR_SUBSET_REGEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSET_REGEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --subset_regex. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --subset_regex=*) + [ -n "$VIASH_PAR_SUBSET_REGEX" ] && ViashError Bad arguments for option \'--subset_regex=*\': \'$VIASH_PAR_SUBSET_REGEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSET_REGEX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_fasta) + [ -n "$VIASH_PAR_OUTPUT_FASTA" ] && ViashError Bad arguments for option \'--output_fasta\': \'$VIASH_PAR_OUTPUT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fasta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_fasta=*) + [ -n "$VIASH_PAR_OUTPUT_FASTA" ] && ViashError Bad arguments for option \'--output_fasta=*\': \'$VIASH_PAR_OUTPUT_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FASTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_gtf) + [ -n "$VIASH_PAR_OUTPUT_GTF" ] && ViashError Bad arguments for option \'--output_gtf\': \'$VIASH_PAR_OUTPUT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_GTF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_gtf. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_gtf=*) + [ -n "$VIASH_PAR_OUTPUT_GTF" ] && ViashError Bad arguments for option \'--output_gtf=*\': \'$VIASH_PAR_OUTPUT_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_GTF=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 # the strategy is a separate word: consume the flag and its value + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # the strategy is embedded in $1: consume only this word + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the mount spec is embedded in $1, not in the next argument + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters.
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/reference_make_reference:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty (unset takes the variable name, not its expanded value) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty (unset takes the variable name, not its expanded value) +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_GENOME_FASTA+x} ]; then + ViashError '--genome_fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then + ViashError '--transcriptome_gtf' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_FASTA+x} ]; then + ViashError '--output_fasta' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_GTF+x} ]; then + ViashError '--output_gtf' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ] && [ ! -e "$VIASH_PAR_GENOME_FASTA" ]; then + ViashError "Input file '$VIASH_PAR_GENOME_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_GTF' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_ERCC" ] && [ ! -e "$VIASH_PAR_ERCC" ]; then + ViashError "Input file '$VIASH_PAR_ERCC' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FASTA")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FASTA")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_GTF")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_GTF")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_GENOME_FASTA")" ) + VIASH_PAR_GENOME_FASTA=$(ViashAutodetectMount "$VIASH_PAR_GENOME_FASTA") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_GTF")" ) + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_ERCC" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_ERCC")" ) + VIASH_PAR_ERCC=$(ViashAutodetectMount "$VIASH_PAR_ERCC") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_FASTA")" ) + VIASH_PAR_OUTPUT_FASTA=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_FASTA") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_FASTA" ) +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT_GTF" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT_GTF")" ) + VIASH_PAR_OUTPUT_GTF=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT_GTF") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_GTF" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/reference_make_reference:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_make_reference:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/reference_make_reference:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-make_reference-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_ERCC+x} ]; then echo "${VIASH_PAR_ERCC}" | sed "s#'#'\"'\"'#g;s#.*#par_ercc='&'#" ; else echo "# par_ercc="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBSET_REGEX+x} ]; then echo "${VIASH_PAR_SUBSET_REGEX}" | sed "s#'#'\"'\"'#g;s#.*#par_subset_regex='&'#" ; else echo "# par_subset_regex="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FASTA+x} ]; then echo "${VIASH_PAR_OUTPUT_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_output_fasta='&'#" ; else echo "# par_output_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_GTF+x} ]; then echo "${VIASH_PAR_OUTPUT_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_output_gtf='&'#" ; else echo "# par_output_gtf="; fi ) +$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\$(mktemp -d "$VIASH_TEMP/\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\$tmpdir" +} +trap clean_up EXIT + +echo "> Processing genome sequence" +genome_fasta="\$tmpdir/genome_sequence.fa" +# curl "\$par_genome_fasta" | gunzip > "\$genome_fasta" +gunzip -c "\$par_genome_fasta" > "\$genome_fasta" + +echo "> Processing transcriptome annotation" +transcriptome_gtf="\$tmpdir/transcriptome_annotation.gtf" +# curl "\$par_transcriptome_gtf" | gunzip > "\$transcriptome_gtf" +gunzip -c "\$par_transcriptome_gtf"> "\$transcriptome_gtf" + +if [[ ! -z \$par_ercc ]]; then + echo "> Processing ERCC sequences" + # wget "\$par_ercc" -O "\$tmpdir/ercc.zip" + # unzip "\$tmpdir/ercc.zip" -d "\$tmpdir" + unzip "\$par_ercc" -d "\$tmpdir" + cat "\$tmpdir/ERCC92.fa" >> "\$genome_fasta" + cat "\$tmpdir/ERCC92.gtf" >> "\$transcriptome_gtf" +fi + +# create output & filter reference if so desired +if [[ ! 
-z \$par_subset_regex ]]; then + echo "> Subsetting reference with regex '\$par_subset_regex'" + awk '{print \$1}' "\$genome_fasta" | seqkit grep -r -p "^\$par_subset_regex\\\$" > "\$tmpdir/genome_sequence_filtered.fa" + genome_fasta="\$tmpdir/genome_sequence_filtered.fa" + grep -E "^\$par_subset_regex[^A-Za-z0-9]" "\$transcriptome_gtf" > "\$tmpdir/transcriptome_annotation_filtered.gtf" + transcriptome_gtf="\$tmpdir/transcriptome_annotation_filtered.gtf" + + echo + echo "Matched tags:" + cat "\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq + echo +fi + +echo "> Gzipping outputs" +pigz -c "\$genome_fasta" > "\$par_output_fasta" +pigz -c "\$transcriptome_gtf" > "\$par_output_gtf" + +# to do: re enable +# echo "> Sanity check of outputs" +# readarray -t fasta_tags < <( cat "\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq ) +# readarray -t transcriptome_tags < <( cat "\$transcriptome_gtf" | cut -d\$'\\t' -f1 | sort | uniq | grep '^[^#]' ) +# [ "\${fasta_tags[*]}" == "\${transcriptome_tags[*]}" ] || { echo "Warning: fasta tags differ from transcriptome tags"; exit 1; } +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_GENOME_FASTA" ]; then + VIASH_PAR_GENOME_FASTA=$(ViashStripAutomount "$VIASH_PAR_GENOME_FASTA") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_GTF" ]; then + VIASH_PAR_TRANSCRIPTOME_GTF=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME_GTF") +fi +if [ ! -z "$VIASH_PAR_ERCC" ]; then + VIASH_PAR_ERCC=$(ViashStripAutomount "$VIASH_PAR_ERCC") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ]; then + VIASH_PAR_OUTPUT_FASTA=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_FASTA") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ]; then + VIASH_PAR_OUTPUT_GTF=$(ViashStripAutomount "$VIASH_PAR_OUTPUT_GTF") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_FASTA" ] && [ ! -e "$VIASH_PAR_OUTPUT_FASTA" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_FASTA' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_GTF" ] && [ ! -e "$VIASH_PAR_OUTPUT_GTF" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_GTF' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/report/mermaid/.config.vsh.yaml b/target/docker/report/mermaid/.config.vsh.yaml new file mode 100644 index 00000000000..41d1813b2c0 --- /dev/null +++ b/target/docker/report/mermaid/.config.vsh.yaml @@ -0,0 +1,185 @@ +functionality: + name: "mermaid" + namespace: "report" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input directory" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Generated network as output." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_format" + description: "Output format for the generated image. 
By default will be inferred\ + \ from the extension \nof the file specified with --output.\n" + info: null + required: false + choices: + - "svg" + - "png" + - "pdf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--width" + description: "Width of the page" + info: null + default: + - 800 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--height" + description: "Height of the page" + info: null + default: + - 600 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--background_color" + description: "Background color for pngs/svgs (not pdfs)" + info: null + example: + - "#F0F0F0" + default: + - "white" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + - type: "file" + path: "./puppeteer-config.json" + description: "Generates a network from mermaid code.\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "node:20-bullseye" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "javascript" + npm: + - "@mermaid-js/mermaid-cli" + - type: "apt" + packages: + - "chromium" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: 
"memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/report/mermaid/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/report/mermaid" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/report/mermaid/mermaid" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/report/mermaid/mermaid b/target/docker/report/mermaid/mermaid new file mode 100755 index 00000000000..5b031628abf --- /dev/null +++ b/target/docker/report/mermaid/mermaid @@ -0,0 +1,1029 @@ +#!/usr/bin/env bash + +# mermaid 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
#
# Component authors:
#  * Dries De Maeyer (maintainer)

# Abort on the first failing command; helpers below that need to inspect a
# non-zero exit status temporarily switch this off with `set +e`.
set -e

# Resolve the temporary directory: the first non-empty variable in the chain
# below wins, with /tmp as the last resort.
if [ -z "$VIASH_TEMP" ]; then
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
  VIASH_TEMP=${VIASH_TEMP:-/tmp}
fi

# define helper functions
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
function ViashQuote {
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # flag with a value: quote only the part after the first '='
    echo "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: leave untouched
    echo "$1"
  else
    # plain value: wrap in single quotes
    echo "'$1'"
  fi
}
# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# return : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}
# ViashSourceDir: return the path of a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute path of the directory containing the bash file
# note    : SOURCE and DIR are not declared local, so they remain set in the
#           calling shell after the function returns.
function ViashSourceDir {
  SOURCE="$1"
  while [ -h "$SOURCE" ]; do
    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    # a relative symlink target is resolved against the directory of the link
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
  done
  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
}
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
# default verbosity: messages up to and including 'notice' are shown
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE

# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] '.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  # only print when the current verbosity is at least the required level
  if [ $VIASH_VERBOSITY -ge $required_level ]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog $VIASH_LOGCODE_ERROR error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so that an install location containing spaces is handled correctly)
VIASH_META_RESOURCES_DIR="$(ViashSourceDir "${BASH_SOURCE[0]}")"

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="mermaid"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
# stdout: the component name/version, its description and one entry per argument
function ViashHelp {
  echo "mermaid 0.12.4"
  echo ""
  echo "Generates a network from mermaid code."
  echo ""
  echo "Arguments:"
  echo " -i, --input"
  echo " type: file, required parameter, file must exist"
  echo " Input directory"
  echo ""
  echo " -o, --output"
  echo " type: file, required parameter, output, file must exist"
  echo " Generated network as output."
  echo ""
  echo " --output_format"
  echo " type: string"
  echo " choices: [ svg, png, pdf ]"
  echo " Output format for the generated image. By default will be inferred from"
  echo " the extension"
  echo " of the file specified with --output."
  echo ""
  echo " --width"
  echo " type: integer"
  echo " default: 800"
  echo " Width of the page"
  echo ""
  echo " --height"
  echo " type: integer"
  echo " default: 600"
  echo " Height of the page"
  echo ""
  echo " --background_color"
  echo " type: string"
  echo " default: white"
  echo " example: #F0F0F0"
  echo " Background color for pngs/svgs (not pdfs)"
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the `docker` command cannot be found or
# when `docker version` fails (taken to mean the daemon is not running).
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # NOTE: this must be a plain command negation. The previous form,
  # `[ ! command -v docker ]`, handed the words to `test` as an (invalid)
  # expression, so `command` was never executed and this branch never fired.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so the exit status can be inspected
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?
# returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  # `docker images -q` prints the image id only when the image exists locally
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # verbose: let docker print its own progress output
    docker pull "$1" && return 0 || return 1
  else
    # quiet: silence docker and only emit a warning on failure
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the push succeeded
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # Report the image that was actually pushed ($1). The global VSHD_ID is only
  # assigned inside ViashDockerSetup and is empty when this function is called
  # on its own.
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : extra arguments forwarded to ViashDockerBuild (e.g. --no-cache)
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  save=$-; set +e
  ViashDockerPull "$1"
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# Exits with status 1 when the strategy is not recognised.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  VSHD_ID="$1"
  VSHD_STRAT="$2"
  if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then
    ViashDockerBuild "$VSHD_ID" --no-cache
  elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then
    ViashDockerPull "$VSHD_ID"
  elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then
    ViashDockerPullElseBuild "$VSHD_ID" --no-cache
  elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then
    ViashDockerPullElseBuild "$VSHD_ID"
  elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then
    ViashDockerBuild "$VSHD_ID"
  elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then
    # 'ifneedbe*' strategies only act when the image is not available locally
    save=$-; set +e
    ViashDockerLocalTagCheck "$VSHD_ID"
    outCheck=$?
    [[ $save =~ e ]] && set -e
    if [ $outCheck -eq 0 ]; then
      ViashInfo "Image $VSHD_ID already exists"
    elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then
      ViashDockerBuild "$VSHD_ID" --no-cache
    elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then
      ViashDockerBuild "$VSHD_ID"
    elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then
      ViashDockerPull "$VSHD_ID"
    elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then
      ViashDockerPullElseBuild "$VSHD_ID" --no-cache
    elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then
      ViashDockerPullElseBuild "$VSHD_ID"
    else
      ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
      exit 1
    fi
  elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then
    ViashDockerPush "$VSHD_ID"
  elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then
    # only push when the tag is not yet present on the remote
    save=$-; set +e
    ViashDockerRemoteTagCheck "$VSHD_ID"
    outCheck=$?
    [[ $save =~ e ]] && set -e
    if [ $outCheck -eq 0 ]; then
      ViashNotice "Container '$VSHD_ID' exists, doing nothing."
    else
      ViashNotice "Container '$VSHD_ID' does not yet exist."
      ViashDockerPush "$VSHD_ID"
    fi
  elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then
    ViashNotice "Skipping setup."
  else
    ViashError "Unrecognised Docker strategy: $VSHD_STRAT"
    exit 1
  fi
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# Exits with status 1 and names the first missing command when one is absent.
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  tag=$1
  shift 1
  commands="$@"
  save=$-; set +e
  # the loop runs inside the container; it echoes the first missing command and stops
  missing=$(docker run --rm --entrypoint=sh "$tag" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
  outCheck=$?
  [[ $save =~ e ]] && set -e
  if [ $outCheck -ne 0 ]; then
    ViashError "Docker container '$tag' does not contain command '$missing'."
    exit 1
  fi
}


######## End of helper functions for setting up Docker images for viash ########

# ViashDockerfile: print the dockerfile to stdout
# return : dockerfile required to run this component
# examples:
#   ViashDockerfile
function ViashDockerfile {
  cat << 'VIASHDOCKER'
FROM node:20-bullseye

ENTRYPOINT []


RUN npm install -g "@mermaid-js/mermaid-cli"

RUN apt-get update && \
  DEBIAN_FRONTEND=noninteractive apt-get install -y chromium && \
  rm -rf /var/lib/apt/lists/*

LABEL org.opencontainers.image.authors="Dries De Maeyer"
LABEL org.opencontainers.image.description="Companion container for running component report mermaid"
LABEL org.opencontainers.image.created="2024-01-31T09:08:32Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline"
LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383"
LABEL org.opencontainers.image.version="0.12.4"

VIASHDOCKER
}

# ViashDockerBuild: build a docker container
# $1              : image identifier with format `[registry/]image[:tag]`
# exit code $?
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-mermaid-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "mermaid 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_format) + [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FORMAT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_format. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_format=*) + [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format=*\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FORMAT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --width) + [ -n "$VIASH_PAR_WIDTH" ] && ViashError Bad arguments for option \'--width\': \'$VIASH_PAR_WIDTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WIDTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --width. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --width=*) + [ -n "$VIASH_PAR_WIDTH" ] && ViashError Bad arguments for option \'--width=*\': \'$VIASH_PAR_WIDTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WIDTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --height) + [ -n "$VIASH_PAR_HEIGHT" ] && ViashError Bad arguments for option \'--height\': \'$VIASH_PAR_HEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_HEIGHT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --height. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --height=*) + [ -n "$VIASH_PAR_HEIGHT" ] && ViashError Bad arguments for option \'--height=*\': \'$VIASH_PAR_HEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_HEIGHT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --background_color) + [ -n "$VIASH_PAR_BACKGROUND_COLOR" ] && ViashError Bad arguments for option \'--background_color\': \'$VIASH_PAR_BACKGROUND_COLOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BACKGROUND_COLOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --background_color. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --background_color=*) + [ -n "$VIASH_PAR_BACKGROUND_COLOR" ] && ViashError Bad arguments for option \'--background_color=*\': \'$VIASH_PAR_BACKGROUND_COLOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BACKGROUND_COLOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/report_mermaid:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_WIDTH+x} ]; then + VIASH_PAR_WIDTH="800" +fi +if [ -z ${VIASH_PAR_HEIGHT+x} ]; then + VIASH_PAR_HEIGHT="600" +fi +if [ -z ${VIASH_PAR_BACKGROUND_COLOR+x} ]; then + VIASH_PAR_BACKGROUND_COLOR="white" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_WIDTH" ]]; then + if ! [[ "$VIASH_PAR_WIDTH" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--width' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_HEIGHT" ]]; then + if ! [[ "$VIASH_PAR_HEIGHT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--height' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_FORMAT" ]; then + VIASH_PAR_OUTPUT_FORMAT_CHOICES=("svg:png:pdf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_FORMAT_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_FORMAT:" ]]; then + ViashError '--output_format' specified value of \'$VIASH_PAR_OUTPUT_FORMAT\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/report_mermaid:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/report_mermaid:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/report_mermaid:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-mermaid-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi ) +$( if [ ! -z ${VIASH_PAR_WIDTH+x} ]; then echo "${VIASH_PAR_WIDTH}" | sed "s#'#'\"'\"'#g;s#.*#par_width='&'#" ; else echo "# par_width="; fi ) +$( if [ ! -z ${VIASH_PAR_HEIGHT+x} ]; then echo "${VIASH_PAR_HEIGHT}" | sed "s#'#'\"'\"'#g;s#.*#par_height='&'#" ; else echo "# par_height="; fi ) +$( if [ ! -z ${VIASH_PAR_BACKGROUND_COLOR+x} ]; then echo "${VIASH_PAR_BACKGROUND_COLOR}" | sed "s#'#'\"'\"'#g;s#.*#par_background_color='&'#" ; else echo "# par_background_color="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +mmdc -p "\$meta_resources_dir/puppeteer-config.json" \\ + -i "\$par_input" \\ + -o "\$par_output" \\ + --width "\$par_width" \\ + --height "\$par_height" \\ + \${par_background_color:+--backgroundColor \$par_background_color} \\ + \${par_output_format:+--outputFormat \$par_output_format} +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/report/mermaid/puppeteer-config.json b/target/docker/report/mermaid/puppeteer-config.json new file mode 100644 index 00000000000..7b2851c2995 --- /dev/null +++ b/target/docker/report/mermaid/puppeteer-config.json @@ -0,0 +1,6 @@ +{ + "executablePath": "/usr/bin/chromium", + "args": [ + "--no-sandbox" + ] +} \ No newline at end of file diff --git a/target/docker/transfer/publish/.config.vsh.yaml b/target/docker/transfer/publish/.config.vsh.yaml new file mode 100644 index 00000000000..5c781ff2f79 --- /dev/null +++ b/target/docker/transfer/publish/.config.vsh.yaml @@ -0,0 +1,125 @@ +functionality: + name: "publish" + namespace: "transfer" + version: "0.12.4" + authors: + - name: "Toni Verbeiren" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input filename" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output filename" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Publish an artifact and optionally 
rename with parameters" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transfer/publish/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/transfer/publish" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transfer/publish/publish" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: 
"https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/transfer/publish/publish b/target/docker/transfer/publish/publish new file mode 100755 index 00000000000..6abf7e1016f --- /dev/null +++ b/target/docker/transfer/publish/publish @@ -0,0 +1,919 @@ +#!/usr/bin/env bash + +# publish 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Toni Verbeiren (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : 
ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] ', if VIASH_VERBOSITY >= $1. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. 
+# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="publish" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "publish 0.12.4" + echo "" + echo "Publish an artifact and optionally rename with parameters" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input filename" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output filename" +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck 
+function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM ubuntu:22.04 + +ENTRYPOINT [] + + +RUN : +LABEL org.opencontainers.image.authors="Toni Verbeiren" +LABEL org.opencontainers.image.description="Companion container for running component transfer publish" +LABEL org.opencontainers.image.created="2024-01-31T09:08:33Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-publish-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "publish 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 || shift 1 # consume flag and value; a missing value is reported later by ViashDockerSetup + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transfer_publish:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty (unset takes the variable name, not its value) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! 
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! 
-z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transfer_publish:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transfer_publish:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transfer_publish:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-publish-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +parent=\`dirname "\$par_output"\` +if [[ ! -d "\$parent" ]]; then + mkdir -p "\$parent" +fi + +cp -r "\$par_input" "\$par_output" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/transform/clr/.config.vsh.yaml b/target/docker/transform/clr/.config.vsh.yaml new file mode 100644 index 00000000000..ef4880b8baa --- /dev/null +++ b/target/docker/transform/clr/.config.vsh.yaml @@ -0,0 +1,188 @@ +functionality: + name: "clr" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "prot" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_layer" + description: "Output layer to use. By default, use X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "muon~=0.1.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + 
mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/clr/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/clr" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/clr/clr" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/transform/clr/clr b/target/docker/transform/clr/clr new file mode 100755 index 00000000000..02405a94a19 --- /dev/null +++ b/target/docker/transform/clr/clr @@ -0,0 +1,1005 @@ +#!/usr/bin/env bash + +# clr 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# ViashQuote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory containing a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory that holds the (symlink-resolved) bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="clr" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "clr 0.12.4" + echo "" + echo "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017)." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: prot" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " default: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --output_layer" + echo " type: string" + echo " Output layer to use. By default, use X." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # run the lookup itself; it is not a test(1) expression + ViashCritical "Docker doesn't seem to be installed.
See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the push succeeded +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" "muon~=0.1.5" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component transform clr" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-clr-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "clr 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_layer) + [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_layer. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_layer=*) + [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer=*\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
# ViashMemoryAsBytes: convert a human-readable memory string to a byte count
# $1     : memory string such as '2GB', '512 mb' or '10k'; case and whitespace
#          are ignored, units are binary multiples (1kb = 1024b)
# stdout : the number of bytes, or nothing at all when $1 cannot be parsed
# examples:
#   ViashMemoryAsBytes 2GB   # prints 2147483648
#   ViashMemoryAsBytes foo   # prints nothing
function ViashMemoryAsBytes {
  # keep every helper variable local so nothing leaks into the wrapper's scope
  local memory memory_b number symbol
  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
  memory=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]')
  if [[ $memory =~ $memory_regex ]]; then
    # force base 10: a zero-padded value such as '08' would otherwise be
    # rejected as an invalid octal number by the arithmetic expansion
    number=$(( 10#${BASH_REMATCH[1]} ))
    symbol=${BASH_REMATCH[2]}
    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( number * 1024 )) ;;
      mb|m)   memory_b=$(( number * 1024 * 1024 )) ;;
      gb|g)   memory_b=$(( number * 1024 * 1024 * 1024 )) ;;
      tb|t)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 )) ;;
      pb|p)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}

# compute memory in different units (each unit is rounded up)
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  VIASH_META_MEMORY_B=$(ViashMemoryAsBytes "$VIASH_META_MEMORY")
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    VIASH_META_MEMORY_KB=$(( (VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( (VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( (VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( (VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( (VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty; 'unset' takes the variable NAME --
    # expanding the (empty) value here would unset nothing at all
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty (again: pass the name, not the value)
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="prot" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
# ViashPerformChown: hand ownership of the produced files back to the caller
# Runs 'chown' inside the component image, using the detected mounts, on every
# path collected in VIASH_CHOWN_VARS. Does nothing when that array is empty.
# Registered below as the EXIT trap of the wrapper.
ViashPerformChown() {
  # nothing was registered for chown: leave right away
  (( ${#VIASH_CHOWN_VARS[@]} )) || return 0
  # a failing chown must not abort the exit handler
  set +e
  eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_clr:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]}
  set -e
}
trap ViashPerformChown EXIT
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
def main():
    """Run CLR normalisation on one modality of an h5mu file and save it.

    Reads ``par['input']``, selects the modality named by ``par['modality']``
    and calls ``pt.pp.clr`` on it. When ``par['output_layer']`` is set, the
    normalisation is requested as a new object (``inplace=False``) and its
    ``.X`` is stored in that layer; otherwise the modality is normalised in
    place. The MuData object is then written to ``par['output']`` using
    ``par['output_compression']``.

    Raises:
        RuntimeError: an output layer was requested but ``pt.pp.clr``
            returned ``None``.
    """
    input_h5mu = read_h5mu(par['input'])
    modality = input_h5mu[par['modality']]
    output_layer = par['output_layer']
    normalized_counts = pt.pp.clr(modality, inplace=not output_layer)
    if output_layer:
        # Compare against None explicitly: relying on the truthiness of the
        # returned object would treat a valid-but-empty result as "missing".
        if normalized_counts is None:
            raise RuntimeError("CLR failed to return the requested output layer")
        modality.layers[output_layer] = normalized_counts.X
    input_h5mu.write_h5mu(par['output'], compression=par["output_compression"])
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/transform/delete_layer/.config.vsh.yaml b/target/docker/transform/delete_layer/.config.vsh.yaml new file mode 100644 index 00000000000..ae03b21b777 --- /dev/null +++ b/target/docker/transform/delete_layer/.config.vsh.yaml @@ -0,0 +1,196 @@ +functionality: + name: "delete_layer" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + description: "Input layer to remove" + info: null + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--missing_ok" + description: "Do not raise an error if the layer does not exist for all modalities." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/compress_h5mu.py" + - type: "file" + path: "src/utils/setup_logger.py" + description: "Delete an anndata layer from one or more modalities.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" 
def compress_h5mu(input_path: Union[str, Path],
                  output_path: Union[str, Path],
                  compression: Union[Literal['gzip'], Literal['lzf']]):
    """Copy an h5mu file to ``output_path``, compressing its datasets.

    Every group of the input is re-created in the output and its attributes
    are copied. Datasets that are not yet compressed and are not scalar are
    re-created with ``compression``; all other datasets are copied as they
    are. Finally the text header found at the start of the input file is
    written into the 512-byte userblock of the output, padded with NUL bytes.

    Args:
        input_path: Path of the h5mu file to read.
        output_path: Path of the h5mu file to create (opened with mode 'w').
        compression: Compression filter handed to ``create_dataset``.

    Raises:
        NotImplementedError: the input contains an object that is neither a
            ``Group`` nor a ``Dataset``.
    """
    input_path, output_path = str(input_path), str(output_path)
    # Size of the userblock reserved at the start of the output file.
    userblock_size = 512

    def copy_attributes(in_object, out_object):
        for key, value in in_object.attrs.items():
            out_object.attrs[key] = value

    def visit_path(output_h5: H5File,
                   compression: Union[Literal['gzip'], Literal['lzf']],
                   name: str, node: Union[Group, Dataset]):
        if isinstance(node, Group):
            new_group = output_h5.create_group(name)
            copy_attributes(node, new_group)
        elif isinstance(node, Dataset):
            # Compression only works for non-scalar Dataset objects
            # Scalar objects dont have a shape defined
            if not node.compression and node.shape not in [None, ()]:
                new_dataset = output_h5.create_dataset(name, data=node, compression=compression)
                copy_attributes(node, new_dataset)
            else:
                output_h5.copy(node, name)
        else:
            raise NotImplementedError(f"Could not copy element {name}, "
                                      f"type has not been implemented yet: {type(node)}")

    with H5File(input_path, 'r') as input_h5, \
            H5File(output_path, 'w', userblock_size=userblock_size) as output_h5:
        copy_attributes(input_h5, output_h5)
        input_h5.visititems(partial(visit_path, output_h5, compression))

    with open(input_path, "rb") as input_bytes:
        # Mudata puts metadata like this in the first 512 bytes:
        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
        # See mudata/_core/io.py, read_h5mu() function
        # Read the whole userblock so a header longer than 100 bytes is kept.
        starting_metadata = input_bytes.read(userblock_size)
    # The metadata is padded with extra null bytes up until 512 bytes.
    # bytes.find() returns -1 when there is no padding byte; slicing with -1
    # would silently drop the last header byte, so only truncate on a hit.
    truncate_location = starting_metadata.find(b"\x00")
    if truncate_location != -1:
        starting_metadata = starting_metadata[:truncate_location]
    with open(output_path, "br+") as f:
        nbytes = f.write(starting_metadata)
        f.write(b"\0" * (userblock_size - nbytes))
# define helper functions
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
function ViashQuote {
  # printf instead of echo: echo would swallow values that look like one of
  # its own options (-n, -e, -E) and print nothing
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    printf '%s\n' "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    printf '%s\n' "$1"
  else
    printf '%s\n' "'$1'"
  fi
}
# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# return : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  printf '%s\n' "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}
# ViashSourceDir: return the path of a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute path of the directory containing the bash file
function ViashSourceDir {
  # locals, so the lookup does not overwrite SOURCE/DIR of the calling script
  local source="$1"
  local dir
  while [ -h "$source" ]; do
    dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )"
    source="$(readlink "$source")"
    # a relative link target is resolved against the directory of the link
    [[ $source != /* ]] && source="$dir/$source"
  done
  cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd
}
# ViashLog: print a message when the current verbosity is high enough
# usage  : ViashLog 1 alert Oh no something went wrong!
# $1     : verbosity level required for the message to be shown
# $2     : tag shown between square brackets
# $3+    : the message itself
# stderr : '[$2] ' followed by the message, only if $VIASH_VERBOSITY >= $1
ViashLog() {
  local level_needed="$1"
  local tag="$2"
  shift 2
  # below the required level: stay silent
  [ $VIASH_VERBOSITY -ge $level_needed ] || return 0
  echo "[$tag]" "$@" >&2
}

# One thin wrapper per syslog severity. Each forwards its arguments to ViashLog
# with the matching VIASH_LOGCODE_* level and a lower-case tag, so the message
# ends up on stderr prefixed by e.g. '[error] '.

# ViashEmergency: the system is unstable
ViashEmergency() {
  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}

# ViashAlert: action must be taken immediately (e.g. corrupted system database)
ViashAlert() {
  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}

# ViashCritical: a critical condition occurred
ViashCritical() {
  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}

# ViashError: an error condition occurred
ViashError() {
  ViashLog $VIASH_LOGCODE_ERROR error "$@"
}

# ViashWarning: something potentially abnormal happened
ViashWarning() {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: a significant but normal event
ViashNotice() {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: a normal event
ViashInfo() {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: everything, for debugging purposes
ViashDebug() {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}
######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
# Exits with status 1 (after logging a critical message) when the 'docker'
# command cannot be found or when 'docker version' fails.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  local save out

  ViashDebug "Checking whether Docker is installed"
  # Test the exit status of 'command -v' directly. Wrapping it in '[ ! ... ]'
  # turns it into a malformed test expression that always fails, so a missing
  # docker binary was never detected here.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so a failing 'docker version' can be reported
  save=$-; set +e
  docker version --format '{{.Client.APIVersion}}' > /dev/null 2>&1
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}
# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : 0 when 'docker images -q' lists the image, 1 otherwise
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : exit status of 'docker pull' (0 when the pull worked)
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  local save out
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ "$VIASH_VERBOSITY" -ge "$VIASH_LOGCODE_INFO" ]; then
    # verbose enough: let docker print its own progress output
    docker pull "$1" && return 0 || return 1
  else
    # quiet mode: hide docker's output and report a failure as a warning
    save=$-; set +e
    docker pull "$1" > /dev/null 2>&1
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : exit status of 'docker push' (0 when the push worked)
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  local save out
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ "$VIASH_VERBOSITY" -ge "$VIASH_LOGCODE_INFO" ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" > /dev/null 2>&1
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # report on the image that was actually pushed ($1) rather than on a global
  # that is only set when the call comes through ViashDockerSetup
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component transform delete_layer" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-delete_layer-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "delete_layer 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --layer) + if [ -z "$VIASH_PAR_LAYER" ]; then + VIASH_PAR_LAYER="$2" + else + VIASH_PAR_LAYER="$VIASH_PAR_LAYER:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --layer. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --layer=*) + if [ -z "$VIASH_PAR_LAYER" ]; then + VIASH_PAR_LAYER=$(ViashRemoveFlags "$1") + else + VIASH_PAR_LAYER="$VIASH_PAR_LAYER:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
            VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        --missing_ok)
            # boolean flag: takes no value, so only one word is consumed
            [ -n "$VIASH_PAR_MISSING_OK" ] && ViashError Bad arguments for option \'--missing_ok\': \'$VIASH_PAR_MISSING_OK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_PAR_MISSING_OK=true
            shift 1
            ;;
        ---setup)
            # `---setup <strategy>`: the strategy is a separate word, so both
            # words must be consumed; otherwise the strategy would be parsed
            # again as a positional argument.
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---setup=*)
            # `---setup=<strategy>`: flag and value are a single word, so only
            # one word is consumed (`shift 2` fails when this is the last
            # argument and would swallow the next option otherwise).
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
            shift 1
            ;;
        ---dockerfile)
            ViashDockerfile
            exit 0
            ;;
        # NOTE(review): `---v` is already matched by the `---v|---verbose`
        # branch earlier in this case statement, so only `---volume` can reach
        # this branch. The pattern is left as generated.
        ---v|---volume)
            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
            shift 2
            ;;
        ---volume=*)
            # The value is part of the current word ($1), not the next one.
            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
            shift 1
            ;;
        ---debug)
            VIASH_MODE='docker_debug'
            shift 1
            ;;
        ---cpus)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---cpus=*)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---memory)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---memory=*)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
  # Quote the value so a memory string containing whitespace reaches
  # ViashMemoryAsBytes as one argument (the helper strips the whitespace).
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is derived from the previous one, rounding up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # `unset` takes the variable NAME; `unset $VAR` expands the (empty) value
    # and unsets nothing, leaving the variable set-but-empty so that later
    # `${VAR+x}` checks still treat it as provided.
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  # pass the name, not the value (see the memory case above)
  unset VIASH_META_CPUS
fi


# check whether required parameters exist
# (`${VAR+x}` expands to `x` only when VAR is set, even if it is empty)
if [ -z ${VIASH_PAR_INPUT+x} ]; then
  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_PAR_LAYER+x} ]; then
  ViashError '--layer' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_CONFIG+x} ]; then
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
  ViashError 'temp_dir' is a required argument.
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_MISSING_OK+x} ]; then + VIASH_PAR_MISSING_OK="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MISSING_OK" ]]; then + if ! [[ "$VIASH_PAR_MISSING_OK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--missing_ok' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_delete_layer:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-delete_layer-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +from mudata import read_h5ad, write_h5ad +import shutil +from pathlib import Path + +## VIASH START +# The following code has been auto-generated by Viash. 
+par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'missing_ok': $( if [ ! -z ${VIASH_PAR_MISSING_OK+x} ]; then echo "r'${VIASH_PAR_MISSING_OK//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion +# from compress_h5mu import compress_h5mu +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = 
output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = input_bytes.read(100) + # The metadata is padded with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\\0" * (512 - nbytes)) +# END TEMPORARY WORKAROUND compress_h5mu + +def main(): + input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] + + logger.info('Reading input file %s, modality %s.', input_file, mod_name) + mod = read_h5ad(input_file, mod=mod_name) + for layer in par['layer']: + if layer not in mod.layers: + if par['missing_ok']: + continue + raise ValueError(f"Layer '{layer}' is not present in modality {mod_name}.") + logger.info('Deleting layer %s from modality %s.', layer, mod_name) + del mod.layers[layer] + + logger.info('Writing output to %s.', par['output']) + output_file_uncompressed = output_file.with_name(output_file.stem + 
"_uncompressed.h5mu") \\ + if par["output_compression"] else output_file + shutil.copyfile(par['input'], output_file_uncompressed) + write_h5ad(filename=output_file_uncompressed, mod=mod_name, data=mod) + if par["output_compression"]: + compress_h5mu(output_file_uncompressed, output_file, compression=par["output_compression"]) + output_file_uncompressed.unlink() + + logger.info('Finished.') + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/transform/delete_layer/setup_logger.py b/target/docker/transform/delete_layer/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/transform/delete_layer/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/transform/log1p/.config.vsh.yaml b/target/docker/transform/log1p/.config.vsh.yaml new file mode 100644 index 00000000000..a238fff4438 --- /dev/null +++ b/target/docker/transform/log1p/.config.vsh.yaml @@ -0,0 +1,225 @@ +functionality: + name: "log1p" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" 
+ dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. If None, X is normalized" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_layer" + description: "Output layer to use. By default, use X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--base" + info: null + example: + - 2.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes\ + \ the natural logarithm unless a different base is given.\n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + 
cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/log1p/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/log1p" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/log1p/log1p" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/transform/log1p/log1p b/target/docker/transform/log1p/log1p new file mode 100755 index 00000000000..0ef73146337 --- /dev/null +++ b/target/docker/transform/log1p/log1p @@ -0,0 +1,1081 @@ +#!/usr/bin/env bash + +# log1p 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries De Maeyer (maintainer) +# * Robrecht Cannoodt (contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the 
verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+# (quoted so that a script path containing whitespace reaches ViashSourceDir as a single $1)
+VIASH_META_RESOURCES_DIR=`ViashSourceDir "${BASH_SOURCE[0]}"`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="log1p"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "log1p 0.12.4"
+  echo ""
+  echo "Logarithmize the data matrix. Computes X = log(X + 1), where log denotes the"
+  echo "natural logarithm unless a different base is given."
+  echo ""
+  echo "Arguments:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " example: input.h5mu"
+  echo " Input h5mu file"
+  echo ""
+  echo " --modality"
+  echo " type: string"
+  echo " default: rna"
+  echo ""
+  echo " --input_layer"
+  echo " type: string"
+  echo " Input layer to use. If None, X is normalized"
+  echo ""
+  echo " --output_layer"
+  echo " type: string"
+  echo " Output layer to use. By default, use X."
+  echo ""
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " default: output.h5mu"
+  echo " Output h5mu file."
+  echo ""
+  echo " --output_compression"
+  echo " type: string"
+  echo " example: gzip"
+  echo " choices: [ gzip, lzf ]"
+  echo " The compression format to be used on the output h5mu object."
+  echo ""
+  echo " --base"
+  echo " type: double"
+  echo " example: 2.0"
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# exits with status 1 when the docker CLI is not found or `docker version` fails
+# examples:
+# ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  # run `command -v` itself and negate its exit status; wrapping it in `[ ... ]`
+  # only hands the words to test, which is not a valid expression, so the
+  # branch below could never be taken
+  if ! command -v docker &> /dev/null; then
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  # temporarily drop errexit so a failing `docker version` can be reported instead of aborting
+  save=$-; set +e
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# ViashDockerRemoteTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerRemoteTagCheck sdaizudceahifu
+# echo $? # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1 : image identifier with format `[registry/]image[:tag]`
+# exit code $? : whether or not the image was found
+# examples:
+# docker pull python:latest
+# ViashDockerLocalTagCheck python:latest
+# echo $? # returns '0'
+# ViashDockerLocalTagCheck sdaizudceahifu
+# echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component transform log1p" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-log1p-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "log1p 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_layer) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_INPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_layer=*) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_layer) + [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_layer=*) + [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer=*\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --base) + [ -n "$VIASH_PAR_BASE" ] && ViashError Bad arguments for option \'--base\': \'$VIASH_PAR_BASE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BASE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --base. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --base=*) + [ -n "$VIASH_PAR_BASE" ] && ViashError Bad arguments for option \'--base=*\': \'$VIASH_PAR_BASE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_BASE=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            # flag and value are two tokens: check that the value is present, then consume both,
+            # as the other two-token options in this loop do
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---setup=*)
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            # flag and value are a single token: shifting by 2 fails when this is the
+            # last argument, which aborts the script under `set -e`
+            shift 1
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        # `---v` is not listed here: the earlier `---v|---verbose` branch of this case matches it first
+        ---volume)
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*)
+            # the value is embedded in the current token ($1), not in the following one
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
+            shift 1
+            ;;
+        ---debug)
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---memory)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_log1p:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # (unset takes the variable NAME; `unset $VAR` expands the empty value and unsets nothing)
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # pass the name, not the (empty) value, so the variable is actually removed
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_BASE" ]]; then + if ! [[ "$VIASH_PAR_BASE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--base' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_log1p:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_log1p:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_log1p:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-log1p-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import scanpy as sc +import mudata as mu +import anndata as ad +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'base': $( if [ ! -z ${VIASH_PAR_BASE+x} ]; then echo "float(r'${VIASH_PAR_BASE//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) +mdata.var_names_make_unique() + +mod = par["modality"] +logger.info("Performing log transformation on modality %s", mod) +data = mdata.mod[mod] + +# Make our own copy with not a lot of data +# this avoid excessive memory usage and accidental overwrites +input_layer = data.layers[par["input_layer"]] \\ + if par["input_layer"] else data.X +data_for_scanpy = ad.AnnData(X=input_layer.copy()) +sc.pp.log1p(data_for_scanpy, + base=par["base"], + layer=None, # use X + copy=False) # allow overwrites in the copy that was made + +# Scanpy will overwrite the input layer. 
+# So fetch input layer from the copy and use it to populate the output slot +if par["output_layer"]: + data.layers[par["output_layer"]] = data_for_scanpy.X +else: + data.X = data_for_scanpy.X +data.uns['log1p'] = data_for_scanpy.uns['log1p'].copy() + +logger.info("Writing to file %s", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/transform/log1p/setup_logger.py b/target/docker/transform/log1p/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/transform/log1p/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/transform/normalize_total/.config.vsh.yaml b/target/docker/transform/normalize_total/.config.vsh.yaml new file mode 100644 index 00000000000..169fe0cb648 --- /dev/null +++ b/target/docker/transform/normalize_total/.config.vsh.yaml @@ -0,0 +1,242 @@ +functionality: + name: "normalize_total" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + 
multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. By default, X is normalized" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_layer" + description: "Output layer to use. By default, use X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--target_sum" + description: "If None, after normalization, each observation (cell) has a total\ + \ count equal to the median of total counts for observations (cells) before\ + \ normalization." + info: null + default: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--exclude_highly_expressed" + description: "Exclude (very) highly expressed genes for the computation of the\ + \ normalization factor (size factor) for each cell. A gene is considered highly\ + \ expressed, if it has more than max_fraction of the total counts in at least\ + \ one cell. The not-excluded genes will sum up to target_sum." 
+ info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Normalize counts per cell.\n\nNormalize each cell by total counts\ + \ over all genes, so that every cell has the same total count after normalization.\ + \ If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True,\ + \ very highly expressed genes are excluded from the computation of the normalization\ + \ factor (size factor) for each cell. This is meaningful as these can strongly\ + \ influence the resulting normalized values for all other genes [Weinreb17].\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim-bullseye" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + 
mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/normalize_total" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/normalize_total/normalize_total" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/transform/normalize_total/normalize_total b/target/docker/transform/normalize_total/normalize_total new file mode 100755 index 00000000000..05ad7cedbba --- /dev/null +++ b/target/docker/transform/normalize_total/normalize_total @@ -0,0 +1,1108 @@ +#!/usr/bin/env bash + +# normalize_total 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+# * Dries De Maeyer (maintainer)
+# * Robrecht Cannoodt (contributor)
+
+set -e
+
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1     : unquoted string
+# return : possibly quoted string (printed to stdout)
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag (printed to stdout)
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: print the directory that contains a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory containing the bash file (printed by 'pwd')
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level (message is shown only when VIASH_VERBOSITY >= $1)
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="normalize_total" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "normalize_total 0.12.4" + echo "" + echo "Normalize counts per cell." + echo "" + echo "Normalize each cell by total counts over all genes, so that every cell has the" + echo "same total count after normalization. If choosing target_sum=1e6, this is CPM" + echo "normalization." + echo "" + echo "If exclude_highly_expressed=True, very highly expressed genes are excluded from" + echo "the computation of the normalization factor (size factor) for each cell. This is" + echo "meaningful as these can strongly influence the resulting normalized values for" + echo "all other genes [Weinreb17]." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --input_layer" + echo " type: string" + echo " Input layer to use. By default, X is normalized" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " default: output.h5mu" + echo " Output h5mu file." 
+ echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --output_layer" + echo " type: string" + echo " Output layer to use. By default, use X." + echo "" + echo " --target_sum" + echo " type: integer" + echo " default: 10000" + echo " If None, after normalization, each observation (cell) has a total count" + echo " equal to the median of total counts for observations (cells) before" + echo " normalization." + echo "" + echo " --exclude_highly_expressed" + echo " type: boolean_true" + echo " Exclude (very) highly expressed genes for the computation of the" + echo " normalization factor (size factor) for each cell. A gene is considered" + echo " highly expressed, if it has more than max_fraction of the total counts" + echo " in at least one cell. The not-excluded genes will sum up to target_sum." +} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim-bullseye + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Dries De Maeyer, Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component transform normalize_total" +LABEL org.opencontainers.image.created="2024-01-31T09:08:36Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-normalize_total-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "normalize_total 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_layer) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_INPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_layer=*) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_layer) + [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_layer=*) + [ -n "$VIASH_PAR_OUTPUT_LAYER" ] && ViashError Bad arguments for option \'--output_layer=*\': \'$VIASH_PAR_OUTPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --target_sum) + [ -n "$VIASH_PAR_TARGET_SUM" ] && ViashError Bad arguments for option \'--target_sum\': \'$VIASH_PAR_TARGET_SUM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TARGET_SUM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --target_sum. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --target_sum=*) + [ -n "$VIASH_PAR_TARGET_SUM" ] && ViashError Bad arguments for option \'--target_sum=*\': \'$VIASH_PAR_TARGET_SUM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TARGET_SUM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --exclude_highly_expressed) + [ -n "$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED" ] && ViashError Bad arguments for option \'--exclude_highly_expressed\': \'$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED=true + shift 1 + ;; + ---setup) + # the strategy is passed as a separate token, so two arguments are consumed + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + # the strategy is embedded in $1, so only one argument is consumed + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + # the mount spec is embedded in $1; $2 is the next, unrelated argument + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_TARGET_SUM+x} ]; then + VIASH_PAR_TARGET_SUM="10000" +fi +if [ -z ${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED+x} ]; then + VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED="false" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_TARGET_SUM" ]]; then + if ! [[ "$VIASH_PAR_TARGET_SUM" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--target_sum' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED" ]]; then + if ! [[ "$VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--exclude_highly_expressed' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_normalize_total:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-normalize_total-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import scanpy as sc +import mudata as mu + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'target_sum': $( if [ ! -z ${VIASH_PAR_TARGET_SUM+x} ]; then echo "int(r'${VIASH_PAR_TARGET_SUM//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'exclude_highly_expressed': $( if [ ! -z ${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) +mdata.var_names_make_unique() + +logger.info(par) + +mod = par["modality"] +logger.info("Performing total normalization on modality %s", mod) +dat = mdata.mod[mod] +if par['input_layer'] and not par['input_layer'] in 
dat.layers.keys(): + raise ValueError(f"Input layer {par['input_layer']} not found in {mod}") +# forward the user-facing normalization options; they were previously parsed but never used +output_data = sc.pp.normalize_total(dat, + layer=par["input_layer"], + target_sum=par["target_sum"], + exclude_highly_expressed=par["exclude_highly_expressed"], + copy=True if par["output_layer"] else False) + +# normalize_total only returns an object when copy=True, i.e. when an output layer was requested +if output_data is not None: + # the normalized values live in the layer that was normalized, or in .X when no input layer was given + result = output_data.layers[par["input_layer"]] if par["input_layer"] else output_data.X + dat.layers[par["output_layer"]] = result + +logger.info("Writing to file") +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/transform/normalize_total/setup_logger.py b/target/docker/transform/normalize_total/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/transform/normalize_total/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/transform/regress_out/.config.vsh.yaml b/target/docker/transform/regress_out/.config.vsh.yaml new file mode 100644 index 00000000000..201da61a153 --- /dev/null +++ b/target/docker/transform/regress_out/.config.vsh.yaml @@ -0,0 +1,195 @@ +functionality: + name: "regress_out" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality (one or more) to run this component on." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_keys" + description: "Which .obs keys to regress on." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Regress out (mostly) unwanted sources of variation.\nUses simple linear\ + \ regression. 
This is inspired by Seurat's regressOut function in R [Satija15].\ + \ \nNote that this function tends to overcorrect in certain circumstances as described\ + \ in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + 
cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/regress_out" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/regress_out/regress_out" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/transform/regress_out/regress_out b/target/docker/transform/regress_out/regress_out new file mode 100755 index 00000000000..6397997c031 --- /dev/null +++ b/target/docker/transform/regress_out/regress_out @@ -0,0 +1,1039 @@ +#!/usr/bin/env bash + +# regress_out 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (maintainer, contributor) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# 
usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="regress_out" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "regress_out 0.12.4" + echo "" + echo "Regress out (mostly) unwanted sources of variation." + echo "Uses simple linear regression. This is inspired by Seurat's regressOut function" + echo "in R [Satija15]." + echo "Note that this function tends to overcorrect in certain circumstances as" + echo "described in issue theislab/scanpy#526." + echo "See https://github.com/theislab/scanpy/issues/526." + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " default: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " Which modality (one or more) to run this component on." + echo "" + echo " --obs_keys" + echo " type: string, multiple values allowed" + echo " Which .obs keys to regress on." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then # run the lookup itself; it is a command, not a test expression + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component transform regress_out" +LABEL org.opencontainers.image.created="2024-01-31T09:08:35Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-regress_out-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "regress_out 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_keys) + if [ -z "$VIASH_PAR_OBS_KEYS" ]; then + VIASH_PAR_OBS_KEYS="$2" + else + VIASH_PAR_OBS_KEYS="$VIASH_PAR_OBS_KEYS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_keys. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --obs_keys=*) + if [ -z "$VIASH_PAR_OBS_KEYS" ]; then + VIASH_PAR_OBS_KEYS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OBS_KEYS="$VIASH_PAR_OBS_KEYS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1 + shift 2 # drop both the flag and its strategy value + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 # the strategy is embedded in the flag word, so only one word is consumed + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # the mount spec is part of the flag word itself + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_regress_out:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset VIASH_META_MEMORY_B # pass the variable name, not its (empty) value + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS # pass the variable name, not its (empty) value +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_regress_out:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_regress_out:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_regress_out:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-regress_out-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import scanpy as sc +import mudata as mu +import multiprocessing +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_keys': $( if [ ! -z ${VIASH_PAR_OBS_KEYS+x} ]; then echo "r'${VIASH_PAR_OBS_KEYS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) +mdata.var_names_make_unique() + +if ( + par["obs_keys"] is not None + and len(par["obs_keys"]) > 0 +): + mod = par["modality"] + logger.info("Regress out variables on modality %s", mod) + data = mdata.mod[mod] + + sc.pp.regress_out( + data, + keys=par["obs_keys"], + n_jobs=multiprocessing.cpu_count() - 1 + ) + +logger.info("Writing to file") +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/transform/regress_out/setup_logger.py b/target/docker/transform/regress_out/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/transform/regress_out/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/transform/scale/.config.vsh.yaml b/target/docker/transform/scale/.config.vsh.yaml new file mode 100644 index 00000000000..f6f4229ab84 --- /dev/null +++ b/target/docker/transform/scale/.config.vsh.yaml @@ -0,0 +1,205 @@ +functionality: + name: "scale" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file." 
+ info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "List of modalities to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_value" + description: "Clip (truncate) to this value after scaling. Does not clip by default." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--zero_center" + description: "If False, omit zero-centering variables, which allows to handle\ + \ sparse input efficiently." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Scale data to unit variance and zero mean.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim-bullseye" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: 
"memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/scale" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/transform/scale/scale" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/transform/scale/scale b/target/docker/transform/scale/scale new file mode 100755 index 00000000000..10b71ccc27d --- /dev/null +++ b/target/docker/transform/scale/scale @@ -0,0 +1,1063 @@ +#!/usr/bin/env bash + +# scale 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="scale" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "scale 0.12.4" + echo "" + echo "Scale data to unit variance and zero mean." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " List of modalities to process." + echo "" + echo " --max_value" + echo " type: double" + echo " Clip (truncate) to this value after scaling. Does not clip by default." + echo "" + echo " --zero_center" + echo " type: boolean" + echo " default: true" + echo " If False, omit zero-centering variables, which allows to handle sparse" + echo " input efficiently." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " default: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." 
+}
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# Logs a critical message and exits with status 1 when the `docker` executable cannot be found or `docker version` fails.
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then  # run the lookup and negate its exit status; it must not be wrapped in `[ ... ]`
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  save=$-; set +e  # remember the shell options and disable errexit so the probe below may fail
+  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
+  out=$?
+  [[ $save =~ e ]] && set -e  # restore errexit only if it was enabled before
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect $1 > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $?
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' push succeeded." + else + ViashError "Container '$VSHD_ID' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.10-slim-bullseye + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scanpy~=1.9.5" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component transform scale" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scale-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scale 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_value) + [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MAX_VALUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_value. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_value=*) + [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value=*\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_VALUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --zero_center) + [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ZERO_CENTER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --zero_center. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --zero_center=*) + [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center=*\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ZERO_CENTER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
+            VIASH_PAR_OUTPUT="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --output_compression)
+            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT_COMPRESSION="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        --output_compression=*)
+            [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        ---setup) # '---setup <strategy>': the strategy is the next word
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$2"
+            [ $# -ge 2 ] && shift 2 || shift 1 # drop flag AND strategy; a bare trailing '---setup' only has the flag to drop
+            ;;
+        ---setup=*) # '---setup=<strategy>': the strategy is embedded in "$1"
+            VIASH_MODE='docker_setup'
+            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
+            shift 1 # only one word was consumed; shifting 2 would swallow the next argument (or fail under 'set -e')
+            ;;
+        ---dockerfile)
+            ViashDockerfile
+            exit 0
+            ;;
+        ---v|---volume) # '---volume <spec>': the mount spec is the next word
+            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
+            shift 2
+            ;;
+        ---volume=*) # '---volume=<spec>': the mount spec is embedded in "$1"
+            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'") # strip the flag from "$1"; "$2" is the following, unrelated argument
+            shift 1
+            ;;
+        ---debug) # start an interactive shell in the image instead of running the component
+            VIASH_MODE='docker_debug'
+            shift 1
+            ;;
+        ---cpus)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_CPUS="$2"
+            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
+            shift 2
+            ;;
+        ---cpus=*)
+            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_scale:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/transform_scale:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_scale:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/transform_scale:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) # ceiling division: round up to whole units at every step
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # the memory string did not parse: unset the variable itself so later ${VIASH_META_MEMORY_B+x} tests treat it as absent
+    unset VIASH_META_MEMORY_B # unset takes a NAME; "unset $VAR" with an empty value unsets nothing
+  fi
+fi
+# unset nproc if string is empty, so an empty value is treated the same as "not provided"
+if [ -z "$VIASH_META_CPUS" ]; then
+  unset VIASH_META_CPUS # by name, for the same reason as VIASH_META_MEMORY_B above
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+
+# filling in defaults (only when the variable is unset; an explicitly empty value is kept)
+if [ -z ${VIASH_PAR_MODALITY+x} ]; then
+  VIASH_PAR_MODALITY="rna"
+fi
+if [ -z ${VIASH_PAR_ZERO_CENTER+x} ]; then
+  VIASH_PAR_ZERO_CENTER="true"
+fi
+
+# check whether required files exist
+if [ ! -z "$VIASH_PAR_INPUT" ] && [ !
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MAX_VALUE" ]]; then + if ! [[ "$VIASH_PAR_MAX_VALUE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--max_value' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ZERO_CENTER" ]]; then + if ! [[ "$VIASH_PAR_ZERO_CENTER" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--zero_center' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! 
-z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/transform_scale:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_scale:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/transform_scale:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scale-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +from mudata import read_h5mu +import scanpy + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'max_value': $( if [ ! -z ${VIASH_PAR_MAX_VALUE+x} ]; then echo "float(r'${VIASH_PAR_MAX_VALUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'zero_center': $( if [ ! -z ${VIASH_PAR_ZERO_CENTER+x} ]; then echo "r'${VIASH_PAR_ZERO_CENTER//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(): + logger.info(f'Reading .h5mu file: {par["input"]}') + mudata = read_h5mu(par["input"]) + mod = par["modality"] + data = mudata.mod[mod] + + logger.info("Scaling modality: %s", mod) + scanpy.pp.scale(data, + zero_center=par["zero_center"], + max_value=par["max_value"]) + + logger.info("Writing to %s", par["output"]) + mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) + logger.info("Finished") + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/docker/transform/scale/setup_logger.py b/target/docker/transform/scale/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/transform/scale/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/velocity/scvelo/.config.vsh.yaml b/target/docker/velocity/scvelo/.config.vsh.yaml new file mode 100644 index 00000000000..33086cd345a --- /dev/null +++ b/target/docker/velocity/scvelo/.config.vsh.yaml @@ -0,0 +1,276 @@ +functionality: + name: "scvelo" + namespace: "velocity" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + 
arguments: + - type: "file" + name: "--input" + description: "Velocyto loom file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output directory. If it does not exist, will be created." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Filtering and normalization" + description: "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize\ + \ function)" + arguments: + - type: "integer" + name: "--min_counts" + description: "Minimum number of counts required for a gene to pass filtering\ + \ (spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_counts_u" + description: "Minimum number of counts required for a gene to pass filtering\ + \ (unspliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells" + description: "Minimum number of cells expressed required to pass filtering (spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells_u" + description: "Minimum number of cells expressed required to pass filtering (unspliced)." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_shared_counts" + description: "Minimum number of counts (both unspliced and spliced) required\ + \ for a gene." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_shared_cells" + description: "Minimum number of cells required to be expressed (both unspliced\ + \ and spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_top_genes" + description: "Number of genes to keep." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--log_transform" + description: "Do not log transform counts." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Fitting parameters" + description: "Arguments for fitting the data" + arguments: + - type: "integer" + name: "--n_principal_components" + description: "Number of principal components to use for calculating moments." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_neighbors" + description: "Number of neighbors to use. 
First/second-order moments are computed\ + \ for each\ncell across its nearest neighbors, where the neighbor graph is\ + \ obtained from\neuclidean distances in PCA space.\n" + info: null + default: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scvelo~=0.2.5" + - "numpy~=1.23.5" + - "matplotlib<3.8.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: 
"memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/scvelo" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/scvelo/scvelo" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/velocity/scvelo/scvelo b/target/docker/velocity/scvelo/scvelo new file mode 100755 index 00000000000..66876000ed6 --- /dev/null +++ b/target/docker/velocity/scvelo/scvelo @@ -0,0 +1,1272 @@ +#!/usr/bin/env bash + +# scvelo 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
function ViashInfo {
  # forwards the message to ViashLog at the info log level
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stdout: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so that a script location containing whitespace stays one argument)
VIASH_META_RESOURCES_DIR=$(ViashSourceDir "${BASH_SOURCE[0]}")

# define meta fields
VIASH_META_FUNCTIONALITY_NAME="scvelo"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"

# ViashHelp: Display helpful explanation about this executable
# stdout: the component name/version followed by one entry per argument,
#         grouped by argument group.
function ViashHelp {
  echo "scvelo 0.12.4"
  echo ""
  echo "Inputs:"
  echo " --input"
  echo " type: file, required parameter, file must exist"
  echo " Velocyto loom file."
  echo ""
  echo "Outputs:"
  echo " --output"
  echo " type: file, required parameter, output, file must exist"
  echo " Output directory. If it does not exist, will be created."
  echo ""
  echo " --output_compression"
  echo " type: string"
  echo " example: gzip"
  echo " choices: [ gzip, lzf ]"
  echo " The compression format to be used on the output h5mu object."
  echo ""
  echo "Filtering and normalization:"
  echo " Arguments for filtering, normalization an log transform (see"
  echo " scvelo.pp.filter_and_normalize function)"
  echo ""
  echo " --min_counts"
  echo " type: integer"
  echo " Minimum number of counts required for a gene to pass filtering"
  echo " (spliced)."
  echo ""
  echo " --min_counts_u"
  echo " type: integer"
  echo " Minimum number of counts required for a gene to pass filtering"
  echo " (unspliced)."
  echo ""
  echo " --min_cells"
  echo " type: integer"
  echo " Minimum number of cells expressed required to pass filtering (spliced)."
  echo ""
  echo " --min_cells_u"
  echo " type: integer"
  echo " Minimum number of cells expressed required to pass filtering"
  echo " (unspliced)."
  echo ""
  echo " --min_shared_counts"
  echo " type: integer"
  echo " Minimum number of counts (both unspliced and spliced) required for a"
  echo " gene."
  echo ""
  echo " --min_shared_cells"
  echo " type: integer"
  echo " Minimum number of cells required to be expressed (both unspliced and"
  echo " spliced)."
  echo ""
  echo " --n_top_genes"
  echo " type: integer"
  echo " Number of genes to keep."
  echo ""
  echo " --log_transform"
  echo " type: boolean"
  echo " default: true"
  echo " Do not log transform counts."
  echo ""
  echo "Fitting parameters:"
  echo " Arguments for fitting the data"
  echo ""
  echo " --n_principal_components"
  echo " type: integer"
  echo " Number of principal components to use for calculating moments."
  echo ""
  echo " --n_neighbors"
  echo " type: integer"
  echo " default: 30"
  echo " Number of neighbors to use. First/second-order moments are computed for"
  echo " each"
  echo " cell across its nearest neighbors, where the neighbor graph is obtained"
  echo " from"
  echo " euclidean distances in PCA space."
}

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the `docker` executable cannot be found
# or when `docker version` fails (daemon not reachable).
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # `command -v` has to be executed as a command, not placed inside `[ ... ]`:
  # inside brackets it is parsed as an (invalid) test expression, so the
  # "not installed" branch could never trigger.
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # remember the current shell options and disable errexit so that the exit
  # code of `docker version` can be inspected instead of aborting the script
  save=$-; set +e
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  out=$?
  # restore errexit only if it was enabled before
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  # `docker images -q` prints one id per matching image; empty output = absent
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # verbose mode: let docker write its own progress output
    docker pull "$1" && return 0 || return 1
  else
    # quiet mode: hide docker output and only warn when the pull failed
    save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  # report on the image that was actually pushed ($1) rather than on a global
  # that is only set when the caller happens to be ViashDockerSetup
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  save=$-; set +e
  ViashDockerPull "$1"
  out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    # forward the image id and any extra build flags (e.g. --no-cache) as-is
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." 
+ else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "mudata~=0.2.3" "anndata~=0.9.1" "scvelo~=0.2.5" "numpy~=1.23.5" "matplotlib<3.8.0" + +LABEL org.opencontainers.image.authors="Dries Schaumont" +LABEL org.opencontainers.image.description="Companion container for running component velocity scvelo" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL 
org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-scvelo-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile > $dockerfile + + # Build the container + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile + else + docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile &> $tmpdir/docker_build.log + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scvelo 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_counts) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_counts=*) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_counts_u) + [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS_U="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts_u. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_counts_u=*) + [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u=*\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS_U=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MIN_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells=*) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells_u) + [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_U="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_u. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells_u=*) + [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u=*\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_U=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_shared_counts) + [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SHARED_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_shared_counts=*) + [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts=*\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MIN_SHARED_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_shared_cells) + [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SHARED_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_cells. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_shared_cells=*) + [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells=*\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SHARED_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_top_genes) + [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_TOP_GENES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_top_genes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_top_genes=*) + [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes=*\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_TOP_GENES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --log_transform) + [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOG_TRANSFORM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --log_transform. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --log_transform=*) + [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform=*\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOG_TRANSFORM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_principal_components) + [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_PRINCIPAL_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_principal_components. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_principal_components=*) + [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components=*\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_PRINCIPAL_COMPONENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_neighbors) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_neighbors=*) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1
            VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---setup)
            # the strategy is a separate word: consume the flag AND its value
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---setup. Use "--help" to get more information on the parameters. && exit 1
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$2"
            shift 2
            ;;
        ---setup=*)
            # flag and value are a single word: consume exactly one argument
            VIASH_MODE='docker_setup'
            VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
            shift 1
            ;;
        ---dockerfile)
            ViashDockerfile
            exit 0
            ;;
        ---volume)
            # NOTE: the short form '---v' is already taken by '---v|---verbose'
            # earlier in this case statement, so it could never reach this arm.
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---volume. Use "--help" to get more information on the parameters. && exit 1
            VIASH_EXTRA_MOUNTS+=("--volume='$2'")
            shift 2
            ;;
        ---volume=*)
            # the value is embedded in the current word ($1), not in the next one
            VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$1")'")
            shift 1
            ;;
        ---debug)
            VIASH_MODE='docker_debug'
            shift 1
            ;;
        ---cpus)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---cpus=*)
            [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_CPUS=$(ViashRemoveFlags "$1")
            shift 1
            ;;
        ---memory)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
            VIASH_META_MEMORY="$2"
            [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
            shift 2
            ;;
        ---memory=*)
            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option.
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then
  # quoted so that a value such as "2 GB" reaches ViashMemoryAsBytes as one argument
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # (`unset` takes the variable NAME; with a `$` it expands the empty value,
    # unsets nothing, and the variable stays set-but-empty)
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  # pass the name, not the (empty) value, so later `${VIASH_META_CPUS+x}`
  # checks see the variable as unset
  unset VIASH_META_CPUS
fi


# check whether required parameters exist
if [ -z ${VIASH_PAR_INPUT+x} ]; then
  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_CONFIG+x} ]; then
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then + VIASH_PAR_LOG_TRANSFORM="true" +fi +if [ -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then + VIASH_PAR_N_NEIGHBORS="30" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_COUNTS_U" ]]; then + if ! [[ "$VIASH_PAR_MIN_COUNTS_U" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_counts_u' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS_U" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS_U" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells_u' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MIN_SHARED_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_shared_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SHARED_CELLS" ]]; then + if ! [[ "$VIASH_PAR_MIN_SHARED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_shared_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_TOP_GENES" ]]; then + if ! 
[[ "$VIASH_PAR_N_TOP_GENES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_top_genes' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LOG_TRANSFORM" ]]; then + if ! [[ "$VIASH_PAR_LOG_TRANSFORM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--log_transform' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ]]; then + if ! [[ "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_principal_components' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then + if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_scvelo:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scvelo-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import scvelo +import mudata +from contextlib import redirect_stdout +from pathlib import Path +import matplotlib as mpl + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_counts_u': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cells_u': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_shared_counts': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_shared_cells': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'log_transform': $( if [ ! -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then echo "r'${VIASH_PAR_LOG_TRANSFORM//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'n_principal_components': $( if [ ! 
-z ${VIASH_PAR_N_PRINCIPAL_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_N_PRINCIPAL_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +mpl.rcParams['savefig.dpi']=150 + +# Script must be wrapped into a main function because scvelo spawn subprocesses +# and this fails when the functions are not wrapped. +def main(): + # Create output directory + output_dir = Path(par['output']) + output_dir.mkdir(parents=True, exist_ok=True) + scvelo.settings.figdir = str(output_dir) + + + # Calculate the sample name + sample_name = par["output"].removesuffix(".loom") + sample_name = Path(sample_name).name + + # Read the input data + adata = scvelo.read(par['input']) + + # Save spliced vs unspliced proportions to file + with (output_dir / "proportions.txt").open('w') as target: + with redirect_stdout(target): + scvelo.utils.show_proportions(adata) + + # Plot piecharts of spliced vs unspliced proportions + scvelo.pl.proportions(adata, save=True, show=False) + + # Perform preprocessing + scvelo.pp.filter_and_normalize(adata, + min_counts=par["min_counts"], + min_counts_u=par["min_counts_u"], + min_cells=par["min_cells"], + min_cells_u=par["min_cells_u"], + min_shared_counts=par["min_shared_counts"], + min_shared_cells=par["min_shared_cells"], + n_top_genes=par["n_top_genes"], + log=par["log_transform"]) + + # Fitting + scvelo.pp.moments(adata, + n_pcs=par["n_principal_components"], 
+ n_neighbors=par["n_neighbors"]) + + + # Second step in velocyto calculations + # Velocity calculation and visualization + # From the scvelo manual: + # The solution to the full dynamical model is obtained by setting mode='dynamical', + # which requires to run scv.tl.recover_dynamics(adata) beforehand + scvelo.tl.recover_dynamics(adata) + scvelo.tl.velocity(adata, mode="dynamical") + scvelo.tl.velocity_graph(adata) + scvelo.pl.velocity_graph(adata, save=str(output_dir / "scvelo_graph.pdf"), show=False) + + # Plotting + # TODO: add more here. + scvelo.pl.velocity_embedding_stream(adata, save=str(output_dir / "scvelo_embedding.pdf"), show=False) + + # Create output + ouput_data = mudata.MuData({'rna_velocity': adata}) + ouput_data.write_h5mu(output_dir / f"{sample_name}.h5mu", compression=par["output_compression"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/docker/velocity/scvelo/setup_logger.py b/target/docker/velocity/scvelo/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/docker/velocity/scvelo/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/docker/velocity/velocyto/.config.vsh.yaml b/target/docker/velocity/velocyto/.config.vsh.yaml new file mode 100644 index 00000000000..bbbced35d7f --- /dev/null +++ b/target/docker/velocity/velocyto/.config.vsh.yaml @@ -0,0 +1,225 @@ +functionality: + name: "velocyto" + namespace: "velocity" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to BAM file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome" + alternatives: + - "-t" + description: "Path to GTF file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--barcode" + alternatives: + - "-b" + description: "Valid 
barcodes file, to filter the bam. If --bcfile is not specified\ + \ all the cell barcodes will be included.\nCell barcodes should be specified\ + \ in the bcfile as the 'CB' tag for each read\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--without_umi" + description: "foo" + info: null + direction: "input" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Velocyto loom file" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--logic" + alternatives: + - "-l" + description: "The logic to use for the filtering." + info: null + default: + - "Default" + required: false + choices: + - "Default" + - "Permissive10X" + - "Intermediate10X" + - "ValidatedIntrons10X" + - "Stricter10X" + - "ObservedSpanning10X" + - "Discordant10X" + - "SmartSeq2" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Runs the velocity analysis on a BAM file, outputting a loom file." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + - type: "file" + path: "resources_test/rna_velocity" + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + - "file" + interactive: false + - type: "python" + user: false + pip: + - "numpy" + - "Cython" + upgrade: true + - type: "python" + user: false + pip: + - "velocyto" + upgrade: true + - type: "apt" + packages: + - "samtools" + interactive: false + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" 
+ cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml" + platform: "docker" + output: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/velocyto" + executable: "/home/runner/work/openpipeline/openpipeline/target/docker/velocity/velocyto/velocyto" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/docker/velocity/velocyto/velocyto b/target/docker/velocity/velocyto/velocyto new file mode 100755 index 00000000000..d6ba0d392cf --- /dev/null +++ b/target/docker/velocity/velocyto/velocyto @@ -0,0 +1,1097 @@ +#!/usr/bin/env bash + +# velocyto 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 
1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] ', emitted only when VIASH_VERBOSITY >= $1. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="velocyto" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "velocyto 0.12.4" + echo "" + echo "Runs the velocity analysis on a BAM file, outputting a loom file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Path to BAM file" + echo "" + echo " -t, --transcriptome" + echo " type: file, required parameter, file must exist" + echo " Path to GTF file" + echo "" + echo " -b, --barcode" + echo " type: file, file must exist" + echo " Valid barcodes file, to filter the bam. If --bcfile is not specified all" + echo " the cell barcodes will be included." + echo " Cell barcodes should be specified in the bcfile as the 'CB' tag for each" + echo " read" + echo "" + echo " --without_umi" + echo " type: boolean_true" + echo " foo" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Velocyto loom file" + echo "" + echo " -l, --logic" + echo " type: string" + echo " default: Default" + echo " choices: [ Default, Permissive10X, Intermediate10X, ValidatedIntrons10X," + echo "Stricter10X, ObservedSpanning10X, Discordant10X, SmartSeq2 ]" + echo " The logic to use for the filtering." 
+} + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# exits with status 1 when the docker CLI is not on PATH or the daemon cannot be reached +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if ! command -v docker &> /dev/null; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + save=$-; set +e + docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? 
# returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : exit status of `docker push` (0 when the push succeeded) +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + save=$-; set +e + ViashDockerPull $1 + out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + VSHD_ID="$1" + VSHD_STRAT="$2" + if [ "$VSHD_STRAT" == "alwaysbuild" -o "$VSHD_STRAT" == "build" -o "$VSHD_STRAT" == "b" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspull" -o "$VSHD_STRAT" == "pull" -o "$VSHD_STRAT" == "p" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayspullelsebuild" -o "$VSHD_STRAT" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "alwayspullelsecachedbuild" -o "$VSHD_STRAT" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "alwayscachedbuild" -o "$VSHD_STRAT" == "cachedbuild" -o "$VSHD_STRAT" == "cb" ]; then + ViashDockerBuild $VSHD_ID + elif [[ "$VSHD_STRAT" =~ ^ifneedbe ]]; then + save=$-; set +e + ViashDockerLocalTagCheck $VSHD_ID + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $VSHD_ID already exists" + elif [ "$VSHD_STRAT" == "ifneedbebuild" ]; then + ViashDockerBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepull" ]; then + ViashDockerPull $VSHD_ID + elif [ "$VSHD_STRAT" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $VSHD_ID --no-cache + elif [ "$VSHD_STRAT" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $VSHD_ID + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi + elif [ "$VSHD_STRAT" == "push" -o "$VSHD_STRAT" == "forcepush" -o "$VSHD_STRAT" == "alwayspush" ]; then + ViashDockerPush "$VSHD_ID" + elif [ "$VSHD_STRAT" == "pushifnotpresent" -o "$VSHD_STRAT" == "gentlepush" -o "$VSHD_STRAT" == "maybepush" ]; then + save=$-; set +e + ViashDockerRemoteTagCheck $VSHD_ID + outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$VSHD_ID' exists, doing nothing." + else + ViashNotice "Container '$VSHD_ID' does not yet exist." + ViashDockerPush "$VSHD_ID" + fi + elif [ "$VSHD_STRAT" == "donothing" -o "$VSHD_STRAT" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $VSHD_STRAT" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + tag=$1 + shift 1 + commands="$@" + save=$-; set +e + missing=$(docker run --rm --entrypoint=sh $tag -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$tag' does not contain command '$missing'." + exit 1 + fi +} + + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + cat << 'VIASHDOCKER' +FROM python:3.9-slim + +ENTRYPOINT [] + + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps build-essential file && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "numpy" "Cython" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "velocyto" + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y samtools && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.authors="Robrecht Cannoodt" +LABEL org.opencontainers.image.description="Companion container for running component velocity velocyto" +LABEL org.opencontainers.image.created="2024-01-31T09:08:34Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline" +LABEL org.opencontainers.image.revision="a075b9f384e200b357c4c85801062a980ddb3383" +LABEL org.opencontainers.image.version="0.12.4" + +VIASHDOCKER +} + +# ViashDockerBuild: build a docker container +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? 
: whether or not the image was built +function ViashDockerBuild { + # create temporary directory to store dockerfile & optional resources in + tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-velocyto-XXXXXX") + dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources (path quoted: the temp dir may contain spaces) + ViashDockerfile > "$dockerfile" + + # Build the container; all paths and forwarded arguments are quoted to avoid word splitting + ViashNotice "Building container '$1' with Dockerfile" + ViashInfo "Running 'docker build -t $@ $VIASH_META_RESOURCES_DIR -f $dockerfile'" + save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" + else + docker build -t "$@" "$VIASH_META_RESOURCES_DIR" -f "$dockerfile" &> "$tmpdir/docker_build.log" + fi + out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$1'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi + ViashDockerCheckCommands "$1" 'ps' 'bash' +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_EXTRA_MOUNTS : Added another parameter to be passed to docker +# examples: +# ViashAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashAutodetectMount { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + echo "$mount_target/$base_name" +} +function ViashAutodetectMountArg { + abs_path=$(ViashAbsolutePath "$1") + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + mount_target="/viash_automount$mount_source" + ViashDebug "ViashAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashStripAutomount { + abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#/viash_automount}" +} +# ViashExtractFlags: Retain leading flag +# $1 : string with a possible leading flag +# return : leading flag +# examples: +# ViashExtractFlags --foo=bar # returns --foo +function ViashExtractFlags { + echo $1 | sed 's/=.*//' +} +# initialise variables +VIASH_EXTRA_MOUNTS=() + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + 
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "velocyto 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome) + [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome=*\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME=$(ViashRemoveFlags "$1") + shift 1 + ;; + -t) + [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_TRANSCRIPTOME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --barcode) + [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BARCODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --barcode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --barcode=*) + [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode=*\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BARCODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -b) + [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'-b\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BARCODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -b. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --without_umi) + [ -n "$VIASH_PAR_WITHOUT_UMI" ] && ViashError Bad arguments for option \'--without_umi\': \'$VIASH_PAR_WITHOUT_UMI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WITHOUT_UMI=true + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --logic) + [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOGIC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --logic. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --logic=*) + [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic=*\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOGIC=$(ViashRemoveFlags "$1") + shift 1 + ;; + -l) + [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'-l\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOGIC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -l. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---setup) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$2" + shift 1 + ;; + ---setup=*) + VIASH_MODE='docker_setup' + VIASH_DOCKER_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 2 + ;; + ---dockerfile) + ViashDockerfile + exit 0 + ;; + ---v|---volume) + VIASH_EXTRA_MOUNTS+=("--volume='$2'") + shift 2 + ;; + ---volume=*) + VIASH_EXTRA_MOUNTS+=("--volume='$(ViashRemoveFlags "$2")'") + shift 1 + ;; + ---debug) + VIASH_MODE='docker_debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +ViashDockerInstallationCheck + +if [ $VIASH_MODE == "docker_setup" ]; then + ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0' "$VIASH_DOCKER_SETUP_STRATEGY" + exit 0 +fi +ViashDockerSetup 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0' ifneedbepullelsecachedbuild + +if [ $VIASH_MODE == "docker_debug" ]; then + ViashNotice "+ docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0'" + docker run --entrypoint=bash -i --rm -v "$(pwd)":/pwd --workdir /pwd -t 'ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0' + exit 0 +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! 
-z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then + ViashError '--transcriptome' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then + VIASH_PAR_WITHOUT_UMI="false" +fi +if [ -z ${VIASH_PAR_LOGIC+x} ]; then + VIASH_PAR_LOGIC="Default" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BARCODE" ] && [ ! -e "$VIASH_PAR_BARCODE" ]; then + ViashError "Input file '$VIASH_PAR_BARCODE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_WITHOUT_UMI" ]]; then + if ! [[ "$VIASH_PAR_WITHOUT_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--without_umi' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_LOGIC" ]; then + VIASH_PAR_LOGIC_CHOICES=("Default:Permissive10X:Intermediate10X:ValidatedIntrons10X:Stricter10X:ObservedSpanning10X:Discordant10X:SmartSeq2") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_LOGIC_CHOICES[*]}:" =~ ":$VIASH_PAR_LOGIC:" ]]; then + ViashError '--logic' specified value of \'$VIASH_PAR_LOGIC\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +# detect volumes from file arguments +VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME")" ) + VIASH_PAR_TRANSCRIPTOME=$(ViashAutodetectMount "$VIASH_PAR_TRANSCRIPTOME") +fi +if [ ! -z "$VIASH_PAR_BARCODE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_BARCODE")" ) + VIASH_PAR_BARCODE=$(ViashAutodetectMount "$VIASH_PAR_BARCODE") +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_EXTRA_MOUNTS+=( "$(ViashAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashAutodetectMount "$VIASH_META_TEMP_DIR") +fi + +# get unique mounts +VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_EXTRA_MOUNTS[@]}"; do echo "$val"; done | sort -u)) + +# change file ownership +function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + eval docker run --entrypoint=chown -i --rm ${VIASH_UNIQUE_MOUNTS[@]} ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0 "$(id -u):$(id -g)" --silent --recursive ${VIASH_CHOWN_VARS[@]} + set -e + fi +} +trap ViashPerformChown EXIT + +# helper function for filling in extra docker args +VIASH_EXTRA_DOCKER_ARGS="" +if [ ! -z "$VIASH_META_MEMORY_MB" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --memory=${VIASH_META_MEMORY_MB}m" +fi +if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_EXTRA_DOCKER_ARGS="$VIASH_EXTRA_DOCKER_ARGS --cpus=${VIASH_META_CPUS}" +fi + +ViashDebug "Running command: $(echo docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0)" +cat << VIASHEOF | eval docker run --entrypoint=bash -i --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_EXTRA_DOCKER_ARGS ghcr.io/openpipelines-bio/velocity_velocyto:0.12.0 +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-velocyto-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome='&'#" ; else echo "# par_transcriptome="; fi ) +$( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "${VIASH_PAR_BARCODE}" | sed "s#'#'\"'\"'#g;s#.*#par_barcode='&'#" ; else echo "# par_barcode="; fi ) +$( if [ ! -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then echo "${VIASH_PAR_WITHOUT_UMI}" | sed "s#'#'\"'\"'#g;s#.*#par_without_umi='&'#" ; else echo "# par_without_umi="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_LOGIC+x} ]; then echo "${VIASH_PAR_LOGIC}" | sed "s#'#'\"'\"'#g;s#.*#par_logic='&'#" ; else echo "# par_logic="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=( ) + +if [ ! -z "\$par_barcode" ]; then + extra_params+=( "--bcfile=\$par_barcode" ) +fi +if [ "\$par_without_umi" == "true" ]; then + extra_params+=( "--without-umi" ) +fi +if [ ! 
-z "\$meta_cpus" ]; then + extra_params+=( "--samtools-threads" "\$meta_cpus" ) +fi +if [ ! -z "\$meta_memory_mb" ]; then + extra_params+=( "--samtools-memory" "\$meta_memory_mb" ) +fi + +output_dir=\`dirname "\$par_output"\` +sample_id=\`basename "\$par_output" .loom\` + +if (file \`readlink -f "\$par_transcriptome"\` | grep -q compressed ) ; then + # create temporary directory + tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") + function clean_up { + rm -rf "\$tmpdir" + } + trap clean_up EXIT + + zcat "\$par_transcriptome" > "\$tmpdir/genes.gtf" + par_transcriptome="\$tmpdir/genes.gtf" +fi + +velocyto run \\ + "\$par_input" \\ + "\$par_transcriptome" \\ + "\${extra_params[@]}" \\ + --outputfolder "\$output_dir" \\ + --sampleid "\$sample_id" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# strip viash automount from file paths +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashStripAutomount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ]; then + VIASH_PAR_TRANSCRIPTOME=$(ViashStripAutomount "$VIASH_PAR_TRANSCRIPTOME") +fi +if [ ! -z "$VIASH_PAR_BARCODE" ]; then + VIASH_PAR_BARCODE=$(ViashStripAutomount "$VIASH_PAR_BARCODE") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashStripAutomount "$VIASH_PAR_OUTPUT") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashStripAutomount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashStripAutomount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashStripAutomount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashStripAutomount "$VIASH_META_TEMP_DIR") +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/native/compression/compress_h5mu/.config.vsh.yaml b/target/native/compression/compress_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..6b026bef48e --- /dev/null +++ b/target/native/compression/compress_h5mu/.config.vsh.yaml @@ -0,0 +1,167 @@ +functionality: + name: "compress_h5mu" + namespace: "compression" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "location of output file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--compression" + description: "Compression type." + info: null + default: + - "gzip" + required: false + choices: + - "lzf" + - "gzip" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../../utils/compress_h5mu.py" + description: "Compress a MuData file. 
\n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 
1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/compression/compress_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/compression/compress_h5mu/compress_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/compression/compress_h5mu/compress_h5mu b/target/native/compression/compress_h5mu/compress_h5mu new file mode 100755 index 00000000000..243c31599f1 --- /dev/null +++ b/target/native/compression/compress_h5mu/compress_h5mu @@ -0,0 +1,537 @@ +#!/usr/bin/env bash + +# compress_h5mu 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="compress_h5mu" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "compress_h5mu 0.12.4" + echo "" + echo "Compress a MuData file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: sample_path" + echo " Path to the input .h5mu." + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " location of output file." + echo "" + echo " --compression" + echo " type: string" + echo " default: gzip" + echo " choices: [ lzf, gzip ]" + echo " Compression type." +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "compress_h5mu 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --compression) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --compression=*) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
# setting computational defaults

# ViashMemoryAsBytes: convert a human-readable memory amount to bytes
# $1     : amount followed by a unit (b, k/kb, m/mb, g/gb, t/tb, p/pb);
#          case-insensitive, whitespace is ignored (e.g. '2GB', '512 m')
# stdout : the amount in bytes (units are powers of 1024);
#          nothing at all when $1 cannot be parsed
function ViashMemoryAsBytes {
  # all working variables are local so callers' globals are not clobbered
  local memory number symbol memory_b
  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
  memory=$(echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]')
  if [[ $memory =~ $memory_regex ]]; then
    # force base 10 so a zero-padded amount such as '08' is not read as octal
    number=$(( 10#${BASH_REMATCH[1]} ))
    symbol=${BASH_REMATCH[2]}

    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( number * 1024 )) ;;
      mb|m)   memory_b=$(( number * 1024 * 1024 )) ;;
      gb|g)   memory_b=$(( number * 1024 * 1024 * 1024 )) ;;
      tb|t)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 )) ;;
      pb|p)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}

# compute memory in different units
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  # quoted so that a value containing spaces ('2 GB') reaches the parser intact
  VIASH_META_MEMORY_B=$(ViashMemoryAsBytes "$VIASH_META_MEMORY")
  # do not define other variables if memory_b is an empty string
  if [ -n "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( (VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( (VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( (VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( (VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( (VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty.
    # NOTE: `unset` takes the variable NAME; `unset $VAR` would expand to a
    # bare `unset` here and leave the variable set-but-empty.
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty (name, not value -- see note above)
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
+ exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then + VIASH_PAR_COMPRESSION="gzip" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_COMPRESSION" ]; then + VIASH_PAR_COMPRESSION_CHOICES=("lzf:gzip") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_COMPRESSION:" ]]; then + ViashError '--compression' specified value of \'$VIASH_PAR_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-compress_h5mu-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion +# from compress_h5mu import compress_h5mu +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + 
def compress_h5mu(input_path: Union[str, Path],
                  output_path: Union[str, Path],
                  compression: Union[Literal['gzip'], Literal['lzf']]):
    """Write a compressed copy of an .h5mu (HDF5) file.

    Every group and dataset of the input is recreated in the output, together
    with its attributes. Non-scalar datasets that are not yet compressed are
    rewritten with ``compression``; datasets that are already compressed, and
    scalar datasets, are copied unchanged. Finally the textual header stored
    in the first 512 bytes of the input is copied into the output's user block.

    Args:
        input_path: Path of the file to read.
        output_path: Path of the file to create; an existing file is
            overwritten (it is opened with mode ``'w'``).
        compression: Compression filter to apply to the rewritten datasets.

    Raises:
        NotImplementedError: If the input contains an object that is neither
            a group nor a dataset.
    """
    input_path, output_path = str(input_path), str(output_path)
    # The output is created with a user block of this size, and the header
    # is read from (and padded to) the same number of bytes.
    userblock_size = 512

    def copy_attributes(in_object, out_object):
        """Copy all HDF5 attributes from one object to another."""
        for key, value in in_object.attrs.items():
            out_object.attrs[key] = value

    def visit_path(output_h5: H5File,
                   compression: Union[Literal['gzip'], Literal['lzf']],
                   name: str, h5_object: Union[Group, Dataset]):
        """Recreate one visited input object under the same name in the output."""
        if isinstance(h5_object, Group):
            new_group = output_h5.create_group(name)
            copy_attributes(h5_object, new_group)
        elif isinstance(h5_object, Dataset):
            # Compression only works for non-scalar Dataset objects
            # Scalar objects dont have a shape defined
            if not h5_object.compression and h5_object.shape not in [None, ()]:
                new_dataset = output_h5.create_dataset(name, data=h5_object,
                                                       compression=compression)
                copy_attributes(h5_object, new_dataset)
            else:
                output_h5.copy(h5_object, name)
        else:
            raise NotImplementedError(f"Could not copy element {name}, "
                                      f"type has not been implemented yet: {type(h5_object)}")

    with H5File(input_path, 'r') as input_h5, \
            H5File(output_path, 'w', userblock_size=userblock_size) as output_h5:
        copy_attributes(input_h5, output_h5)
        input_h5.visititems(partial(visit_path, output_h5, compression))

    with open(input_path, "rb") as input_bytes:
        # Mudata puts metadata like this in the first 512 bytes:
        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
        # See mudata/_core/io.py, read_h5mu() function
        starting_metadata = input_bytes.read(userblock_size)
    # The metadata is padded with extra null bytes up until 512 bytes.
    # partition() keeps the whole chunk when it holds no null byte, whereas
    # slicing with the -1 returned by find() would drop the last byte.
    starting_metadata = starting_metadata.partition(b"\x00")[0]
    with open(output_path, "br+") as f:
        nbytes = f.write(starting_metadata)
        f.write(b"\0" * (userblock_size - nbytes))
+ info: null + example: + - "output_folder" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--strip_components" + alternatives: + - "-s" + description: "Strip this amount of leading components from file names on extraction.\ + \ For example, to extract only 'myfile.txt' from an archive containing the structure\ + \ `this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--exclude" + alternatives: + - "-e" + description: "Prevents any file or member whose name matches the shell wildcard\ + \ (pattern) from being extracted." + info: null + example: + - "docs/figures" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Extract files from a tar archive" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "../../../LICENSE" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "native" + id: "native" +- type: "docker" + id: "docker" + image: "ubuntu:latest" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +info: + config: "/home/runner/work/openpipeline/openpipeline/src/compression/tar_extract/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/compression/tar_extract" + executable: 
"/home/runner/work/openpipeline/openpipeline/target/native/compression/tar_extract/tar_extract" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/compression/tar_extract/tar_extract b/target/native/compression/tar_extract/tar_extract new file mode 100755 index 00000000000..bd4457f7d71 --- /dev/null +++ b/target/native/compression/tar_extract/tar_extract @@ -0,0 +1,514 @@ +#!/usr/bin/env bash + +# tar_extract 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
# define helper functions
# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# stdout : possibly quoted string
# examples:
#   ViashQuote --foo      # prints --foo
#   ViashQuote bar        # prints 'bar'
#   ViashQuote --foo=bar  # prints --foo='bar'
function ViashQuote {
  # printf instead of echo: echo would treat values such as -e or -n as its
  # own options and print nothing
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    printf '%s\n' "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    printf '%s\n' "$1"
  else
    printf '%s\n' "'$1'"
  fi
}
# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# stdout : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # prints bar
function ViashRemoveFlags {
  printf '%s\n' "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}
# ViashSourceDir: print the directory of a bash file, following symlinks
# usage  : ViashSourceDir ${BASH_SOURCE[0]}
# $1     : Should always be set to ${BASH_SOURCE[0]}
# stdout : The absolute path of the directory containing the bash file
function ViashSourceDir {
  # locals, so that a caller's SOURCE / DIR variables are left alone
  local source_path="$1"
  local source_dir
  while [ -h "$source_path" ]; do
    source_dir="$( cd -P "$( dirname "$source_path" )" >/dev/null 2>&1 && pwd )"
    source_path="$(readlink "$source_path")"
    # a relative link target is relative to the directory holding the link
    [[ $source_path != /* ]] && source_path="$source_dir/$source_path"
  done
  cd -P "$( dirname "$source_path" )" >/dev/null 2>&1 && pwd
}
# ViashLog: print a message when the configured verbosity allows it
# usage : ViashLog 1 alert Oh no something went wrong!
# $1    : severity level of the message (see the VIASH_LOGCODE_* constants)
# $2    : display tag
# $3+   : words of the message
# stderr: the message, prefixed with '[$2] ', when VIASH_VERBOSITY >= $1
function ViashLog {
  local level="$1"
  local tag="$2"
  shift 2
  # Guard clause: a suppressed message (or a failed comparison) yields
  # status 0, the same status an `if` without a taken branch would give.
  [ $VIASH_VERBOSITY -ge $level ] || return 0
  echo "[$tag]" "$@" >&2
}

# ViashEmergency: log events when the system is unstable
# usage : ViashEmergency Oh no something went wrong.
# stderr: the message, prefixed with '[emergency] '
function ViashEmergency { ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"; }

# ViashAlert: log events when actions must be taken immediately
# (e.g. corrupted system database)
# usage : ViashAlert Oh no something went wrong.
# stderr: the message, prefixed with '[alert] '
function ViashAlert { ViashLog $VIASH_LOGCODE_ALERT alert "$@"; }

# ViashCritical: log events when a critical condition occurs
# usage : ViashCritical Oh no something went wrong.
# stderr: the message, prefixed with '[critical] '
function ViashCritical { ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"; }

# ViashError: log events when an error condition occurs
# usage : ViashError Oh no something went wrong.
# stderr: the message, prefixed with '[error] '
function ViashError { ViashLog $VIASH_LOGCODE_ERROR error "$@"; }

# ViashWarning: log potentially abnormal events
# usage : ViashWarning Something may have gone wrong.
# stderr: the message, prefixed with '[warning] '
function ViashWarning { ViashLog $VIASH_LOGCODE_WARNING warning "$@"; }

# ViashNotice: log significant but normal events
# usage : ViashNotice This just happened.
# stderr: the message, prefixed with '[notice] '
function ViashNotice { ViashLog $VIASH_LOGCODE_NOTICE notice "$@"; }
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="tar_extract" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "tar_extract 0.12.4" + echo "" + echo "Extract files from a tar archive" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.tar.gz" + echo " Input file" + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output_folder" + echo " Folder to restore file(s) to." + echo "" + echo " -s, --strip_components" + echo " type: integer" + echo " example: 1" + echo " Strip this amount of leading components from file names on extraction." + echo " For example, to extract only 'myfile.txt' from an archive containing the" + echo " structure \`this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'." + echo "" + echo " -e, --exclude" + echo " type: string" + echo " example: docs/figures" + echo " Prevents any file or member whose name matches the shell wildcard" + echo " (pattern) from being extracted." 
+} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "tar_extract 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strip_components) + [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRIP_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --strip_components. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --strip_components=*) + [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'--strip_components=*\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRIP_COMPONENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + -s) + [ -n "$VIASH_PAR_STRIP_COMPONENTS" ] && ViashError Bad arguments for option \'-s\': \'$VIASH_PAR_STRIP_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_STRIP_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -s. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --exclude) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --exclude=*) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude=*\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -e) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'-e\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -e. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # memory string could not be parsed: unset memory_b by NAME so later ${VAR+x} checks see it as absent + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus by NAME if the string is empty, so later ${VAR+x} checks see it as absent +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." 
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_STRIP_COMPONENTS" ]]; then + if ! [[ "$VIASH_PAR_STRIP_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--strip_components' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-tar_extract-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/usr/bin/env bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_STRIP_COMPONENTS+x} ]; then echo "${VIASH_PAR_STRIP_COMPONENTS}" | sed "s#'#'\"'\"'#g;s#.*#par_strip_components='&'#" ; else echo "# par_strip_components="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=() +mkdir -p "\$par_output" # Create output directory if it doesn't exist already (quoted so paths containing spaces stay one argument) + +if [ "\$par_strip_components" != "" ]; then + extra_params+=("--strip-components=\$par_strip_components") +fi + +if [ "\$par_exclude" != "" ]; then + extra_params+=("--exclude=\$par_exclude") +fi + +echo "Extracting \$par_input to \$par_output..." +echo "" +tar "\${extra_params[@]}" -xvf "\$par_input" -C "\$par_output" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/native/dataflow/concat/.config.vsh.yaml b/target/native/dataflow/concat/.config.vsh.yaml new file mode 100644 index 00000000000..c5c31e51ea8 --- /dev/null +++ b/target/native/dataflow/concat/.config.vsh.yaml @@ -0,0 +1,222 @@ +functionality: + name: "concat" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Paths to the different samples to be concatenated." + info: null + example: + - "sample_paths" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "string" + name: "--input_id" + description: "Names of the different samples that have to be concatenated. Must\ + \ be specified when using '--mode move'.\nIn this case, the ids will be used\ + \ for the columns names of the dataframes registring the conflicts.\nIf specified,\ + \ must be of same length as `--input`.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_sample_name" + description: "Name of the .obs key under which to add the sample names." + info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--other_axis_mode" + description: "How to handle the merging of other axis (var, obs, ...).\n\n - None:\ + \ keep no data\n - same: only keep elements of the matrices which are the same\ + \ in each of the samples\n - unique: only keep elements for which there is only\ + \ 1 possible value (1 value that can occur in multiple samples)\n - first: keep\ + \ the annotation from the first sample\n - only: keep elements that show up\ + \ in only one of the objects (1 unique element in only 1 sample)\n - move: identical\ + \ to 'same', but moving the conflicting values to .varm or .obsm\n" + info: null + default: + - "move" + required: false + choices: + - "same" + - "unique" + - "first" + - "only" + - "concat" + - "move" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Concatenates several uni-modal samples in .h5mu files into a single\ + \ file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + - type: "file" + path: "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + 
target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "pandas~=2.1.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + - "muon" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/concat" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/concat/concat" + viash_version: "0.7.5" + 
git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/dataflow/concat/concat b/target/native/dataflow/concat/concat new file mode 100755 index 00000000000..e7aa2a1172f --- /dev/null +++ b/target/native/dataflow/concat/concat @@ -0,0 +1,898 @@ +#!/usr/bin/env bash + +# concat 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the 
path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory containing the bash file (symlinks resolved) +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] ', printed only when VIASH_VERBOSITY >= $1. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. 
+function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="concat" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "concat 0.12.4" + echo "" + echo "Concatenates several uni-modal samples in .h5mu files into a single file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " example: sample_paths" + echo " Paths to the different samples to be concatenated." 
+ echo "" + echo " --input_id" + echo " type: string, multiple values allowed" + echo " Names of the different samples that have to be concatenated. Must be" + echo " specified when using '--mode move'." + echo " In this case, the ids will be used for the columns names of the" + echo " dataframes registring the conflicts." + echo " If specified, must be of same length as \`--input\`." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --obs_sample_name" + echo " type: string" + echo " default: sample_id" + echo " Name of the .obs key under which to add the sample names." + echo "" + echo " --other_axis_mode" + echo " type: string" + echo " default: move" + echo " choices: [ same, unique, first, only, concat, move ]" + echo " How to handle the merging of other axis (var, obs, ...)." 
+ echo " - None: keep no data" + echo " - same: only keep elements of the matrices which are the same in each" + echo " of the samples" + echo " - unique: only keep elements for which there is only 1 possible value" + echo " (1 value that can occur in multiple samples)" + echo " - first: keep the annotation from the first sample" + echo " - only: keep elements that show up in only one of the objects (1 unique" + echo " element in only 1 sample)" + echo " - move: identical to 'same', but moving the conflicting values to .varm" + echo " or .obsm" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "concat 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID="$2" + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --input_id=*) + if [ -z "$VIASH_PAR_INPUT_ID" ]; then + VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_ID="$VIASH_PAR_INPUT_ID,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_sample_name) + [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_SAMPLE_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_sample_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_sample_name=*) + [ -n "$VIASH_PAR_OBS_SAMPLE_NAME" ] && ViashError Bad arguments for option \'--obs_sample_name=*\': \'$VIASH_PAR_OBS_SAMPLE_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_SAMPLE_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --other_axis_mode) + [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OTHER_AXIS_MODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --other_axis_mode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --other_axis_mode=*) + [ -n "$VIASH_PAR_OTHER_AXIS_MODE" ] && ViashError Bad arguments for option \'--other_axis_mode=*\': \'$VIASH_PAR_OTHER_AXIS_MODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OTHER_AXIS_MODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # memory string could not be parsed: unset memory_b by NAME so the generated script gets None instead of int('') + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus by NAME if the string is empty, so the generated script gets None instead of int('') +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then + VIASH_PAR_OBS_SAMPLE_NAME="sample_id" +fi +if [ -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then + VIASH_PAR_OTHER_AXIS_MODE="move" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ]; then + IFS=',' + set -f + for file in $VIASH_PAR_INPUT; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether the value belongs to the set of allowed choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_OTHER_AXIS_MODE" ]; then + VIASH_PAR_OTHER_AXIS_MODE_CHOICES=("same:unique:first:only:concat:move") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OTHER_AXIS_MODE_CHOICES[*]}:" =~ ":$VIASH_PAR_OTHER_AXIS_MODE:" ]]; then + ViashError '--other_axis_mode' specified value of \'$VIASH_PAR_OTHER_AXIS_MODE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-concat-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +import anndata +import mudata as mu +import pandas as pd +import numpy as np +from collections.abc import Iterable +from multiprocessing import Pool +from pathlib import Path +from h5py import File as H5File +from typing import Literal +import shutil + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_sample_name': $( if [ ! -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then echo "r'${VIASH_PAR_OBS_SAMPLE_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'other_axis_mode': $( if [ ! -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then echo "r'${VIASH_PAR_OTHER_AXIS_MODE//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) + +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion + +# from compress_h5mu import compress_h5mu +from h5py import Group, Dataset +from typing import Union +from functools import partial + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = input_bytes.read(100) + # The metadata is padded 
with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\\0" * (512 - nbytes)) +# END TEMPORARY WORKAROUND compress_h5mu + +# START TEMPORARY WORKAROUND setup_logger +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def indexes_unique(indices: Iterable[pd.Index]) -> bool: + combined_indices = indices[0].append(indices[1:]) + return combined_indices.is_unique + +def check_observations_unique(samples: Iterable[anndata.AnnData]) -> None: + observation_ids = [sample.obs.index for sample in samples] + if not indexes_unique(observation_ids): + raise ValueError("Observations are not unique across samples.") + + +def nunique(row): + unique = pd.unique(row) + unique_without_na = pd.core.dtypes.missing.remove_na_arraylike(unique) + return len(unique_without_na) > 1 + +def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) -> bool: + """ + Check if any row contains duplicate values, that are not NA. + """ + numpy_array = frame.to_numpy() + with Pool(n_processes) as pool: + is_duplicated = pool.map(nunique, iter(numpy_array)) + return any(is_duplicated) + +def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \\ + -> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]: + """ + Merge matrices by combining columns that have the same name. 
+ Columns that contain conflicting values (i.e. the columns have different values), + are not merged, but instead moved to a new dataframe. + """ + column_names = set(column_name for var in matrices.values() for column_name in var) + logger.debug('Trying to concatenate columns: %s.', ",".join(column_names)) + if not column_names: + return {}, pd.DataFrame(index=align_to) + conflicts, concatenated_matrix = \\ + split_conflicts_and_concatenated_columns(n_processes, + matrices, + column_names, + align_to) + concatenated_matrix = cast_to_writeable_dtype(concatenated_matrix) + conflicts = {conflict_name: cast_to_writeable_dtype(conflict_df) + for conflict_name, conflict_df in conflicts.items()} + return conflicts, concatenated_matrix + +def get_first_non_na_value_vector(df): + numpy_arr = df.to_numpy() + n_rows, n_cols = numpy_arr.shape + col_index = pd.isna(numpy_arr).argmin(axis=1) + flat_index = n_cols * np.arange(n_rows) + col_index + return pd.Series(numpy_arr.ravel()[flat_index], index=df.index, name=df.columns[0]) + +def split_conflicts_and_concatenated_columns(n_processes: int, + matrices: dict[str, pd.DataFrame], + column_names: Iterable[str], + align_to: pd.Index | None = None) -> \\ + tuple[dict[str, pd.DataFrame], pd.DataFrame]: + """ + Retrieve columns with the same name from a dict of dataframes (keyed by input id) which are + identical across all the frames (ignoring NA values). + Columns which are not the same are regarded as 'conflicts', + which are stored in separate dataframes, one per column name, + each holding the conflicting values of that column. + """ + conflicts = {} + concatenated_matrix = [] + for column_name in column_names: + columns = {input_id: var[column_name] + for input_id, var in matrices.items() + if column_name in var} + assert columns, "Some columns should have been found."
+ concatenated_columns = pd.concat(columns.values(), axis=1, + join="outer", sort=False) + if any_row_contains_duplicate_values(n_processes, concatenated_columns): + concatenated_columns.columns = columns.keys() # Use the sample id as column name + if align_to is not None: + concatenated_columns = concatenated_columns.reindex(align_to, copy=False) + conflicts[f'conflict_{column_name}'] = concatenated_columns + else: + unique_values = get_first_non_na_value_vector(concatenated_columns) + concatenated_matrix.append(unique_values) + if not concatenated_matrix: + return conflicts, pd.DataFrame(index=align_to) + concatenated_matrix = pd.concat(concatenated_matrix, join="outer", + axis=1, sort=False) + if align_to is not None: + concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False) + return conflicts, concatenated_matrix + +def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame: + """ + Cast the dataframe to dtypes that can be written by mudata. + """ + # dtype inference works better with np.nan + result = result.replace({pd.NA: np.nan}) + + # MuData supports nullable booleans and ints + # i.e. \`IntegerArray\` and \`BooleanArray\` + result = result.convert_dtypes(infer_objects=True, + convert_integer=True, + convert_string=False, + convert_boolean=True, + convert_floating=False) + + # Convert leftover 'object' columns to categoricals of strings + # NA values are supported, so keep them and only convert the non-NA values to string + object_cols = result.select_dtypes(include='object').columns.values + for obj_col in object_cols: + result[obj_col] = result[obj_col].where(result[obj_col].isna(), result[obj_col].astype(str)).astype('category') + return result + +def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnData], output: anndata.AnnData) \\ + -> anndata.AnnData: + """ + Merge .var and .obs matrices of the anndata objects. Columns are merged + when the values (excl NA) are the same in each of the matrices.
+ Conflicting columns are moved to a separate dataframe (one dataframe for each column, + containing all the corresponding column from each sample). + """ + matrices_to_parse = ("var", "obs") + for matrix_name in matrices_to_parse: + matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()} + output_index = getattr(output, matrix_name).index + align_to = output_index if matrix_name == "var" else None + conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to) + if concatenated_matrix.empty: + concatenated_matrix.index = output_index + # Write the conflicts to the output + for conflict_name, conflict_data in conflicts.items(): + getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data + + # Set other annotation matrices in the output + setattr(output, matrix_name, concatenated_matrix) + + return output + + +def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str | Path], + other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData: + + concat_modes = { + "move": None, + } + other_axis_mode_to_apply = concat_modes.get(other_axis_mode, other_axis_mode) + + mod_data = {} + for input_id, input_file in zip(input_ids, input_files): + try: + mod_data[input_id] = mu.read_h5ad(input_file, mod=mod) + except KeyError as e: # Modality does not exist for this sample, skip it + if f"Unable to open object '{mod}' doesn't exist" not in str(e): + raise e + pass + check_observations_unique(mod_data.values()) + + concatenated_data = anndata.concat(mod_data.values(), join='outer', merge=other_axis_mode_to_apply) + + if other_axis_mode == "move": + concatenated_data = split_conflicts_modalities(n_processes, mod_data, concatenated_data) + + return concatenated_data + +def concatenate_modalities(n_processes: int, modalities: list[str], input_files: Path | str, + other_axis_mode: str, output_file: Path | str, + compression: Literal['gzip'] | Literal['lzf'], + input_ids: tuple[str] | None = 
None) -> None: + """ + Join the modalities together into a single multimodal sample. + """ + logger.info('Concatenating samples.') + output_file, input_files = Path(output_file), [Path(input_file) for input_file in input_files] + output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") + output_file_uncompressed.touch() + # Create empty mudata file + mdata = mu.MuData({modality: anndata.AnnData() for modality in modalities}) + mdata.write(output_file_uncompressed, compression=compression) + + for mod_name in modalities: + new_mod = concatenate_modality(n_processes, mod_name, + input_files, other_axis_mode, + input_ids) + logger.info("Writing out modality '%s' to '%s' with compression '%s'.", + mod_name, output_file_uncompressed, compression) + mu.write_h5ad(output_file_uncompressed, data=new_mod, mod=mod_name) + + if compression: + compress_h5mu(output_file_uncompressed, output_file, compression=compression) + output_file_uncompressed.unlink() + else: + shutil.move(output_file_uncompressed, output_file) + + logger.info("Concatenation successful.") + +def main() -> None: + # Get a list of all possible modalities + mods = set() + for path in par["input"]: + try: + with H5File(path, 'r') as f_root: + mods = mods | set(f_root["mod"].keys()) + except OSError: + raise OSError(f"Failed to load {path}. 
Is it a valid h5 file?") + + input_ids = None + if par["input_id"]: + input_ids: tuple[str] = tuple(i.strip() for i in par["input_id"]) + if len(input_ids) != len(par["input"]): + raise ValueError("The number of sample names must match the number of sample files.") + + if len(set(input_ids)) != len(input_ids): + raise ValueError("The sample names should be unique.") + + logger.info("\\nConcatenating data from paths:\\n\\t%s", + "\\n\\t".join(par["input"])) + + if par["other_axis_mode"] == "move" and not input_ids: + raise ValueError("--mode 'move' requires --input_ids.") + + n_processes = meta["cpus"] if meta["cpus"] else 1 + concatenate_modalities(n_processes, + list(mods), + par["input"], + par["other_axis_mode"], + par["output"], + par["output_compression"], + input_ids=input_ids) + + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/native/dataflow/concat/setup_logger.py b/target/native/dataflow/concat/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/dataflow/concat/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/dataflow/merge/.config.vsh.yaml b/target/native/dataflow/merge/.config.vsh.yaml new file mode 100644 index 00000000000..31a20481cd7 --- /dev/null +++ b/target/native/dataflow/merge/.config.vsh.yaml @@ -0,0 +1,175 @@ +functionality: + name: "merge" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Paths to the single-modality .h5mu files that need to be combined" + info: null + default: + - "sample_paths" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Path to the output file." 
+ info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Combine one or more single-modality .h5mu files together into one\ + \ .h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu" + - type: "file" + path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "pandas~=2.0.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false 
+ transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/merge" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/merge/merge" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/dataflow/merge/merge b/target/native/dataflow/merge/merge new file mode 100755 index 00000000000..d4173d9a2d0 --- /dev/null +++ b/target/native/dataflow/merge/merge @@ -0,0 +1,563 @@ +#!/usr/bin/env bash + +# merge 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. 
The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 
+VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level (the message is printed when VIASH_VERBOSITY >= $1) +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '.
+function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="merge" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "merge 0.12.4" + echo "" + echo "Combine one or more single-modality .h5mu files together into one .h5mu file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " default: sample_paths" + echo " Paths to the single-modality .h5mu files that need to be combined" + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " default: output.h5mu" + echo " Path to the output file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." 
+} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "merge 0.12.4" + exit + ;; + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + -i) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" + else + VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        *)  # positional arg or unknown option
+            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
+            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
+            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+            shift # past argument
+            ;;
+    esac
+done
+
+# parse positional parameters
+eval set -- $VIASH_POSITIONAL_ARGS
+
+
+# setting computational defaults
+
+# helper function for parsing memory strings
+# $1     : memory string: digits followed by a unit (b, k/kb, m/mb, g/gb, t/tb, p/pb);
+#          upper case is lowered and whitespace is removed before matching
+# stdout : the amount in bytes (units are powers of 1024), or nothing at all
+#          when the string does not match
+function ViashMemoryAsBytes {
+  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
+  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
+  if [[ $memory =~ $memory_regex ]]; then
+    local number=${memory/[^0-9]*/}
+    local symbol=${memory/*[0-9]/}
+    # keep the result local so the helper does not create a global
+    local memory_b
+
+    case $symbol in
+      b)      memory_b=$number ;;
+      kb|k)   memory_b=$(( $number * 1024 )) ;;
+      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
+      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
+      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # unset takes the variable NAME: "unset $VAR" would expand the empty value,
+    # run a bare "unset" and leave the variable set, so the ${VAR+x} tests
+    # further down would still see it and emit int(r'') into the script
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # by name, for the same reason as above
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+
+# filling in defaults
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  VIASH_PAR_OUTPUT="output.h5mu"
+fi
+
+# check whether required files exist
+if [ ! -z "$VIASH_PAR_INPUT" ]; then
+  # split the value on ',' with globbing disabled while looping over the files
+  IFS=','
+  set -f
+  for file in $VIASH_PAR_INPUT; do
+    unset IFS
+    if [ ! -e "$file" ]; then
+      ViashError "Input file '$file' does not exist."
+ exit 1 + fi + done + set +f +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-merge-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +import mudata as md +import pandas as pd +import numpy as np + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi )
+}
+
+### VIASH END
+
+sys.path.append(meta["resources_dir"])
+# START TEMPORARY WORKAROUND setup_logger
+# reason: resources aren't available when using Nextflow fusion
+# from setup_logger import setup_logger
+def setup_logger():
+    """Return the root logger, set to INFO, with an added handler writing to stdout."""
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
+# END TEMPORARY WORKAROUND setup_logger
+logger = setup_logger()
+
+def main():
+    """Merge the modalities of several .h5mu inputs into one MuData file.
+
+    Reads every path in par["input"], collects the modalities of all inputs
+    (a modality name may occur in one input only), builds one MuData object
+    from them, normalises the dtypes of its .var and .obs frames and writes
+    the result to par["output"] with par["output_compression"].
+
+    Raises:
+        ValueError: when a modality name occurs in more than one input.
+    """
+    logger.info('Reading input files %s', ",".join(par["input"]))
+    input_samples = [md.read_h5mu(path) for path in par["input"]]
+
+    logger.info('Merging into single object.')
+    sample_modalities = {}
+    for input_sample in input_samples:
+        for mod_name, mod_data in input_sample.mod.items():
+            if mod_name in sample_modalities:
+                raise ValueError(f"Modality '{mod_name}' was found in more than 1 sample.")
+            sample_modalities[mod_name] = mod_data
+
+    merged = md.MuData(sample_modalities)
+    merged.update()
+    for df_attr in ("var", "obs"):
+        df = getattr(merged, df_attr)
+        df = df.replace({pd.NA: np.nan}, inplace=False)
+
+        # MuData supports nullable booleans and ints
+        # ie. \`IntegerArray\` and \`BooleanArray\`
+        df = df.convert_dtypes(infer_objects=True,
+                               convert_integer=True,
+                               convert_string=False,
+                               convert_boolean=True,
+                               convert_floating=False)
+
+        # Convert leftover 'object' columns to string (stored as categorical).
+        # astype returns a new Series, so the result has to be assigned back;
+        # without the assignment the conversion is silently discarded.
+        object_cols = df.select_dtypes(include='object').columns.values
+        for obj_col in object_cols:
+            df[obj_col] = df[obj_col].astype(str).astype('category')
+        setattr(merged, df_attr, df)
+
+    merged.write_h5mu(par["output"], compression=par["output_compression"])
+    logger.info('Finished')
+
+
+if __name__ == '__main__':
+    main()
+VIASHMAIN
+python -B "\$tempscript" &
+wait "\$!"
+
+VIASHEOF
+
+
+# check whether required files exist
+if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then
+  ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+  exit 1
+fi
+
+
+exit 0
diff --git a/target/native/dataflow/merge/setup_logger.py b/target/native/dataflow/merge/setup_logger.py
new file mode 100644
index 00000000000..ae71eb96115
--- /dev/null
+++ b/target/native/dataflow/merge/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
\ No newline at end of file
diff --git a/target/native/dataflow/split_modalities/.config.vsh.yaml b/target/native/dataflow/split_modalities/.config.vsh.yaml
new file mode 100644
index 00000000000..0db6e52f07f
--- /dev/null
+++ b/target/native/dataflow/split_modalities/.config.vsh.yaml
@@ -0,0 +1,214 @@
+functionality:
+  name: "split_modalities"
+  namespace: "dataflow"
+  version: "0.12.4"
+  authors:
+  - name: "Dries Schaumont"
+    roles:
+    - "maintainer"
+    info:
+      role: "Core Team Member"
+      links:
+        email: "dries@data-intuitive.com"
+        github: "DriesSchaumont"
+        orcid: "0000-0002-4389-0440"
+ linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to a single .h5mu file." + info: null + default: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory containing multiple h5mu files." + info: null + example: + - "/path/to/output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_types" + description: "A csv containing the base filename and modality type per output\ + \ file." + info: null + example: + - "types.csv" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--compression" + description: "The compression format to be used on the final h5mu object." 
+ info: null + default: + - "gzip" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Split the modalities from a single .h5mu multimodal sample into seperate\ + \ .h5mu files. \n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: 
"memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/split_modalities" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/dataflow/split_modalities/split_modalities" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/dataflow/split_modalities/setup_logger.py b/target/native/dataflow/split_modalities/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/dataflow/split_modalities/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/dataflow/split_modalities/split_modalities b/target/native/dataflow/split_modalities/split_modalities new file mode 100755 index 00000000000..88ccf44939f --- /dev/null +++ b/target/native/dataflow/split_modalities/split_modalities @@ -0,0 +1,586 @@ +#!/usr/bin/env bash + +# split_modalities 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
+#
+# The component may contain files which fall under a different license. The
+# authors of this component should specify the license in the header of such
+# files, or include a separate license file detailing the licenses of all included
+# files.
+#
+# Component authors:
+#  * Dries Schaumont (maintainer)
+#  * Robrecht Cannoodt (contributor)
+
+set -e
+
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1     : unquoted string
+# return : possibly quoted string
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory that contains a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory holding the (symlink-resolved) bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level; the message is shown when VIASH_VERBOSITY >= $1
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="split_modalities"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "split_modalities 0.12.4"
+  echo ""
+  echo "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu"
+  echo "files."
+  echo ""
+  echo "Arguments:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " default: sample_path"
+  echo " Path to a single .h5mu file."
+  echo ""
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " example: /path/to/output"
+  echo " Output directory containing multiple h5mu files."
+  echo ""
+  echo " --output_compression"
+  echo " type: string"
+  echo " example: gzip"
+  echo " choices: [ gzip, lzf ]"
+  echo " The compression format to be used on the output h5mu object."
+ echo "" + echo " --output_types" + echo " type: file, required parameter, output, file must exist" + echo " example: types.csv" + echo " A csv containing the base filename and modality type per output file." + echo "" + echo " --compression" + echo " type: string" + echo " default: gzip" + echo " The compression format to be used on the final h5mu object." +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "split_modalities 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_types) + [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TYPES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_types. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_types=*) + [ -n "$VIASH_PAR_OUTPUT_TYPES" ] && ViashError Bad arguments for option \'--output_types=*\': \'$VIASH_PAR_OUTPUT_TYPES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_TYPES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --compression) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --compression=*) + [ -n "$VIASH_PAR_COMPRESSION" ] && ViashError Bad arguments for option \'--compression=*\': \'$VIASH_PAR_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1
+            shift 2
+            ;;
+        ---memory=*)
+            [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
+            VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
+            shift 1
+            ;;
+        *)  # positional arg or unknown option
+            # since the positional args will be eval'd, can we always quote, instead of using ViashQuote
+            VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
+            [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
+            shift # past argument
+            ;;
+    esac
+done
+
+# parse positional parameters
+eval set -- $VIASH_POSITIONAL_ARGS
+
+
+# setting computational defaults
+
+# helper function for parsing memory strings
+# $1     : memory string: digits followed by a unit (b, k/kb, m/mb, g/gb, t/tb, p/pb);
+#          upper case is lowered and whitespace is removed before matching
+# stdout : the amount in bytes (units are powers of 1024), or nothing at all
+#          when the string does not match
+function ViashMemoryAsBytes {
+  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
+  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
+  if [[ $memory =~ $memory_regex ]]; then
+    local number=${memory/[^0-9]*/}
+    local symbol=${memory/*[0-9]/}
+    # keep the result local so the helper does not create a global
+    local memory_b
+
+    case $symbol in
+      b)      memory_b=$number ;;
+      kb|k)   memory_b=$(( $number * 1024 )) ;;
+      mb|m)   memory_b=$(( $number * 1024 * 1024 )) ;;
+      gb|g)   memory_b=$(( $number * 1024 * 1024 * 1024 )) ;;
+      tb|t)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;;
+      pb|p)   memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
+    esac
+    echo "$memory_b"
+  fi
+}
+# compute memory in different units
+if [ ! -z ${VIASH_META_MEMORY+x} ]; then
+  VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY`
+  # do not define other variables if memory_b is an empty string
+  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
+    # each unit is the previous one divided by 1024, rounded up
+    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
+    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
+    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
+    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
+    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
+  else
+    # unset memory if string is empty
+    # unset takes the variable NAME: "unset $VAR" would expand the empty value,
+    # run a bare "unset" and leave the variable set, so the ${VAR+x} tests
+    # further down would still see it and emit int(r'') into the script
+    unset VIASH_META_MEMORY_B
+  fi
+fi
+# unset nproc if string is empty
+if [ -z "$VIASH_META_CPUS" ]; then
+  # by name, for the same reason as above
+  unset VIASH_META_CPUS
+fi
+
+
+# check whether required parameters exist
+if [ -z ${VIASH_PAR_INPUT+x} ]; then
+  ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
+  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then
+  ViashError '--output_types' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
+  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
+  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
+  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_CONFIG+x} ]; then
+  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
+  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_COMPRESSION+x} ]; then + VIASH_PAR_COMPRESSION="gzip" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! 
-z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_TYPES")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-split_modalities-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +import mudata as md +from sys import stdout +from pathlib import Path +import pandas as pd + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_types': $( if [ ! -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_TYPES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main() -> None: + output_dir = Path(par["output"]) + if not output_dir.is_dir(): + output_dir.mkdir(parents=True, exist_ok=True) + + logger.info('Reading input file %s', par['input']) + sample = md.read_h5mu(par["input"].strip()) + input_file = Path(par["input"].strip()) + + logger.info('Creating output types csv') + + names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" + for mod_name in sample.mod.keys() } + df = pd.DataFrame({"name": list(names.keys()), "filename": list(names.values())}) + df.to_csv(par["output_types"], index=False) + + logger.info('Splitting up modalities %s', ", ".join(sample.mod.keys())) + for mod_name, mod in sample.mod.items(): + new_sample = md.MuData({mod_name: mod}) + logger.info('Writing to %s', names[mod_name]) + new_sample.write_h5mu(output_dir / names[mod_name], compression=par["output_compression"]) + + logger.info("Finished") + + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_TYPES" ] && [ !
-e "$VIASH_PAR_OUTPUT_TYPES" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_TYPES' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/native/download/sync_test_resources/.config.vsh.yaml b/target/native/download/sync_test_resources/.config.vsh.yaml new file mode 100644 index 00000000000..abb700f24ee --- /dev/null +++ b/target/native/download/sync_test_resources/.config.vsh.yaml @@ -0,0 +1,170 @@ +functionality: + name: "sync_test_resources" + namespace: "download" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "string" + name: "--input" + alternatives: + - "-i" + description: "Path to the S3 bucket to sync from." + info: null + default: + - "s3://openpipelines-data" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Path to the test resource directory." + info: null + default: + - "resources_test" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--quiet" + description: "Does not display the operations performed from the specified command." + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--dryrun" + description: "Displays the operations that would be performed using the specified\ + \ command without actually running them."
+ info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--delete" + description: "Files that exist in the destination but not in the source are deleted\ + \ during sync." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--exclude" + description: "Exclude all files or objects from the command that matches the specified\ + \ pattern." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Synchronise the test resources from s3://openpipelines-data to resources_test" + usage: "sync_test_resources\nsync_test_resources --input s3://openpipelines-data\ + \ --output resources_test\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "amazon/aws-cli:2.11.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "yum" + packages: + - "procps" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 
8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/download/sync_test_resources" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/download/sync_test_resources/sync_test_resources" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/download/sync_test_resources/sync_test_resources b/target/native/download/sync_test_resources/sync_test_resources new file mode 100755 index 00000000000..c4f466060d6 --- /dev/null +++ b/target/native/download/sync_test_resources/sync_test_resources @@ -0,0 +1,557 @@ +#!/usr/bin/env bash + +# sync_test_resources 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Robrecht Cannoodt (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 
1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="sync_test_resources" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "sync_test_resources 0.12.4" + echo "" + echo "Synchronise the test resources from s3://openpipelines-data to resources_test" + echo "" + echo "Usage:" + echo "sync_test_resources" + echo "sync_test_resources --input s3://openpipelines-data --output resources_test" + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: string" + echo " default: s3://openpipelines-data" + echo " Path to the S3 bucket to sync from." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " default: resources_test" + echo " Path to the test resource directory." + echo "" + echo " --quiet" + echo " type: boolean_true" + echo " Does not display the operations performed from the specified command." + echo "" + echo " --dryrun" + echo " type: boolean_true" + echo " Displays the operations that would be performed using the specified" + echo " command without actually running them." + echo "" + echo " --delete" + echo " type: boolean_true" + echo " Files that exist in the destination but not in the source are deleted" + echo " during sync." + echo "" + echo " --exclude" + echo " type: string, multiple values allowed" + echo " Exclude all files or objects from the command that matches the specified" + echo " pattern."
+} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "sync_test_resources 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --quiet) + [ -n "$VIASH_PAR_QUIET" ] && ViashError Bad arguments for option \'--quiet\': \'$VIASH_PAR_QUIET\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUIET=true + shift 1 + ;; + --dryrun) + [ -n "$VIASH_PAR_DRYRUN" ] && ViashError Bad arguments for option \'--dryrun\': \'$VIASH_PAR_DRYRUN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DRYRUN=true + shift 1 + ;; + --delete) + [ -n "$VIASH_PAR_DELETE" ] && ViashError Bad arguments for option \'--delete\': \'$VIASH_PAR_DELETE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DELETE=true + shift 1 + ;; + --exclude) + if [ -z "$VIASH_PAR_EXCLUDE" ]; then + VIASH_PAR_EXCLUDE="$2" + else + VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --exclude=*) + if [ -z "$VIASH_PAR_EXCLUDE" ]; then + VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") + else + VIASH_PAR_EXCLUDE="$VIASH_PAR_EXCLUDE:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + local memory_b + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory_b if string is empty (unset takes the variable NAME, not its expansion) + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus if string is empty, so that the later ${VIASH_META_CPUS+x} test reports it as absent +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_INPUT+x} ]; then + VIASH_PAR_INPUT="s3://openpipelines-data" +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="resources_test" +fi +if [ -z ${VIASH_PAR_QUIET+x} ]; then + VIASH_PAR_QUIET="false" +fi +if [ -z ${VIASH_PAR_DRYRUN+x} ]; then + VIASH_PAR_DRYRUN="false" +fi +if [ -z ${VIASH_PAR_DELETE+x} ]; then + VIASH_PAR_DELETE="false" +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_QUIET" ]]; then + if ! [[ "$VIASH_PAR_QUIET" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--quiet' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DRYRUN" ]]; then + if ! [[ "$VIASH_PAR_DRYRUN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--dryrun' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_DELETE" ]]; then + if ! [[ "$VIASH_PAR_DELETE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--delete' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-sync_test_resources-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\"'\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "${VIASH_PAR_DRYRUN}" | sed "s#'#'\"'\"'#g;s#.*#par_dryrun='&'#" ; else echo "# par_dryrun="; fi ) +$( if [ ! -z ${VIASH_PAR_DELETE+x} ]; then echo "${VIASH_PAR_DELETE}" | sed "s#'#'\"'\"'#g;s#.*#par_delete='&'#" ; else echo "# par_delete="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=( ) + +if [ "\$par_quiet" == "true" ]; then + extra_params+=( "--quiet" ) +fi +if [ "\$par_dryrun" == "true" ]; then + extra_params+=( "--dryrun" ) +fi +if [ "\$par_delete" == "true" ]; then + extra_params+=( "--delete" ) +fi + +if [ ! -z \${par_exclude+x} ]; then + # split on ':' via read so patterns such as '*.h5mu' are not glob-expanded + IFS=":" read -r -a exclude_patterns <<< "\$par_exclude" + for var in "\${exclude_patterns[@]}"; do + extra_params+=( "--exclude" "\$var" ) + done +fi + + +# Disable the use of the Amazon EC2 instance metadata service (IMDS). +# see https://florian.ec/blog/github-actions-awscli-errors/ +# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 +export AWS_EC2_METADATA_DISABLED=true + +aws s3 sync "\$par_input" "\$par_output" --no-sign-request "\${extra_params[@]}" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
+ exit 1 +fi + + +exit 0 diff --git a/target/native/integrate/scarches/.config.vsh.yaml b/target/native/integrate/scarches/.config.vsh.yaml new file mode 100644 index 00000000000..cbd87585423 --- /dev/null +++ b/target/native/integrate/scarches/.config.vsh.yaml @@ -0,0 +1,331 @@ +functionality: + name: "scarches" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file to use as a query" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + description: "Path to the directory with reference model or a web link. For\ + \ HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--dataset_name" + description: "Name of query dataset to use as a batch name. If not set, name\ + \ of the input file is used" + info: null + default: + - "test_dataset" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--model_output" + description: "Output directory for model" + info: null + default: + - "model" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Early stopping arguments" + arguments: + - type: "boolean" + name: "--early_stopping" + description: "Whether to perform early stopping with respect to the validation\ + \ set." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--early_stopping_monitor" + description: "Metric logged during validation set epoch." + info: null + default: + - "elbo_validation" + required: false + choices: + - "elbo_validation" + - "reconstruction_loss_validation" + - "kl_local_validation" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--early_stopping_patience" + description: "Number of validation epochs with no improvement after which training\ + \ will be stopped." 
+ info: null + default: + - 45 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--early_stopping_min_delta" + description: "Minimum change in the monitored quantity to qualify as an improvement,\ + \ i.e. an absolute change of less than min_delta, will count as no improvement." + info: null + default: + - 0.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset, defaults to (20000 / number\ + \ of cells) * 400 or 400; whichever is smallest." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--reduce_lr_on_plateau" + description: "Whether to monitor validation loss and reduce learning rate when\ + \ validation set `lr_scheduler_metric` plateaus." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_factor" + description: "Factor to reduce learning rate." + info: null + default: + - 0.6 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_patience" + description: "Number of epochs with no improvement after which learning rate\ + \ will be reduced." 
+ info: null + default: + - 30.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs reference mapping with scArches" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + - type: "file" + path: "resources_test/HLCA_reference_model/HLCA_reference_model.zip" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/pytorch:23.09-py3" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scvi-tools~=1.0.3" + - "pandas~=2.1.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" 
+ mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/integrate/scarches" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/integrate/scarches/scarches" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/integrate/scarches/scarches b/target/native/integrate/scarches/scarches new file mode 100755 index 00000000000..6b20b89e070 --- /dev/null +++ b/target/native/integrate/scarches/scarches @@ -0,0 +1,1086 @@ +#!/usr/bin/env bash + +# scarches 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Vladimir Shitov + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# ViashQuote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory containing a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory containing the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no
something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="scarches" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "scarches 0.12.4" + echo "" + echo "Performs reference mapping with scArches" + echo "" + echo "Inputs:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " Input h5mu file to use as a query" + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo "" + echo " -r, --reference" + echo " type: file, required parameter, file must exist" + echo " Path to the directory with reference model or a web link. For HLCA use" + echo " https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" + echo "" + echo " --dataset_name" + echo " type: string" + echo " default: test_dataset" + echo " Name of query dataset to use as a batch name. If not set, name of the" + echo " input file is used" + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object."
+ echo "" + echo " --model_output" + echo " type: file, output, file must exist" + echo " default: model" + echo " Output directory for model" + echo "" + echo " --obsm_output" + echo " type: string" + echo " default: X_integrated_scanvi" + echo " In which .obsm slot to store the resulting integrated embedding." + echo "" + echo "Early stopping arguments:" + echo " --early_stopping" + echo " type: boolean" + echo " Whether to perform early stopping with respect to the validation set." + echo "" + echo " --early_stopping_monitor" + echo " type: string" + echo " default: elbo_validation" + echo " choices: [ elbo_validation, reconstruction_loss_validation," + echo "kl_local_validation ]" + echo " Metric logged during validation set epoch." + echo "" + echo " --early_stopping_patience" + echo " type: integer" + echo " default: 45" + echo " min: 1" + echo " Number of validation epochs with no improvement after which training" + echo " will be stopped." + echo "" + echo " --early_stopping_min_delta" + echo " type: double" + echo " default: 0.0" + echo " min: 0.0" + echo " Minimum change in the monitored quantity to qualify as an improvement," + echo " i.e. an absolute change of less than min_delta, will count as no" + echo " improvement." + echo "" + echo "Learning parameters:" + echo " --max_epochs" + echo " type: integer, required parameter" + echo " Number of passes through the dataset, defaults to (20000 / number of" + echo " cells) * 400 or 400; whichever is smallest." + echo "" + echo " --reduce_lr_on_plateau" + echo " type: boolean" + echo " default: true" + echo " Whether to monitor validation loss and reduce learning rate when" + echo " validation set \`lr_scheduler_metric\` plateaus." + echo "" + echo " --lr_factor" + echo " type: double" + echo " default: 0.6" + echo " min: 0.0" + echo " Factor to reduce learning rate." 
+ echo "" + echo " --lr_patience" + echo " type: double" + echo " default: 30.0" + echo " min: 0.0" + echo " Number of epochs with no improvement after which learning rate will be" + echo " reduced." +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scarches 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -r) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dataset_name) + [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_DATASET_NAME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --dataset_name. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --dataset_name=*) + [ -n "$VIASH_PAR_DATASET_NAME" ] && ViashError Bad arguments for option \'--dataset_name=*\': \'$VIASH_PAR_DATASET_NAME\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_DATASET_NAME=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --model_output) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --model_output=*) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping) + [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --early_stopping=*) + [ -n "$VIASH_PAR_EARLY_STOPPING" ] && ViashError Bad arguments for option \'--early_stopping=*\': \'$VIASH_PAR_EARLY_STOPPING\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_monitor) + [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MONITOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_monitor. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping_monitor=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_MONITOR" ] && ViashError Bad arguments for option \'--early_stopping_monitor=*\': \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MONITOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_patience) + [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_PATIENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_patience. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping_patience=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ] && ViashError Bad arguments for option \'--early_stopping_patience=*\': \'$VIASH_PAR_EARLY_STOPPING_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_EARLY_STOPPING_PATIENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --early_stopping_min_delta) + [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MIN_DELTA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --early_stopping_min_delta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --early_stopping_min_delta=*) + [ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ] && ViashError Bad arguments for option \'--early_stopping_min_delta=*\': \'$VIASH_PAR_EARLY_STOPPING_MIN_DELTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EARLY_STOPPING_MIN_DELTA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_epochs) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_epochs=*) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reduce_lr_on_plateau) + [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REDUCE_LR_ON_PLATEAU="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reduce_lr_on_plateau. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reduce_lr_on_plateau=*) + [ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ] && ViashError Bad arguments for option \'--reduce_lr_on_plateau=*\': \'$VIASH_PAR_REDUCE_LR_ON_PLATEAU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REDUCE_LR_ON_PLATEAU=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lr_factor) + [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_FACTOR="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_factor. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lr_factor=*) + [ -n "$VIASH_PAR_LR_FACTOR" ] && ViashError Bad arguments for option \'--lr_factor=*\': \'$VIASH_PAR_LR_FACTOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_FACTOR=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lr_patience) + [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_PATIENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lr_patience. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --lr_patience=*) + [ -n "$VIASH_PAR_LR_PATIENCE" ] && ViashError Bad arguments for option \'--lr_patience=*\': \'$VIASH_PAR_LR_PATIENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LR_PATIENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings: echoes the size in bytes (1024-based units) for values such as '2gb' or '512 MB', and echoes nothing when the string is not a valid size +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units (each larger unit is rounded up) +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty (unset needs the variable name, not its expanded value) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty (by name, so the variable is really removed) +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE+x} ]; then + ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters.
+ exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then + ViashError '--max_epochs' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_DATASET_NAME+x} ]; then + VIASH_PAR_DATASET_NAME="test_dataset" +fi +if [ -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then + VIASH_PAR_MODEL_OUTPUT="model" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_integrated_scanvi" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then + VIASH_PAR_EARLY_STOPPING_MONITOR="elbo_validation" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then + VIASH_PAR_EARLY_STOPPING_PATIENCE="45" +fi +if [ -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then + VIASH_PAR_EARLY_STOPPING_MIN_DELTA="0.0" +fi +if [ -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then + VIASH_PAR_REDUCE_LR_ON_PLATEAU="true" +fi +if [ -z ${VIASH_PAR_LR_FACTOR+x} ]; then + VIASH_PAR_LR_FACTOR="0.6" +fi +if [ -z ${VIASH_PAR_LR_PATIENCE+x} ]; then + VIASH_PAR_LR_PATIENCE="30.0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_EARLY_STOPPING" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--early_stopping' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EARLY_STOPPING_PATIENCE" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING_PATIENCE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--early_stopping_patience' has to be an integer. Use "--help" to get more information on the parameters. 
+ exit 1 + fi + if [[ $VIASH_PAR_EARLY_STOPPING_PATIENCE -lt 1 ]]; then + ViashError '--early_stopping_patience' has be more than or equal to 1. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" ]]; then + if ! [[ "$VIASH_PAR_EARLY_STOPPING_MIN_DELTA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--early_stopping_min_delta' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_EARLY_STOPPING_MIN_DELTA '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_EARLY_STOPPING_MIN_DELTA -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--early_stopping_min_delta' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--early_stopping_min_delta' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" ]]; then + if ! [[ "$VIASH_PAR_REDUCE_LR_ON_PLATEAU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--reduce_lr_on_plateau' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LR_FACTOR" ]]; then + if ! [[ "$VIASH_PAR_LR_FACTOR" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--lr_factor' has to be a double. 
Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_LR_FACTOR '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_LR_FACTOR -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--lr_factor' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--lr_factor' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_LR_PATIENCE" ]]; then + if ! [[ "$VIASH_PAR_LR_PATIENCE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--lr_patience' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_LR_PATIENCE '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_LR_PATIENCE -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--lr_patience' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--lr_patience' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_EARLY_STOPPING_MONITOR" ]; then + VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES=("elbo_validation:reconstruction_loss_validation:kl_local_validation") + IFS=':' + set -f + if ! 
[[ ":${VIASH_PAR_EARLY_STOPPING_MONITOR_CHOICES[*]}:" =~ ":$VIASH_PAR_EARLY_STOPPING_MONITOR:" ]]; then + ViashError '--early_stopping_monitor' specified value of \'$VIASH_PAR_EARLY_STOPPING_MONITOR\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scarches-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import mudata +import scvi +from torch.cuda import is_available as cuda_is_available +try: + from torch.backends.mps import is_available as mps_is_available +except ModuleNotFoundError: + # Older pytorch versions + # MacOS GPUs + def mps_is_available(): + return False + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'dataset_name': $( if [ ! -z ${VIASH_PAR_DATASET_NAME+x} ]; then echo "r'${VIASH_PAR_DATASET_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def _read_model_name_from_registry(model_path) -> str: + """Read registry with information about the model, return the model name""" + registry = scvi.model.base.BaseModelClass.load_registry(model_path) + return registry["model_name"] + + +def _detect_base_model(model_path): + """Read from the model's file which scvi_tools model it contains""" + + names_to_models_map = { + "AUTOZI": scvi.model.AUTOZI, + "CondSCVI": scvi.model.CondSCVI, + "DestVI": scvi.model.DestVI, + "LinearSCVI": scvi.model.LinearSCVI, + "PEAKVI": scvi.model.PEAKVI, + "SCANVI": scvi.model.SCANVI, + "SCVI": scvi.model.SCVI, + "TOTALVI": scvi.model.TOTALVI, + "MULTIVI": scvi.model.MULTIVI, + "AmortizedLDA": scvi.model.AmortizedLDA, + "JaxSCVI": scvi.model.JaxSCVI, + } + + return names_to_models_map[_read_model_name_from_registry(model_path)] + + +def extract_file_name(file_path): + """Return the name of the file from path to this file + + Examples + -------- + >>> extract_file_name("resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu") + pbmc_1k_protein_v3_mms + """ + slash_position = file_path.rfind("/") + dot_position = file_path.rfind(".") + + return file_path[slash_position + 1: dot_position] + + +def map_to_existing_reference(adata_query, 
model_path, check_val_every_n_epoch=1): + """ + A function to map the query data to the reference atlas + + Input: + * adata_query: An AnnData object with the query + * model_path: The reference model directory + + Output: + * vae_query: the trained scvi_tools model + * adata_query: The AnnData object with the query preprocessed for the mapping to the reference + """ + model = _detect_base_model(model_path) + + try: + model.prepare_query_anndata(adata_query, model_path) + except ValueError: + logger.warning("ValueError thrown when preparing adata for mapping. Clearing .varm field to prevent it") + adata_query.varm.clear() + model.prepare_query_anndata(adata_query, model_path) + + # Load query data into the model + vae_query = model.load_query_data( + adata_query, + model_path, + freeze_dropout=True + ) + + # Train scArches model for query mapping + vae_query.train( + max_epochs=par["max_epochs"], + early_stopping=par['early_stopping'], + early_stopping_monitor=par['early_stopping_monitor'], + early_stopping_patience=par['early_stopping_patience'], + early_stopping_min_delta=par['early_stopping_min_delta'], + check_val_every_n_epoch=check_val_every_n_epoch, + use_gpu=(cuda_is_available() or mps_is_available()) + ) + + return vae_query, adata_query + + +def _convert_object_dtypes_to_strings(adata): + """Convert object dtypes in .var and .obs to string to prevent error when saving file""" + def convert_cols(df): + object_cols = df.columns[df.dtypes == "object"] + for col in object_cols: + df[col] = df[col].astype(str) + return df + + adata.var = convert_cols(adata.var) + adata.obs = convert_cols(adata.obs) + + return adata + + +def _get_model_path(model_path: str): + """Obtain path to the directory with reference model. If the proposed \`model_path\` is a .zip archive, unzip it. 
If nesessary, convert model to the new format + + Parameters + ---------- + model_path : str + Path to a directory, where to search for the model or to a zip file containing the model + + Returns + ------- + Path to a directory with reference model in format of scvi-tools>=0.15 + """ + import os + import zipfile + import tempfile + from pathlib import Path + + if os.path.isdir(model_path) and "model.pt" in os.listdir(model_path): + # Probably, the \`model_path\` already contains model in the output format of scvi-tools>=0.15 + return model_path + + # The model either has old format or is a zip file downloaded from Zenodo + new_directory = Path(tempfile.TemporaryDirectory().name) + + if zipfile.is_zipfile(model_path): + with zipfile.ZipFile(model_path) as archive: + archive.extractall(new_directory) + model_dir = next(new_directory.glob("**/*.pt")).parent + + else: + model_dir = next(Path(model_path).glob("**/*.pt")).parent + + if "model_params.pt" in os.listdir(model_dir): + # The model is in the \`directory\`, but it was generated with scvi-tools<0.15 + # TODO: for new references (that could not be SCANVI based), we need to check the base class somehow. Reading registry does not work with models generated by scvi-tools<0.15 + # Here I assume that the reference model is for HLCA and thus is SCANVI based + converted_model_path = os.path.join(model_dir, "converted") + scvi.model.SCANVI.convert_legacy_save(model_dir, converted_model_path) + return converted_model_path + + elif "model.pt" in os.listdir(model_dir): + # Archive contained model in the new format, so just return the directory + return model_dir + + else: + raise ValueError("Cannot find model in the provided reference path. Please, provide a path or a link to the directory with reference model. 
For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip") + + +def main(): + + mdata_query = mudata.read(par["input"].strip()) + adata_query = mdata_query.mod[par["modality"]].copy() + + if "dataset" not in adata_query.obs.columns: + # Write name of the dataset as batch variable + if par["dataset_name"] is None: + logger.info("Detecting dataset name") + par["dataset_name"] = extract_file_name(par["input"]) + logger.info(f"Detected {par['dataset_name']}") + + adata_query.obs["dataset"] = par["dataset_name"] + + model_path = _get_model_path(par["reference"]) + vae_query, adata_query = map_to_existing_reference(adata_query, model_path=model_path) + model_name = _read_model_name_from_registry(model_path) + + # Save info about the used model + mdata_query.mod[par["modality"]].uns["integration_method"] = model_name + + logger.info("Trying to write latent representation") + output_key = par["obsm_output"].format(model_name=model_name) + mdata_query.mod[par["modality"]].obsm[output_key] = vae_query.get_latent_representation() + + logger.info("Converting dtypes") + mdata_query.mod[par["modality"]] = _convert_object_dtypes_to_strings(mdata_query.mod[par["modality"]]) + + logger.info("Updating mudata") + try: + mdata_query.update() # Without that error might be thrown during file saving + except KeyError: + # Sometimes this error is thrown, but then everything is magically fixed, and the file gets saved normally + # This is discussed here a bit: https://github.com/scverse/mudata/issues/27 + logger.warning("KeyError was thrown during updating mudata. Probably, the file is fixed after that, but be careful") + + logger.info("Saving h5mu file") + mdata_query.write_h5mu(par["output"].strip(), compression=par["output_compression"]) + + logger.info("Saving model") + vae_query.save(par["model_output"], overwrite=True) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" 
+ +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/native/integrate/scarches/setup_logger.py b/target/native/integrate/scarches/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/integrate/scarches/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/integrate/totalvi/.config.vsh.yaml b/target/native/integrate/totalvi/.config.vsh.yaml new file mode 100644 index 00000000000..5745ee4f154 --- /dev/null +++ b/target/native/integrate/totalvi/.config.vsh.yaml @@ -0,0 +1,348 @@ +functionality: + name: "totalvi" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file with query data to integrate with reference." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + description: "Input h5mu file with reference data to train the TOTALVI model." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--force_retrain" + alternatives: + - "-f" + description: "If true, retrain the model and save it to reference_model_path" + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--query_modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--query_proteins_modality" + description: "Name of the modality in the input (query) h5mu file containing\ + \ protein data" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_proteins_modality" + description: "Name of the modality containing proteins in the reference" + info: null + default: + - "prot" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. If None, X is used" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: "Column name discriminating between your batches." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_input" + description: ".var column containing highly variable genes. By default, do not\ + \ subset genes." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_integrated_totalvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_normalized_rna_output" + description: "In which .obsm slot to store the normalized RNA from TOTALVI." + info: null + default: + - "X_totalvi_normalized_rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_normalized_protein_output" + description: "In which .obsm slot to store the normalized protein data from\ + \ TOTALVI." + info: null + default: + - "X_totalvi_normalized_protein" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference_model_path" + description: "Directory with the reference model. 
If not exists, trained model\ + \ will be saved there" + info: null + default: + - "totalvi_model_reference" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--query_model_path" + description: "Directory, where the query model will be saved" + info: null + default: + - "totalvi_model_query" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset" + info: null + default: + - 400 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_query_epochs" + description: "Number of passes through the dataset, when fine-tuning model for\ + \ query" + info: null + default: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--weight_decay" + description: "Weight decay, when fine-tuning model for query" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9" + target_organization: "openpipelines-bio" 
+ target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "torchmetrics~=0.11.0" + - "scvi-tools~=1.0.3" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/integrate/totalvi" + executable: 
"/home/runner/work/openpipeline/openpipeline/target/native/integrate/totalvi/totalvi" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/integrate/totalvi/setup_logger.py b/target/native/integrate/totalvi/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/integrate/totalvi/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/integrate/totalvi/totalvi b/target/native/integrate/totalvi/totalvi new file mode 100755 index 00000000000..2ee6f3964b5 --- /dev/null +++ b/target/native/integrate/totalvi/totalvi @@ -0,0 +1,985 @@ +#!/usr/bin/env bash + +# totalvi 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+#  * Vladimir Shitov
+
+set -e
+
+# pick the temporary directory: the first non-empty candidate below wins,
+# /tmp is the final fallback
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1     : unquoted string
+# return : possibly quoted string
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory of a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory that contains the bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    # a relative link target is resolved against the directory of the link
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] ', when VIASH_VERBOSITY >= $1.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="totalvi"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+function ViashHelp {
+  echo "totalvi 0.12.4"
+  echo ""
+  echo "Performs mapping to the reference by totalvi model:"
+  echo "https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI"
+  echo ""
+  echo "Inputs:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " Input h5mu file with query data to integrate with reference."
+  echo ""
+  echo " -r, --reference"
+  echo " type: file, required parameter, file must exist"
+  echo " Input h5mu file with reference data to train the TOTALVI model."
+ echo "" + echo " -f, --force_retrain" + echo " type: boolean_true" + echo " If true, retrain the model and save it to reference_model_path" + echo "" + echo " --query_modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --query_proteins_modality" + echo " type: string" + echo " Name of the modality in the input (query) h5mu file containing protein" + echo " data" + echo "" + echo " --reference_modality" + echo " type: string" + echo " default: rna" + echo "" + echo " --reference_proteins_modality" + echo " type: string" + echo " default: prot" + echo " Name of the modality containing proteins in the reference" + echo "" + echo " --input_layer" + echo " type: string" + echo " Input layer to use. If None, X is used" + echo "" + echo " --obs_batch" + echo " type: string" + echo " default: sample_id" + echo " Column name discriminating between your batches." + echo "" + echo " --var_input" + echo " type: string" + echo " .var column containing highly variable genes. By default, do not subset" + echo " genes." + echo "" + echo "Outputs:" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " Output h5mu file." + echo "" + echo " --obsm_output" + echo " type: string" + echo " default: X_integrated_totalvi" + echo " In which .obsm slot to store the resulting integrated embedding." + echo "" + echo " --obsm_normalized_rna_output" + echo " type: string" + echo " default: X_totalvi_normalized_rna" + echo " In which .obsm slot to store the normalized RNA from TOTALVI." + echo "" + echo " --obsm_normalized_protein_output" + echo " type: string" + echo " default: X_totalvi_normalized_protein" + echo " In which .obsm slot to store the normalized protein data from TOTALVI." + echo "" + echo " --reference_model_path" + echo " type: file, output, file must exist" + echo " default: totalvi_model_reference" + echo " Directory with the reference model. 
If not exists, trained model will be" + echo " saved there" + echo "" + echo " --query_model_path" + echo " type: file, output, file must exist" + echo " default: totalvi_model_query" + echo " Directory, where the query model will be saved" + echo "" + echo "Learning parameters:" + echo " --max_epochs" + echo " type: integer" + echo " default: 400" + echo " Number of passes through the dataset" + echo "" + echo " --max_query_epochs" + echo " type: integer" + echo " default: 200" + echo " Number of passes through the dataset, when fine-tuning model for query" + echo "" + echo " --weight_decay" + echo " type: double" + echo " default: 0.0" + echo " Weight decay, when fine-tuning model for query" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "totalvi 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -r) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'-r\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -r. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --force_retrain) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + -f) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + --query_modality) + [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_modality. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --query_modality=*) + [ -n "$VIASH_PAR_QUERY_MODALITY" ] && ViashError Bad arguments for option \'--query_modality=*\': \'$VIASH_PAR_QUERY_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --query_proteins_modality) + [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_PROTEINS_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_proteins_modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --query_proteins_modality=*) + [ -n "$VIASH_PAR_QUERY_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--query_proteins_modality=*\': \'$VIASH_PAR_QUERY_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_modality) + [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_modality=*) + [ -n "$VIASH_PAR_REFERENCE_MODALITY" ] && ViashError Bad arguments for option \'--reference_modality=*\': \'$VIASH_PAR_REFERENCE_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_proteins_modality) + [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_PROTEINS_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_proteins_modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_proteins_modality=*) + [ -n "$VIASH_PAR_REFERENCE_PROTEINS_MODALITY" ] && ViashError Bad arguments for option \'--reference_proteins_modality=*\': \'$VIASH_PAR_REFERENCE_PROTEINS_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_PROTEINS_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_layer) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_layer. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_layer=*) + [ -n "$VIASH_PAR_INPUT_LAYER" ] && ViashError Bad arguments for option \'--input_layer=*\': \'$VIASH_PAR_INPUT_LAYER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_LAYER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_batch) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_batch. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --obs_batch=*) + [ -n "$VIASH_PAR_OBS_BATCH" ] && ViashError Bad arguments for option \'--obs_batch=*\': \'$VIASH_PAR_OBS_BATCH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_BATCH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --var_input) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --var_input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --var_input=*) + [ -n "$VIASH_PAR_VAR_INPUT" ] && ViashError Bad arguments for option \'--var_input=*\': \'$VIASH_PAR_VAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --obsm_output) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_output=*) + [ -n "$VIASH_PAR_OBSM_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_output=*\': \'$VIASH_PAR_OBSM_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_normalized_rna_output) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_rna_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_normalized_rna_output=*) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_rna_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_normalized_protein_output) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_normalized_protein_output. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_normalized_protein_output=*) + [ -n "$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT" ] && ViashError Bad arguments for option \'--obsm_normalized_protein_output=*\': \'$VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_model_path) + [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_MODEL_PATH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_model_path. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_model_path=*) + [ -n "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && ViashError Bad arguments for option \'--reference_model_path=*\': \'$VIASH_PAR_REFERENCE_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_MODEL_PATH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --query_model_path) + [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUERY_MODEL_PATH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --query_model_path. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --query_model_path=*) + [ -n "$VIASH_PAR_QUERY_MODEL_PATH" ] && ViashError Bad arguments for option \'--query_model_path=*\': \'$VIASH_PAR_QUERY_MODEL_PATH\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_QUERY_MODEL_PATH=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_epochs) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_epochs=*) + [ -n "$VIASH_PAR_MAX_EPOCHS" ] && ViashError Bad arguments for option \'--max_epochs=*\': \'$VIASH_PAR_MAX_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_query_epochs) + [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_QUERY_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_query_epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_query_epochs=*) + [ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ] && ViashError Bad arguments for option \'--max_query_epochs=*\': \'$VIASH_PAR_MAX_QUERY_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_QUERY_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --weight_decay) + [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WEIGHT_DECAY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --weight_decay. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --weight_decay=*) + [ -n "$VIASH_PAR_WEIGHT_DECAY" ] && ViashError Bad arguments for option \'--weight_decay=*\': \'$VIASH_PAR_WEIGHT_DECAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_WEIGHT_DECAY=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE+x} ]; then + ViashError '--reference' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then + VIASH_PAR_FORCE_RETRAIN="false" +fi +if [ -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then + VIASH_PAR_QUERY_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then + VIASH_PAR_REFERENCE_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then + VIASH_PAR_REFERENCE_PROTEINS_MODALITY="prot" +fi +if [ -z ${VIASH_PAR_OBS_BATCH+x} ]; then + VIASH_PAR_OBS_BATCH="sample_id" +fi +if [ -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then + VIASH_PAR_OBSM_OUTPUT="X_integrated_totalvi" +fi +if [ -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then + VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT="X_totalvi_normalized_rna" +fi +if [ -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then + VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT="X_totalvi_normalized_protein" +fi +if [ -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then + VIASH_PAR_REFERENCE_MODEL_PATH="totalvi_model_reference" +fi +if [ -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then 
+ VIASH_PAR_QUERY_MODEL_PATH="totalvi_model_query" +fi +if [ -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then + VIASH_PAR_MAX_EPOCHS="400" +fi +if [ -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then + VIASH_PAR_MAX_QUERY_EPOCHS="200" +fi +if [ -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then + VIASH_PAR_WEIGHT_DECAY="0.0" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then + if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--force_retrain' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MAX_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_QUERY_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MAX_QUERY_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_query_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_WEIGHT_DECAY" ]]; then + if ! [[ "$VIASH_PAR_WEIGHT_DECAY" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--weight_decay' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_REFERENCE_MODEL_PATH" ] && [ ! -d "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_REFERENCE_MODEL_PATH")" +fi +if [ ! -z "$VIASH_PAR_QUERY_MODEL_PATH" ] && [ ! 
-d "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUERY_MODEL_PATH")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-totalvi-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from typing import Tuple + +import os +import sys +import mudata +from anndata import AnnData # For type hints +from mudata import MuData # For type hints +import numpy as np +import scvi +from scipy.sparse import issparse + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'query_modality': $( if [ ! -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'query_proteins_modality': $( if [ ! -z ${VIASH_PAR_QUERY_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_proteins_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_PROTEINS_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! 
-z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_normalized_rna_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obsm_normalized_protein_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_model_path': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'query_model_path': $( if [ ! -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_QUERY_MODEL_PATH//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_query_epochs': $( if [ ! -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_QUERY_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'weight_decay': $( if [ ! -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then echo "float(r'${VIASH_PAR_WEIGHT_DECAY//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    """Configure the root logger to emit INFO-level records on stdout.

    Returns:
        The root logger, with one additional ``StreamHandler`` attached.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler(stdout)
    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    console_handler.setFormatter(logFormatter)
    logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()


def align_proteins_names(adata_reference: "AnnData", mdata_query: "MuData", adata_query: "AnnData",
                         reference_proteins_key: str, query_proteins_key: str) -> "AnnData":
    """Store the query proteins under the same ``.obsm`` key as the reference.

    Args:
        adata_reference: Reference data; ``.obsm[reference_proteins_key]`` must exist.
        mdata_query: Query MuData, used only to check whether the protein modality exists.
        adata_query: Query AnnData that is modified in place.
        reference_proteins_key: ``.obsm`` key holding the proteins in the reference.
        query_proteins_key: Protein modality name of the query (may be empty/None).

    Returns:
        ``adata_query`` (the same object), with ``.obsm[reference_proteins_key]`` set.
    """
    proteins_reference = adata_reference.obsm[reference_proteins_key]

    if not query_proteins_key or query_proteins_key not in mdata_query.mod:
        # The query has no protein data: pad with zeros, one column per reference protein.
        adata_query.obsm[reference_proteins_key] = np.zeros((adata_query.n_obs, proteins_reference.shape[1]))
    else:
        # Make sure that protein expression has the same key in query and reference.
        adata_query.obsm[reference_proteins_key] = adata_query.obsm[query_proteins_key]

    return adata_query


def extract_proteins_to_anndata(mdata: "MuData", rna_modality_key, protein_modality_key, input_layer,
                                hvg_var_key=None) -> "AnnData":
    """Build the AnnData layout TOTALVI expects: RNA in the object, proteins in ``.obsm``.

    Args:
        mdata: MuData holding the RNA modality and, optionally, the protein modality.
        rna_modality_key: Name of the RNA modality in ``mdata.mod``.
        protein_modality_key: Name of the protein modality; skipped when it is not in ``mdata.mod``.
        input_layer: Layer of the protein modality to read; ``None`` selects ``.X``.
        hvg_var_key: Optional boolean ``.var`` column used to subset the genes.

    Returns:
        A copy of the RNA modality with the (dense) protein matrix stored in
        ``.obsm[protein_modality_key]`` when the protein modality exists.

    Raises:
        KeyError: If ``input_layer`` is found neither in ``.layers`` nor in ``.obsm``.
    """
    adata = mdata.mod[rna_modality_key].copy()

    if hvg_var_key:
        selected_genes = adata.var_names[adata.var[hvg_var_key]]
        adata = adata[:, selected_genes].copy()

    if protein_modality_key in mdata.mod:
        # Put the proteins modality into the .obsm slot.
        proteins_adata = mdata.mod[protein_modality_key].copy()

        if input_layer is None:
            proteins = proteins_adata.X
        elif input_layer in proteins_adata.layers:
            # Fix: an input *layer* lives in .layers; the previous code only looked in .obsm.
            proteins = proteins_adata.layers[input_layer]
        elif input_layer in proteins_adata.obsm:
            # Kept for backward compatibility with the previous .obsm lookup.
            proteins = proteins_adata.obsm[input_layer]
        else:
            raise KeyError(
                f"Layer '{input_layer}' was not found in .layers (or .obsm) of modality '{protein_modality_key}'"
            )

        if issparse(proteins):
            proteins = proteins.toarray()

        adata.obsm[protein_modality_key] = proteins

    return adata


def build_reference_model(adata_reference: "AnnData", max_train_epochs: int = 400) -> "scvi.model.TOTALVI":
    """Train a TOTALVI model on the reference and save it to ``par["reference_model_path"]``.

    Args:
        adata_reference: Reference data already registered with ``TOTALVI.setup_anndata``.
        max_train_epochs: Maximum number of training epochs.

    Returns:
        The trained reference model.
    """
    vae_reference = scvi.model.TOTALVI(adata_reference, use_layer_norm="both", use_batch_norm="none")
    vae_reference.train(max_train_epochs)

    # overwrite=True: this function also runs when the directory already exists
    # (--force_retrain, or a directory without model.pt); a plain save() would then fail.
    vae_reference.save(par["reference_model_path"], overwrite=True)

    return vae_reference


def is_retraining_model() -> bool:
    """Tell whether the reference model has to be (re)trained.

    Returns:
        True when ``par["force_retrain"]`` is set or when no ``model.pt`` exists
        in ``par["reference_model_path"]``; False otherwise.
    """
    model_file = os.path.join(par["reference_model_path"], "model.pt")
    trained_model_exists = os.path.exists(model_file)
    # bool(): force_retrain may be None, which previously leaked out as the return value.
    return bool(par["force_retrain"]) or not trained_model_exists


def map_query_to_reference(mdata_reference: "MuData", mdata_query: "MuData",
                           adata_query: "AnnData") -> "Tuple[scvi.model.TOTALVI, AnnData]":
    """Build (or load) the reference model and map the query onto it.

    Args:
        mdata_reference: Reference MuData.
        mdata_query: Query MuData (used to check for a protein modality).
        adata_query: Query AnnData produced by ``extract_proteins_to_anndata``.

    Returns:
        The model trained on the query and the prepared query AnnData.
    """
    adata_reference = extract_proteins_to_anndata(
        mdata_reference,
        rna_modality_key=par["reference_modality"],
        protein_modality_key=par["reference_proteins_modality"],
        input_layer=par["input_layer"],
        hvg_var_key=par["var_input"],
    )

    scvi.model.TOTALVI.setup_anndata(
        adata_reference,
        batch_key=par["obs_batch"],
        protein_expression_obsm_key=par["reference_proteins_modality"]
    )

    if is_retraining_model():
        vae_reference = build_reference_model(adata_reference, max_train_epochs=par["max_epochs"])
    else:
        vae_reference = scvi.model.TOTALVI.load(dir_path=par["reference_model_path"], adata=adata_reference)

    adata_query = align_proteins_names(
        adata_reference,
        mdata_query,
        adata_query,
        reference_proteins_key=par["reference_proteins_modality"],
        query_proteins_key=par["query_proteins_modality"],
    )

    # Reorder genes and pad missing genes with 0s
    scvi.model.TOTALVI.prepare_query_anndata(adata_query, vae_reference)

    # Train the model for query
    vae_query = scvi.model.TOTALVI.load_query_data(
        adata_query,
        vae_reference
    )
    vae_query.train(par["max_query_epochs"], plan_kwargs=dict(weight_decay=par["weight_decay"]))

    return vae_query, adata_query


def main():
    """Map the query onto the reference with TOTALVI and write the results.

    Raises:
        ValueError: If the reference path does not end with ``.h5mu``.
    """
    # Strip before testing the extension, and fail before any expensive reading.
    reference_path = par["reference"].strip()
    if not reference_path.endswith(".h5mu"):
        raise ValueError("Incorrect format of reference, please provide a .h5mu file")

    mdata_query = mudata.read(par["input"].strip())
    adata_query = extract_proteins_to_anndata(mdata_query,
                                              rna_modality_key=par["query_modality"],
                                              protein_modality_key=par["query_proteins_modality"],
                                              input_layer=par["input_layer"],
                                              hvg_var_key=par["var_input"])

    logger.info("Reading reference")
    mdata_reference = mudata.read(reference_path)

    logger.info("Mapping query to the reference")
    vae_query, adata_query = map_query_to_reference(mdata_reference, mdata_query, adata_query)

    # NOTE(review): adata_query is a working copy that is never written to the output;
    # confirm whether this marker was meant to be stored on mdata_query instead.
    adata_query.uns["integration_method"] = "totalvi"

    logger.info("Getting the latent representation of query")
    mdata_query.mod[par["query_modality"]].obsm[par["obsm_output"]] = vae_query.get_latent_representation()

    norm_rna, norm_protein = vae_query.get_normalized_expression()
    mdata_query.mod[par["query_modality"]].obsm[par["obsm_normalized_rna_output"]] = norm_rna.to_numpy()

    if par["query_proteins_modality"] in mdata_query.mod:
        mdata_query.mod[par["query_proteins_modality"]].obsm[par["obsm_normalized_protein_output"]] = norm_protein.to_numpy()

    logger.info("Updating mdata")
    mdata_query.update()

    logger.info("Saving updated query data")
    mdata_query.write_h5mu(par["output"].strip())

    logger.info("Saving query model")
    vae_query.save(par["query_model_path"], overwrite=True)


if __name__ == "__main__":
    main()
+ exit 1 +fi + + +exit 0 diff --git a/target/native/labels_transfer/knn/.config.vsh.yaml b/target/native/labels_transfer/knn/.config.vsh.yaml new file mode 100644 index 00000000000..1a0c760789a --- /dev/null +++ b/target/native/labels_transfer/knn/.config.vsh.yaml @@ -0,0 +1,379 @@ +functionality: + name: "knn" + namespace: "labels_transfer" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + roles: + - "author" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Input dataset (query) arguments" + arguments: + - type: "file" + name: "--input" + description: "The query data to transfer the labels to. Should be a .h5mu file." + info: + label: "Query" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + slots: + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's inference,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + required: false + description: "The embedding to use for the classifier's inference.\ + \ Override using the `--input_obsm_features` argument. If not\ + \ provided, the `.X` slot will be used instead.\nMake sure that\ + \ embedding was obtained in the same way as the reference embedding\ + \ (e.g. by the same model or preprocessing).\n" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to use." 
+ info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's inference.\ + \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ + \ was obtained in the same way as the reference embedding (e.g. by the same\ + \ model or preprocessing).\n" + info: null + example: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference dataset arguments" + arguments: + - type: "file" + name: "--reference" + description: "The reference data to train classifiers on." + info: + label: "Reference" + file_format: + type: "h5ad" + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's training,\ + \ if `--reference_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + description: "The embedding to use for the classifier's training. Override\ + \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ + \ was obtained in the same way as the query embedding (e.g. by the same\ + \ model or preprocessing).\n" + required: true + obs: + - type: "string" + name: "targets" + multiple: true + example: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + description: "The target labels to transfer. Override using the `--reference_obs_targets`\ + \ argument."
+ required: true + example: + - "https://zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ + Make sure that embedding was obtained in the same way as the query embedding\ + \ (e.g. by the same model or preprocessing).\n" + info: null + default: + - "X_integrated_scanvi" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_targets" + description: "The `.obs` key of the target labels to transfer." + info: null + default: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The query data in .h5mu format with predicted labels transferred\ + \ from the reference." + info: + label: "Output data" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + obs: + - type: "string" + name: "predictions" + description: "The predicted labels. Override using the `--output_obs_predictions`\ + \ argument." + required: true + - type: "double" + name: "uncertainty" + description: "The uncertainty of the predicted labels. Override using\ + \ the `--output_obs_uncertainty` argument." + required: false + obsm: + - type: "double" + name: "X_integrated_scanvi" + description: "The embedding used for the classifier's inference.
Could\ + \ have any name, specified by `input_obsm_features` argument." + required: false + uns: + - type: "string" + name: "parameters" + example: "labels_transfer" + description: "Additional information about the parameters used for\ + \ the label transfer." + required: true + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_predictions" + description: "In which `.obs` slots to store the predicted information.\nIf\ + \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_uncertainty" + description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ + If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_uns_parameters" + description: "The `.uns` key to store additional information about the parameters\ + \ used for the label transfer."
+ info: null + default: + - "labels_transfer" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--n_neighbors" + alternatives: + - "-k" + description: "Number of nearest neighbors to use for classification" + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../utils/helper.py" + - type: "file" + path: "../../utils/setup_logger.py" + description: "Performs label transfer from reference to query using KNN classifier" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: + method_id: "KNN_pynndescent" + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "pynndescent~=0.5.8" + - "numba~=0.56.4" + - "numpy~=1.23.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + 
def check_arguments(par):
    """Fill in default output slot names and validate their number.

    Args:
        par: Parameter dictionary. ``reference_obs_targets`` must be a list;
            ``output_obs_predictions`` and ``output_obs_uncertainty`` may be
            missing, None or empty, in which case they default to the targets
            suffixed with ``"_pred"`` and ``"_uncertainty"``.

    Returns:
        The same ``par`` dictionary, updated in place.

    Raises:
        AssertionError: If a provided output list does not have one entry per
            reference target.
    """
    targets = par["reference_obs_targets"]

    # Same order as before: predictions first, then uncertainty.
    for key, suffix in (("output_obs_predictions", "_pred"), ("output_obs_uncertainty", "_uncertainty")):
        if not par.get(key):
            par[key] = [t + suffix for t in targets]
        if len(par[key]) != len(targets):
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(f"Number of {key} must match number of reference_obs_targets\npar: {par}")

    return par


def _select_features(adata, obsm_key, dataset_name, logger):
    """Return ``adata.X`` when ``obsm_key`` is None, else ``adata.obsm[obsm_key]``."""
    if obsm_key is None:
        logger.info(f"Using .X of {dataset_name} data")
        return adata.X

    logger.info(f"Using .obsm[{obsm_key}] of {dataset_name} data")
    return adata.obsm[obsm_key]


def get_reference_features(adata_reference, par, logger):
    """Select the training features of the reference.

    Uses ``.obsm[par["reference_obsm_features"]]``, or ``.X`` when that
    parameter is None.
    """
    return _select_features(adata_reference, par["reference_obsm_features"], "reference", logger)


def get_query_features(adata, par, logger):
    """Select the inference features of the query.

    Uses ``.obsm[par["input_obsm_features"]]``, or ``.X`` when that
    parameter is None.
    """
    return _select_features(adata, par["input_obsm_features"], "query", logger)
+# +# Component authors: +# * Vladimir Shitov (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="knn" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "knn 0.12.4" + echo "" + echo "Performs label transfer from reference to query using KNN classifier" + echo "" + echo "Input dataset (query) arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " The query data to transfer the labels to. Should be a .h5mu file." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " Which modality to use." + echo "" + echo " --input_obsm_features" + echo " type: string" + echo " example: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's inference." + echo " If not provided, the \`.X\` slot will be used instead." + echo " Make sure that embedding was obtained in the same way as the reference" + echo " embedding (e.g. by the same model or preprocessing)." + echo "" + echo "Reference dataset arguments:" + echo " --reference" + echo " type: file, file must exist" + echo " example:" + echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + echo " The reference data to train classifiers on." + echo "" + echo " --reference_obsm_features" + echo " type: string, required parameter" + echo " default: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's training." 
+ echo " Make sure that embedding was obtained in the same way as the query" + echo " embedding (e.g. by the same model or preprocessing)." + echo "" + echo " --reference_obs_targets" + echo " type: string, multiple values allowed" + echo " default:" + echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" + echo " The \`.obs\` key of the target labels to tranfer." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " The query data in .h5mu format with predicted labels transfered from the" + echo " reference." + echo "" + echo " --output_obs_predictions" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the predicted information." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_pred\"\` suffix." + echo "" + echo " --output_obs_uncertainty" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the uncertainty of the predictions." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_uncertainty\"\` suffix." + echo "" + echo " --output_uns_parameters" + echo " type: string" + echo " default: labels_transfer" + echo " The \`.uns\` key to store additional information about the parameters used" + echo " for the label transfer." 
+ echo "" + echo "Learning parameters:" + echo " -k, --n_neighbors" + echo " type: integer, required parameter" + echo " Number of nearest neighbors to use for classification" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "knn 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_obsm_features) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_obsm_features=*) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obsm_features) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --reference_obsm_features=*) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obs_targets) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS="$2" + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obs_targets=*) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_obs_predictions) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_obs_predictions=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_obs_uncertainty) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_obs_uncertainty=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_uns_parameters) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_uns_parameters=*) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_neighbors) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_N_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_neighbors=*) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1") + shift 1 + ;; + -k) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'-k\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -k. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
# setting computational defaults

# Helper for parsing memory strings.
#
# Usage: ViashMemoryAsBytes <memory>
#   <memory>  a size such as "2GB", "512 mb", "100k" or "42b". Case and
#             whitespace are ignored; the unit suffix is mandatory.
# Prints the size in bytes on stdout (every unit step is a factor of 1024).
# Prints nothing when <memory> is not of the form <digits><unit>, so callers
# can detect an unparsable value by testing for an empty result.
function ViashMemoryAsBytes {
  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
  local memory_regex='^([0-9]+)([kmgtp]b?|b)$'
  if [[ $memory =~ $memory_regex ]]; then
    # Force base 10: inside $(( )) a leading zero would otherwise make bash
    # read the number as octal ("010k" -> 8 KiB, "09k" -> arithmetic error).
    local number=$(( 10#${BASH_REMATCH[1]} ))
    local symbol=${BASH_REMATCH[2]}
    # Keep the result local so calling the helper never leaks a global.
    local memory_b

    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( number * 1024 )) ;;
      mb|m)   memory_b=$(( number * 1024 * 1024 )) ;;
      gb|g)   memory_b=$(( number * 1024 * 1024 * 1024 )) ;;
      tb|t)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 )) ;;
      pb|p)   memory_b=$(( number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}

# compute memory in different units
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  # Quote the value so inputs containing whitespace ("512 mb") reach the helper
  # as a single argument; the helper strips the whitespace itself.
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    # each unit is the previous one divided by 1024, rounded up
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # NOTE: unset takes the variable NAME. "unset $VAR" expands the (empty)
    # value and unsets nothing, leaving a set-but-empty variable behind that
    # later ${VAR+x} tests treat as a provided value.
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS="ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" +fi +if [ -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then + VIASH_PAR_OUTPUT_UNS_PARAMETERS="labels_transfer" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then + ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then + if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-knn-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import warnings + +import mudata +import numpy as np +import scanpy as sc +from scipy.sparse import issparse +import pynndescent +import numba + + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! 
-z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
def setup_logger():
    """Configure the root logger to report INFO and above on stdout.

    The root logger level is set to INFO and a stream handler writing to
    sys.stdout is attached, formatting records as "time level message".
    Calling the function again reuses a stdout handler that is already
    attached instead of adding another one, so each message is printed once.

    Returns:
        logging.Logger: the root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Attaching a fresh handler on every call would duplicate each log line,
    # so only add one when no handler is bound to the current stdout yet.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
def distances_to_affinities(distances):
    """Turn neighbour distances into per-row normalised affinity weights.

    Every row is passed through an exponential kernel exp(-d / bandwidth)
    with bandwidth (2 / std(row)) ** 2 and then divided by its row sum, so
    the weights of each row add up to 1.

    Args:
        distances: 2-D array of distances, reduced along axis 1 (one row per
            observation, one column per neighbour).

    Returns:
        numpy.ndarray with the same shape as distances holding the weights.
    """
    distances = np.asarray(distances)
    if distances.size == 0:
        # nothing to weigh; mirror the input shape
        return np.zeros(distances.shape, dtype=float)

    stds = np.std(distances, axis=1)
    # A row without spread (identical distances or a single neighbour) gives
    # an infinite bandwidth, i.e. uniform weights. That is the intended
    # outcome, so the divide-by-zero warning is silenced for this step only.
    with np.errstate(divide="ignore"):
        bandwidths = ((2.0 / stds) ** 2).reshape(-1, 1)

    exponents = np.true_divide(distances, bandwidths)
    # Shift every row by its smallest exponent before exponentiating. The
    # shift cancels in the normalisation below, but it guarantees at least one
    # term equal to 1, so a widely spread row can no longer underflow to all
    # zeros and produce 0 / 0 = NaN weights.
    exponents = exponents - np.min(exponents, axis=1, keepdims=True)
    distances_tilda = np.exp(-exponents)

    return distances_tilda / np.sum(distances_tilda, axis=1, keepdims=True)
This may consume a lot of memory.") + train_data = train_data.toarray() + + logger.debug(f"Shape of train data: {train_data.shape}") + + logger.info("Building NN index") + ref_nn_index = pynndescent.NNDescent(train_data, n_neighbors=par["n_neighbors"]) + ref_nn_index.prepare() + + ref_neighbors, ref_distances = ref_nn_index.query(query_data, k=par["n_neighbors"]) + + weights = distances_to_affinities(ref_distances) + + output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) + + # for each annotation level, get prediction and uncertainty + + for obs_tar, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): + logger.info(f"Predicting labels for {obs_tar}") + ref_cats = adata_reference.obs[obs_tar].cat.codes.to_numpy()[ref_neighbors] + prediction, uncertainty = weighted_prediction(weights, ref_cats) + prediction = np.asarray(adata_reference.obs[obs_tar].cat.categories)[prediction] + + adata.obs[obs_pred], adata.obs[obs_unc] = prediction, uncertainty + + # Write information about labels transfer to uns + output_uns_parameters[obs_tar] = { + "method": "KNN_pynndescent", + "n_neighbors": par["n_neighbors"], + "reference": par["reference"] + } + + adata.uns[par["output_uns_parameters"]] = output_uns_parameters + + mdata.mod[par['modality']] = adata + mdata.update() + mdata.write_h5mu(par['output'].strip()) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
def setup_logger():
    """Configure the root logger to report INFO and above on stdout.

    The root logger level is set to INFO and a stream handler writing to
    ``sys.stdout`` is attached, formatting records as "time level message".
    Calling the function again reuses a stdout handler that is already
    attached instead of adding another one, so each message is printed once.

    Returns:
        logging.Logger: the root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Attaching a fresh handler on every call would duplicate each log line,
    # so only add one when no handler is bound to the current stdout yet.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
+ required: true + slots: + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's inference,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + required: false + description: "The embedding to use for the classifier's inference.\ + \ Override using the `--input_obsm_features` argument. If not\ + \ provided, the `.X` slot will be used instead.\nMake sure that\ + \ embedding was obtained in the same way as the reference embedding\ + \ (e.g. by the same model or preprocessing).\n" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to use." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's inference.\ + \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ + \ was obtained in the same way as the reference embedding (e.g. by the same\ + \ model or preprocessing).\n" + info: null + example: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference dataset arguments" + arguments: + - type: "file" + name: "--reference" + description: "The reference data to train classifiers on." 
+ info: + label: "Reference" + file_format: + type: "h5ad" + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's training,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + description: "The embedding to use for the classifier's training. Override\ + \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ + \ was obtained in the same way as the query embedding (e.g. by the same\ + \ model or preprocessing).\n" + required: true + obs: + - type: "string" + name: "targets" + multiple: true + example: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + description: "The target labels to transfer. Override using the `--reference_obs_targets`\ + \ argument." + required: true + example: + - "https://zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ + Make sure that embedding was obtained in the same way as the query embedding\ + \ (e.g. by the same model or preprocessing).\n" + info: null + default: + - "X_integrated_scanvi" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_targets" + description: "The `.obs` key of the target labels to transfer."
+ info: null + default: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The query data in .h5mu format with predicted labels transferred\ + \ from the reference." + info: + label: "Output data" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + obs: + - type: "string" + name: "predictions" + description: "The predicted labels. Override using the `--output_obs_predictions`\ + \ argument." + required: true + - type: "double" + name: "uncertainty" + description: "The uncertainty of the predicted labels. Override using\ + \ the `--output_obs_uncertainty` argument." + required: false + obsm: + - type: "double" + name: "X_integrated_scanvi" + description: "The embedding used for the classifier's inference. Could\ + \ have any name, specified by `input_obsm_features` argument." + required: false + uns: + - type: "string" + name: "parameters" + example: "labels_transfer" + description: "Additional information about the parameters used for\ + \ the label transfer."
+ required: true + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_predictions" + description: "In which `.obs` slots to store the predicted information.\nIf\ + \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_uncertainty" + description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ + If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_uns_parameters" + description: "The `.uns` key to store additional information about the parameters\ + \ used for the label transfer." + info: null + default: + - "labels_transfer" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Execution arguments" + arguments: + - type: "boolean_true" + name: "--force_retrain" + alternatives: + - "-f" + description: "Retrain models on the reference even if model_output directory\ + \ already has trained classifiers. WARNING! It will rewrite existing classifiers\ + \ for targets in the model_output directory!" + info: null + direction: "input" + dest: "par" + - type: "boolean" + name: "--use_gpu" + description: "Use GPU during models training and inference (recommended)." 
+ info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--verbosity" + alternatives: + - "-v" + description: "The verbosity level for evaluation of the classifier from the\ + \ range [0,2]" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--model_output" + description: "Output directory for model" + info: null + default: + - "model" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "double" + name: "--learning_rate" + alternatives: + - "--eta" + description: "Step size shrinkage used in update to prevents overfitting. Range:\ + \ [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_split_loss" + alternatives: + - "--gamma" + description: "Minimum loss reduction required to make a further partition on\ + \ a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_depth" + alternatives: + - "-d" + description: "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_child_weight" + description: "Minimum sum of instance weight (hessian) needed in a child. 
See\ + \ https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_delta_step" + description: "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--subsample" + description: "Subsample ratio of the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sampling_method" + description: "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - "uniform" + required: false + choices: + - "uniform" + - "gradient_based" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bytree" + description: "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bylevel" + description: "Subsample ratio of columns for each level. Range (0, 1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bynode" + description: "Subsample ratio of columns for each node (split). Range (0, 1].\ + \ See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--reg_lambda" + alternatives: + - "--lambda" + description: "L2 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--reg_alpha" + alternatives: + - "--alpha" + description: "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--scale_pos_weight" + description: "Control the balance of positive and negative weights, useful for\ + \ unbalanced classes. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../utils/helper.py" + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs label transfer from reference to query using XGBoost classifier" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: + method_id: "XGBClassifier" + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "xgboost~=1.7.1" + - "scikit-learn~=1.1.1" + - "numpy~=1.23.5" + - "pandas~=1.4.4" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + 
labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/labels_transfer/xgboost" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/labels_transfer/xgboost/xgboost" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/labels_transfer/xgboost/helper.py b/target/native/labels_transfer/xgboost/helper.py new file mode 100644 index 00000000000..a90bf59efdb --- /dev/null +++ b/target/native/labels_transfer/xgboost/helper.py @@ -0,0 +1,32 @@ +def check_arguments(par): + # check output .obs predictions + if not par["output_obs_predictions"]: + par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] + assert len(par["output_obs_predictions"]) == len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" + + # check output .obs 
uncertainty + if not par["output_obs_uncertainty"]: + par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] + assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" + + return par + +def get_reference_features(adata_reference, par, logger): + if par["reference_obsm_features"] is None: + logger.info("Using .X of reference data") + train_data = adata_reference.X + else: + logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") + train_data = adata_reference.obsm[par["reference_obsm_features"]] + + return train_data + +def get_query_features(adata, par, logger): + if par["input_obsm_features"] is None: + logger.info("Using .X of query data") + query_data = adata.X + else: + logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") + query_data = adata.obsm[par["input_obsm_features"]] + + return query_data \ No newline at end of file diff --git a/target/native/labels_transfer/xgboost/setup_logger.py b/target/native/labels_transfer/xgboost/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/labels_transfer/xgboost/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/labels_transfer/xgboost/xgboost b/target/native/labels_transfer/xgboost/xgboost new file mode 100755 index 00000000000..a11c2cf1841 --- /dev/null +++ b/target/native/labels_transfer/xgboost/xgboost @@ -0,0 +1,1520 @@ +#!/usr/bin/env bash + +# xgboost 0.12.4 +# +# This wrapper script is 
auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Vladimir Shitov (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" 
>/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="xgboost" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "xgboost 0.12.4" + echo "" + echo "Performs label transfer from reference to query using XGBoost classifier" + echo "" + echo "Input dataset (query) arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " The query data to transfer the labels to. Should be a .h5mu file." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " Which modality to use." + echo "" + echo " --input_obsm_features" + echo " type: string" + echo " example: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's inference." + echo " If not provided, the \`.X\` slot will be used instead." + echo " Make sure that embedding was obtained in the same way as the reference" + echo " embedding (e.g. by the same model or preprocessing)." 
+ echo "" + echo "Reference dataset arguments:" + echo " --reference" + echo " type: file, file must exist" + echo " example:" + echo "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + echo " The reference data to train classifiers on." + echo "" + echo " --reference_obsm_features" + echo " type: string, required parameter" + echo " default: X_integrated_scanvi" + echo " The \`.obsm\` key of the embedding to use for the classifier's training." + echo " Make sure that embedding was obtained in the same way as the query" + echo " embedding (e.g. by the same model or preprocessing)." + echo "" + echo " --reference_obs_targets" + echo " type: string, multiple values allowed" + echo " default:" + echo "ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level" + echo " The \`.obs\` key of the target labels to tranfer." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " The query data in .h5mu format with predicted labels transfered from the" + echo " reference." + echo "" + echo " --output_obs_predictions" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the predicted information." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_pred\"\` suffix." + echo "" + echo " --output_obs_uncertainty" + echo " type: string, multiple values allowed" + echo " In which \`.obs\` slots to store the uncertainty of the predictions." + echo " If provided, must have the same length as \`--reference_obs_targets\`." + echo " If empty, will default to the \`reference_obs_targets\` combined with the" + echo " \`\"_uncertainty\"\` suffix." 
+ echo "" + echo " --output_uns_parameters" + echo " type: string" + echo " default: labels_transfer" + echo " The \`.uns\` key to store additional information about the parameters used" + echo " for the label transfer." + echo "" + echo "Execution arguments:" + echo " -f, --force_retrain" + echo " type: boolean_true" + echo " Retrain models on the reference even if model_output directory already" + echo " has trained classifiers. WARNING! It will rewrite existing classifiers" + echo " for targets in the model_output directory!" + echo "" + echo " --use_gpu" + echo " type: boolean" + echo " default: false" + echo " Use GPU during models training and inference (recommended)." + echo "" + echo " -v, --verbosity" + echo " type: integer" + echo " default: 1" + echo " The verbosity level for evaluation of the classifier from the range" + echo " [0,2]" + echo "" + echo " --model_output" + echo " type: file, output, file must exist" + echo " default: model" + echo " Output directory for model" + echo "" + echo "Learning parameters:" + echo " --eta, --learning_rate" + echo " type: double" + echo " default: 0.3" + echo " Step size shrinkage used in update to prevents overfitting. Range:" + echo " [0,1]. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --gamma, --min_split_loss" + echo " type: double" + echo " default: 0.0" + echo " Minimum loss reduction required to make a further partition on a leaf" + echo " node of the tree. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " -d, --max_depth" + echo " type: integer" + echo " default: 6" + echo " Maximum depth of a tree. 
See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --min_child_weight" + echo " type: integer" + echo " default: 1" + echo " Minimum sum of instance weight (hessian) needed in a child. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --max_delta_step" + echo " type: double" + echo " default: 0.0" + echo " Maximum delta step we allow each leaf output to be. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --subsample" + echo " type: double" + echo " default: 1.0" + echo " Subsample ratio of the training instances. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --sampling_method" + echo " type: string" + echo " default: uniform" + echo " choices: [ uniform, gradient_based ]" + echo " The method to use to sample the training instances. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --colsample_bytree" + echo " type: double" + echo " default: 1.0" + echo " Fraction of columns to be subsampled. Range (0, 1]. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --colsample_bylevel" + echo " type: double" + echo " default: 1.0" + echo " Subsample ratio of columns for each level. Range (0, 1]. 
See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --colsample_bynode" + echo " type: double" + echo " default: 1.0" + echo " Subsample ratio of columns for each node (split). Range (0, 1]. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --lambda, --reg_lambda" + echo " type: double" + echo " default: 1.0" + echo " L2 regularization term on weights. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --alpha, --reg_alpha" + echo " type: double" + echo " default: 0.0" + echo " L1 regularization term on weights. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" + echo "" + echo " --scale_pos_weight" + echo " type: double" + echo " default: 1.0" + echo " Control the balance of positive and negative weights, useful for" + echo " unbalanced classes. See" + echo " " + echo "https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster" + echo " for the reference" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "xgboost 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_obsm_features) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_obsm_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_obsm_features=*) + [ -n "$VIASH_PAR_INPUT_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--input_obsm_features=*\': \'$VIASH_PAR_INPUT_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_REFERENCE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference=*) + [ -n "$VIASH_PAR_REFERENCE" ] && ViashError Bad arguments for option \'--reference=*\': \'$VIASH_PAR_REFERENCE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obsm_features) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obsm_features. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reference_obsm_features=*) + [ -n "$VIASH_PAR_REFERENCE_OBSM_FEATURES" ] && ViashError Bad arguments for option \'--reference_obsm_features=*\': \'$VIASH_PAR_REFERENCE_OBSM_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REFERENCE_OBSM_FEATURES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reference_obs_targets) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS="$2" + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reference_obs_targets. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --reference_obs_targets=*) + if [ -z "$VIASH_PAR_REFERENCE_OBS_TARGETS" ]; then + VIASH_PAR_REFERENCE_OBS_TARGETS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_REFERENCE_OBS_TARGETS="$VIASH_PAR_REFERENCE_OBS_TARGETS,"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_obs_predictions) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$2" + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_predictions. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_obs_predictions=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_PREDICTIONS" ]; then + VIASH_PAR_OUTPUT_OBS_PREDICTIONS=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_PREDICTIONS="$VIASH_PAR_OUTPUT_OBS_PREDICTIONS:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_obs_uncertainty) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$2" + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_obs_uncertainty. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_obs_uncertainty=*) + if [ -z "$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY" ]; then + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY=$(ViashRemoveFlags "$1") + else + VIASH_PAR_OUTPUT_OBS_UNCERTAINTY="$VIASH_PAR_OUTPUT_OBS_UNCERTAINTY:"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --output_uns_parameters) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_parameters. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_uns_parameters=*) + [ -n "$VIASH_PAR_OUTPUT_UNS_PARAMETERS" ] && ViashError Bad arguments for option \'--output_uns_parameters=*\': \'$VIASH_PAR_OUTPUT_UNS_PARAMETERS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_PARAMETERS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --force_retrain) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'--force_retrain\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + -f) + [ -n "$VIASH_PAR_FORCE_RETRAIN" ] && ViashError Bad arguments for option \'-f\': \'$VIASH_PAR_FORCE_RETRAIN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_FORCE_RETRAIN=true + shift 1 + ;; + --use_gpu) + [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_GPU="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --use_gpu. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --use_gpu=*) + [ -n "$VIASH_PAR_USE_GPU" ] && ViashError Bad arguments for option \'--use_gpu=*\': \'$VIASH_PAR_USE_GPU\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_USE_GPU=$(ViashRemoveFlags "$1") + shift 1 + ;; + --verbosity) + [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERBOSITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --verbosity. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --verbosity=*) + [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'--verbosity=*\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERBOSITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + -v) + [ -n "$VIASH_PAR_VERBOSITY" ] && ViashError Bad arguments for option \'-v\': \'$VIASH_PAR_VERBOSITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_VERBOSITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -v. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --model_output) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --model_output=*) + [ -n "$VIASH_PAR_MODEL_OUTPUT" ] && ViashError Bad arguments for option \'--model_output=*\': \'$VIASH_PAR_MODEL_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MODEL_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --learning_rate) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --learning_rate. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --learning_rate=*) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--learning_rate=*\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --eta) + [ -n "$VIASH_PAR_LEARNING_RATE" ] && ViashError Bad arguments for option \'--eta\': \'$VIASH_PAR_LEARNING_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LEARNING_RATE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --eta. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_split_loss) + [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLIT_LOSS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_split_loss. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_split_loss=*) + [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--min_split_loss=*\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MIN_SPLIT_LOSS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --gamma) + [ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ] && ViashError Bad arguments for option \'--gamma\': \'$VIASH_PAR_MIN_SPLIT_LOSS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SPLIT_LOSS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --gamma. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_depth) + [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DEPTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_depth. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_depth=*) + [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'--max_depth=*\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DEPTH=$(ViashRemoveFlags "$1") + shift 1 + ;; + -d) + [ -n "$VIASH_PAR_MAX_DEPTH" ] && ViashError Bad arguments for option \'-d\': \'$VIASH_PAR_MAX_DEPTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DEPTH="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -d. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_child_weight) + [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CHILD_WEIGHT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_child_weight. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --min_child_weight=*) + [ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ] && ViashError Bad arguments for option \'--min_child_weight=*\': \'$VIASH_PAR_MIN_CHILD_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CHILD_WEIGHT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_delta_step) + [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DELTA_STEP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_delta_step. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_delta_step=*) + [ -n "$VIASH_PAR_MAX_DELTA_STEP" ] && ViashError Bad arguments for option \'--max_delta_step=*\': \'$VIASH_PAR_MAX_DELTA_STEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_DELTA_STEP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --subsample) + [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --subsample. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --subsample=*) + [ -n "$VIASH_PAR_SUBSAMPLE" ] && ViashError Bad arguments for option \'--subsample=*\': \'$VIASH_PAR_SUBSAMPLE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SUBSAMPLE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --sampling_method) + [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SAMPLING_METHOD="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --sampling_method. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --sampling_method=*) + [ -n "$VIASH_PAR_SAMPLING_METHOD" ] && ViashError Bad arguments for option \'--sampling_method=*\': \'$VIASH_PAR_SAMPLING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SAMPLING_METHOD=$(ViashRemoveFlags "$1") + shift 1 + ;; + --colsample_bytree) + [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYTREE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bytree. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --colsample_bytree=*) + [ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ] && ViashError Bad arguments for option \'--colsample_bytree=*\': \'$VIASH_PAR_COLSAMPLE_BYTREE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYTREE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --colsample_bylevel) + [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYLEVEL="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bylevel. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --colsample_bylevel=*) + [ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ] && ViashError Bad arguments for option \'--colsample_bylevel=*\': \'$VIASH_PAR_COLSAMPLE_BYLEVEL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_COLSAMPLE_BYLEVEL=$(ViashRemoveFlags "$1") + shift 1 + ;; + --colsample_bynode) + [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYNODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --colsample_bynode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --colsample_bynode=*) + [ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ] && ViashError Bad arguments for option \'--colsample_bynode=*\': \'$VIASH_PAR_COLSAMPLE_BYNODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_COLSAMPLE_BYNODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --reg_lambda) + [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_LAMBDA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_lambda. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reg_lambda=*) + [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--reg_lambda=*\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_LAMBDA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --lambda) + [ -n "$VIASH_PAR_REG_LAMBDA" ] && ViashError Bad arguments for option \'--lambda\': \'$VIASH_PAR_REG_LAMBDA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_LAMBDA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --lambda. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --reg_alpha) + [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_ALPHA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --reg_alpha. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --reg_alpha=*) + [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--reg_alpha=*\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_ALPHA=$(ViashRemoveFlags "$1") + shift 1 + ;; + --alpha) + [ -n "$VIASH_PAR_REG_ALPHA" ] && ViashError Bad arguments for option \'--alpha\': \'$VIASH_PAR_REG_ALPHA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REG_ALPHA="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --alpha. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scale_pos_weight) + [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCALE_POS_WEIGHT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --scale_pos_weight. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --scale_pos_weight=*) + [ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ] && ViashError Bad arguments for option \'--scale_pos_weight=*\': \'$VIASH_PAR_SCALE_POS_WEIGHT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SCALE_POS_WEIGHT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then + ViashError '--reference_obsm_features' is a required argument. Use "--help" to get more information on the parameters. 
  exit 1
fi
# (the two lines above close the '--reference_obsm_features' check opened on the previous line)
if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
  ViashError '--output' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_EXECUTABLE+x} ]; then
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_CONFIG+x} ]; then
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi
if [ -z ${VIASH_META_TEMP_DIR+x} ]; then
  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
fi

# filling in defaults
# (the ${VAR+x} form tests "is set", so an explicitly passed empty value is kept)
if [ -z ${VIASH_PAR_MODALITY+x} ]; then
  VIASH_PAR_MODALITY="rna"
fi
if [ -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then
  VIASH_PAR_REFERENCE_OBS_TARGETS="ann_level_1,ann_level_2,ann_level_3,ann_level_4,ann_level_5,ann_finest_level"
fi
if [ -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then
  VIASH_PAR_OUTPUT_UNS_PARAMETERS="labels_transfer"
fi
if [ -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then
  VIASH_PAR_FORCE_RETRAIN="false"
fi
if [ -z ${VIASH_PAR_USE_GPU+x} ]; then
  VIASH_PAR_USE_GPU="false"
fi
if [ -z ${VIASH_PAR_VERBOSITY+x} ]; then
  VIASH_PAR_VERBOSITY="1"
fi
if [ -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then
  VIASH_PAR_MODEL_OUTPUT="model"
fi
if [ -z ${VIASH_PAR_LEARNING_RATE+x} ]; then
  VIASH_PAR_LEARNING_RATE="0.3"
fi
if [ -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then
  VIASH_PAR_MIN_SPLIT_LOSS="0.0"
fi
if [ -z ${VIASH_PAR_MAX_DEPTH+x} ]; then
  VIASH_PAR_MAX_DEPTH="6"
fi
if [ -z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then
  VIASH_PAR_MIN_CHILD_WEIGHT="1"
fi
if [ -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then
  VIASH_PAR_MAX_DELTA_STEP="0.0"
fi
if [ -z ${VIASH_PAR_SUBSAMPLE+x} ]; then
  VIASH_PAR_SUBSAMPLE="1.0"
fi
if [ -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then
  VIASH_PAR_SAMPLING_METHOD="uniform"
fi
if [ -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then
  VIASH_PAR_COLSAMPLE_BYTREE="1.0"
fi
if [ -z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then
  VIASH_PAR_COLSAMPLE_BYLEVEL="1.0"
fi
if [ -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then
  VIASH_PAR_COLSAMPLE_BYNODE="1.0"
fi
if [ -z ${VIASH_PAR_REG_LAMBDA+x} ]; then
  VIASH_PAR_REG_LAMBDA="1.0"
fi
if [ -z ${VIASH_PAR_REG_ALPHA+x} ]; then
  VIASH_PAR_REG_ALPHA="0.0"
fi
if [ -z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then
  VIASH_PAR_SCALE_POS_WEIGHT="1.0"
fi

# check whether required files exist
# (only checked when a non-empty path was given)
if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then
  ViashError "Input file '$VIASH_PAR_INPUT' does not exist."
  exit 1
fi
if [ ! -z "$VIASH_PAR_REFERENCE" ] && [ ! -e "$VIASH_PAR_REFERENCE" ]; then
  ViashError "Input file '$VIASH_PAR_REFERENCE' does not exist."
  exit 1
fi

# check whether parameters values are of the right type
# Three patterns are used below:
#   boolean        - true/false/yes/no in lower, Capitalised or UPPER case
#   integer / long - optional sign followed by digits
#   double         - optional sign, digits with optional fraction (or a bare
#                    fraction), optional exponent
# Empty values skip the check.
if [[ -n "$VIASH_PAR_FORCE_RETRAIN" ]]; then
  if ! [[ "$VIASH_PAR_FORCE_RETRAIN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then
    ViashError '--force_retrain' has to be a boolean_true. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_USE_GPU" ]]; then
  if ! [[ "$VIASH_PAR_USE_GPU" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then
    ViashError '--use_gpu' has to be a boolean. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_VERBOSITY" ]]; then
  if ! [[ "$VIASH_PAR_VERBOSITY" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError '--verbosity' has to be an integer. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_LEARNING_RATE" ]]; then
  if ! [[ "$VIASH_PAR_LEARNING_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--learning_rate' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_MIN_SPLIT_LOSS" ]]; then
  if ! [[ "$VIASH_PAR_MIN_SPLIT_LOSS" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--min_split_loss' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_MAX_DEPTH" ]]; then
  if ! [[ "$VIASH_PAR_MAX_DEPTH" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError '--max_depth' has to be an integer. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_MIN_CHILD_WEIGHT" ]]; then
  if ! [[ "$VIASH_PAR_MIN_CHILD_WEIGHT" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError '--min_child_weight' has to be an integer. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_MAX_DELTA_STEP" ]]; then
  if ! [[ "$VIASH_PAR_MAX_DELTA_STEP" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--max_delta_step' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_SUBSAMPLE" ]]; then
  if ! [[ "$VIASH_PAR_SUBSAMPLE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--subsample' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_COLSAMPLE_BYTREE" ]]; then
  if ! [[ "$VIASH_PAR_COLSAMPLE_BYTREE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--colsample_bytree' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_COLSAMPLE_BYLEVEL" ]]; then
  if ! [[ "$VIASH_PAR_COLSAMPLE_BYLEVEL" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--colsample_bylevel' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_COLSAMPLE_BYNODE" ]]; then
  if ! [[ "$VIASH_PAR_COLSAMPLE_BYNODE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--colsample_bynode' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_REG_LAMBDA" ]]; then
  if ! [[ "$VIASH_PAR_REG_LAMBDA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--reg_lambda' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_REG_ALPHA" ]]; then
  if ! [[ "$VIASH_PAR_REG_ALPHA" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--reg_alpha' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_PAR_SCALE_POS_WEIGHT" ]]; then
  if ! [[ "$VIASH_PAR_SCALE_POS_WEIGHT" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then
    ViashError '--scale_pos_weight' has to be a double. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_META_CPUS" ]]; then
  if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_META_MEMORY_B" ]]; then
  if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_META_MEMORY_KB" ]]; then
  if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_META_MEMORY_MB" ]]; then
  if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_META_MEMORY_GB" ]]; then
  if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_META_MEMORY_TB" ]]; then
  if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi
if [[ -n "$VIASH_META_MEMORY_PB" ]]; then
  if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters.
    exit 1
  fi
fi

# check whether the value belongs to a set of choices
if [ ! -z "$VIASH_PAR_SAMPLING_METHOD" ]; then
  VIASH_PAR_SAMPLING_METHOD_CHOICES=("uniform:gradient_based")
  # the allowed values are held as one ':'-separated string; the value is
  # accepted when ':<value>:' occurs in ':<choices>:'
  IFS=':'
  # disable globbing while IFS is modified
  set -f
  if ! [[ ":${VIASH_PAR_SAMPLING_METHOD_CHOICES[*]}:" =~ ":$VIASH_PAR_SAMPLING_METHOD:" ]]; then
    ViashError '--sampling_method' specified value of \'$VIASH_PAR_SAMPLING_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters.
    exit 1
  fi
  set +f
  unset IFS
fi

# create parent directories of output files, if so desired
if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then
  mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")"
fi
if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_MODEL_OUTPUT")" ]; then
  mkdir -p "$(dirname "$VIASH_PAR_MODEL_OUTPUT")"
fi

ViashDebug "Running command: bash"
# The here-document below is unquoted: this shell expands it first (filling in
# the VIASH_* values) and pipes the result to a child bash. A backslash before
# a dollar sign defers that expansion to the child shell.
cat << VIASHEOF | bash
set -e
tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-xgboost-XXXXXX").py
function clean_up {
  rm "\$tempscript"
}
function interrupt {
  echo -e "\nCTRL-C Pressed..."
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import json +import os +from typing import Optional +import yaml +from pathlib import Path + +import mudata +import numpy as np +import scanpy as sc +import pandas as pd +import xgboost as xgb +from sklearn.model_selection import train_test_split +from sklearn.metrics import classification_report +from sklearn.preprocessing import LabelEncoder + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\'/\'\"\'\"r\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\'/\'\"\'\"r\'}'.split(':')"; else echo None; fi ), + 'output_uns_parameters': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'use_gpu': $( if [ ! -z ${VIASH_PAR_USE_GPU+x} ]; then echo "r'${VIASH_PAR_USE_GPU//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'verbosity': $( if [ ! -z ${VIASH_PAR_VERBOSITY+x} ]; then echo "int(r'${VIASH_PAR_VERBOSITY//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_split_loss': $( if [ ! -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then echo "float(r'${VIASH_PAR_MIN_SPLIT_LOSS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_depth': $( if [ ! -z ${VIASH_PAR_MAX_DEPTH+x} ]; then echo "int(r'${VIASH_PAR_MAX_DEPTH//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_child_weight': $( if [ ! -z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then echo "int(r'${VIASH_PAR_MIN_CHILD_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_delta_step': $( if [ ! -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DELTA_STEP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'sampling_method': $( if [ ! -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then echo "r'${VIASH_PAR_SAMPLING_METHOD//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'colsample_bytree': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYTREE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'colsample_bylevel': $( if [ ! 
-z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYLEVEL//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'colsample_bynode': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYNODE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'reg_lambda': $( if [ ! -z ${VIASH_PAR_REG_LAMBDA+x} ]; then echo "float(r'${VIASH_PAR_REG_LAMBDA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'reg_alpha': $( if [ ! -z ${VIASH_PAR_REG_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_REG_ALPHA//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'scale_pos_weight': $( if [ ! -z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then echo "float(r'${VIASH_PAR_SCALE_POS_WEIGHT//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +from helper import check_arguments, get_reference_features, get_query_features +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +# read config arguments +config = yaml.safe_load(Path(meta["config"]).read_text()) + +# look for training params for method +argument_groups = { grp["name"]: grp["arguments"] for grp in config["functionality"]["argument_groups"] } +training_arg_names = [ arg["name"].replace("--", "") for arg in argument_groups["Learning parameters"] ] +training_params = { arg_name: par[arg_name] for arg_name in training_arg_names } + +def encode_labels(y): + labels_encoder = LabelEncoder() + labels_encoder.fit(y) + + return labels_encoder.transform(y), labels_encoder + + +def get_model_eval(xgb_model, X_test, y_test, labels_encoder): + preds = xgb_model.predict(X_test) + + cr = classification_report(labels_encoder.inverse_transform(y_test), + 
labels_encoder.inverse_transform(preds), + output_dict=True) + cr_df = pd.DataFrame(cr).transpose() + + return cr_df + + +def train_test_split_adata(adata, labels): + train_data = pd.DataFrame(data=adata.X, index=adata.obs_names) + + X_train, X_test, y_train, y_test = train_test_split( + train_data, labels, test_size=0.2, random_state=42, stratify=labels) + + return X_train, X_test, y_train, y_test + + +def train_xgb_model(X_train, y_train, gpu=True) -> xgb.XGBClassifier: + n_classes = len(np.unique(y_train)) + objective = "binary:logistic" if n_classes == 2 else "multi:softprob" + + tree_method = "gpu_hist" if gpu else "hist" + xgbc = xgb.XGBClassifier(tree_method=tree_method, objective=objective, **training_params) + xgbc.fit(X_train, y_train) + + return xgbc + + +def build_classifier(X, y, labels_encoder, label_key, eval_verbosity: Optional[int] = 1, gpu=True) -> xgb.XGBClassifier: + # Adata prep + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) + #Note: Do we need a new train-test split for each classifier? + + # Model training + xgb_model = train_xgb_model(X_train, y_train, gpu=gpu) + + # Model eval + if eval_verbosity != 0: + cr_df = get_model_eval(xgb_model, X_test, y_test, labels_encoder) + + if eval_verbosity == 2: + print(cr_df) + + else: + overall_accuracy = cr_df["support"]["accuracy"] + low_prec_key = cr_df.precision.idxmin() + low_prec_val = cr_df.precision.min() + low_rec_key = cr_df.recall.idxmin() + low_rec_val = cr_df.recall.min() + low_f1_key = cr_df["f1-score"].idxmin() + low_f1_val = cr_df["f1-score"].min() + + print("") + print(f"Summary stats for {label_key} model:") + print(f"Overall accuracy: {overall_accuracy}") + print(f"Min. precision: {low_prec_key}: {low_prec_val}") + print(f"Min. Recall: {low_rec_key}: {low_rec_val}") + print(f"Min. 
F1-score: {low_f1_key}: {low_f1_val}") + print("") + + return xgb_model + + +def build_ref_classifiers(adata_reference, targets, model_path, + eval_verbosity: Optional[int] = 1, gpu: Optional[bool] = True) -> None: + """ + This function builds xgboost classifiers on a reference embedding for a designated number of + adata_reference.obs columns. Classifier .xgb files and a model_info.json file is written to the \`model_path\` + directory. Model evaluation is printed to stdout. + + Inputs: + * \`adata_reference\`: The AnnData object that was used to train the reference model + * \`model_path\`: The reference model directory where the classifiers will also be stored + * \`eval_verbosity\`: The verbosity level for evaluation of the classifier from the range [0;2]. + * \`gpu\`: Boolean indicating whether a gpu is available for classifier training + + + Example: + \`\`\` + >>> adata + AnnData object with n_obs x n_vars = 700 x 765 + obs: "ann_finest_level", "ann_level_1" + + >>> os.listdir("/path/to/model") + model_params.pt* + + >>> build_ref_classifiers(adata, "path/to/model", eval_verbosity=1, gpu=True) + >>> os.listdir("/path/to/model") + classifier_ann_finest_level.xgb* model_info.json* + classifier_ann_level_1.xgb* model_params.pt* + \`\`\` + """ + + # Check inputs + if not isinstance(eval_verbosity, int): + raise TypeError("\`eval_verbosity\` should be an integer between 0 and 2.") + + if eval_verbosity < 0 or eval_verbosity > 2: + raise ValueError("\`eval_verbosity\` should be an integer between 0 and 2.") + + train_data = get_reference_features(adata_reference, par, logger) + + if not os.path.exists(model_path): + os.makedirs(model_path, exist_ok=True) + + # Map from name of classifier to file names + classifiers = dict() + + for label, obs_pred in zip(targets, par["output_obs_predictions"]): + if label not in adata_reference.obs: + raise ValueError(f"{label} is not in the \`adata\` object passed!") + + filename = "classifier_" + label + ".xgb" + + labels, 
labels_encoder = encode_labels(adata_reference.obs[label]) + logger.info(f"Classes: {labels_encoder.classes_}") + + logger.info(f"Building classifier for {label}...") + xgb_model = build_classifier( + X=train_data, + y=labels, + labels_encoder=labels_encoder, + label_key=label, + eval_verbosity=eval_verbosity, + gpu=gpu + ) + + # Save classifier + logger.info("Saving model") + xgb_model.save_model(os.path.join(model_path, filename)) + + # Store classifier info + classifiers[label] = { + "filename": filename, + "labels": labels_encoder.classes_.tolist(), + "obs_column": obs_pred, + "model_params": training_params, + } + + # Store model_info.json file + model_info = { + "classifier_info": classifiers + } + + logger.info("Writing model_info to the file") + # Read previous file if it exists + if os.path.exists(model_path + "/model_info.json"): + logger.info("Old model_info file found, updating") + with open(model_path + "/model_info.json", "r") as f: + old_model_info = json.loads(f.read()) + + for key in old_model_info: + if key in model_info: + old_model_info[key].update(model_info[key]) + json_string = json.dumps(old_model_info, indent=4) + + else: + logger.info("Creating a new file") + json_string = json.dumps(model_info, indent=4) + + with open(model_path + "/model_info.json", "w") as f: + f.write(json_string) + + +def project_labels( + query_dataset, + cell_type_classifier_model: xgb.XGBClassifier, + annotation_column_name='label_pred', + uncertainty_column_name='label_uncertainty', + uncertainty_thresh=None # Note: currently not passed to predict function +): + """ + A function that projects predicted labels onto the query dataset, along with uncertainty scores. + Performs in-place update of the adata object, adding columns to the \`obs\` DataFrame. 
+ + Input: + * \`query_dataset\`: The query \`AnnData\` object + * \`model_file\`: Path to the classification model file + * \`prediction_key\`: Column name in \`adata.obs\` where to store the predicted labels + * \`uncertainty_key\`: Column name in \`adata.obs\` where to store the uncertainty scores + * \`uncertainty_thresh\`: The uncertainty threshold above which we call a cell 'Unknown' + + Output: + Nothing is output, the passed anndata is modified inplace + + """ + + if (uncertainty_thresh is not None) and (uncertainty_thresh < 0 or uncertainty_thresh > 1): + raise ValueError(f'\`uncertainty_thresh\` must be \`None\` or between 0 and 1.') + + query_data = get_query_features(query_dataset, par, logger) + + # Predict labels and probabilities + query_dataset.obs[annotation_column_name] = cell_type_classifier_model.predict(query_data) + + logger.info("Predicting probabilities") + probs = cell_type_classifier_model.predict_proba(query_data) + + # Format probabilities + df_probs = pd.DataFrame(probs, columns=cell_type_classifier_model.classes_, index=query_dataset.obs_names) + query_dataset.obs[uncertainty_column_name] = 1 - df_probs.max(1) + + # Note: this is here in case we want to propose a set of values for the user to accept to seed the + # manual curation of predicted labels + if uncertainty_thresh is not None: + logger.info("Marking uncertain predictions") + query_dataset.obs[annotation_column_name + "_filtered"] = [ + val if query_dataset.obs[uncertainty_column_name][i] < uncertainty_thresh + else "Unknown" for i, val in enumerate(query_dataset.obs[annotation_column_name])] + + return query_dataset + + +def predict( + query_dataset, + cell_type_classifier_model_path, + annotation_column_name: str, + prediction_column_name: str, + uncertainty_column_name: str, + models_info, + use_gpu: bool = False +) -> pd.DataFrame: + """ + Returns \`obs\` DataFrame with prediction columns appended + """ + + tree_method = "gpu_hist" if use_gpu else "hist" + + labels = 
models_info["classifier_info"][annotation_column_name]["labels"] + + objective = "binary:logistic" if len(labels) == 2 else "multi:softprob" + cell_type_classifier_model = xgb.XGBClassifier(tree_method=tree_method, objective=objective) + + logger.info("Loading model") + cell_type_classifier_model.load_model(fname=cell_type_classifier_model_path) + + logger.info("Predicting labels") + project_labels(query_dataset, + cell_type_classifier_model, + annotation_column_name=prediction_column_name, + uncertainty_column_name=uncertainty_column_name) + + logger.info("Converting labels from numbers to classes") + labels_encoder = LabelEncoder() + labels_encoder.classes_ = np.array(labels) + query_dataset.obs[prediction_column_name] = labels_encoder.inverse_transform(query_dataset.obs[prediction_column_name]) + + return query_dataset + + +def main(par): + logger.info("Checking arguments") + par = check_arguments(par) + + mdata = mudata.read(par["input"].strip()) + adata = mdata.mod[par["modality"]] + + adata_reference = sc.read(par["reference"], backup_url=par["reference"]) + + # If classifiers for targets are in the model_output directory, simply open them and run (unless \`retrain\` != True) + # If some classifiers are missing, train and save them first + # Predict and save the query data + + targets_to_train = [] + + for obs_target in par["reference_obs_targets"]: + if not os.path.exists(par["model_output"]) or f"classifier_{obs_target}.xgb" not in os.listdir(par["model_output"]) or par["force_retrain"]: + logger.info(f"Classifier for {obs_target} added to a training schedule") + targets_to_train.append(obs_target) + else: + logger.info(f"Found classifier for {obs_target}, no retraining required") + + build_ref_classifiers(adata_reference, targets_to_train, model_path=par["model_output"], + gpu=par["use_gpu"], eval_verbosity=par["verbosity"]) + + output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) + + with open(par["model_output"] + "/model_info.json", 
"r") as f: + models_info = json.loads(f.read()) + + for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): + logger.info(f"Predicting {obs_target}") + + adata = predict(query_dataset=adata, + cell_type_classifier_model_path=os.path.join(par["model_output"], "classifier_" + obs_target + ".xgb"), + annotation_column_name=obs_target, + prediction_column_name=obs_pred, + uncertainty_column_name=obs_unc, + models_info=models_info, + use_gpu=par["use_gpu"]) + + if obs_target in targets_to_train: + # Save information about the transfer to .uns + output_uns_parameters[obs_target] = { + "method": "XGBClassifier", + **training_params + } + + adata.uns[par["output_uns_parameters"]] = output_uns_parameters + + logger.info("Updating mdata") + mdata.mod[par['modality']] = adata + mdata.update() + + logger.info("Writing output") + mdata.write_h5mu(par['output'].strip()) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_MODEL_OUTPUT" ] && [ ! -e "$VIASH_PAR_MODEL_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_MODEL_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/native/metadata/add_id/.config.vsh.yaml b/target/native/metadata/add_id/.config.vsh.yaml new file mode 100644 index 00000000000..23e8b23932e --- /dev/null +++ b/target/native/metadata/add_id/.config.vsh.yaml @@ -0,0 +1,197 @@ +functionality: + name: "add_id" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_id" + description: "The input id." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_output" + description: "Name of the .obs column where to store the id." + info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--make_observation_keys_unique" + description: "Join the id to the .obs index (.obs_names)." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Add id of .obs. Also allows to make .obs_names (the .obs index) unique\ + \ \nby prefixing the values with an unique id per .h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory 
= 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/add_id" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/add_id/add_id" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/metadata/add_id/add_id b/target/native/metadata/add_id/add_id new file mode 100755 index 00000000000..820a157e10f --- /dev/null +++ b/target/native/metadata/add_id/add_id @@ -0,0 +1,593 @@ +#!/usr/bin/env bash + +# add_id 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. 
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="add_id" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "add_id 0.12.4" + echo "" + echo "Add id of .obs. Also allows to make .obs_names (the .obs index) unique" + echo "by prefixing the values with an unique id per .h5mu file." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: sample_path" + echo " Path to the input .h5mu." + echo "" + echo " --input_id" + echo " type: string, required parameter" + echo " The input id." + echo "" + echo " --obs_output" + echo " type: string" + echo " default: sample_id" + echo " Name of the .obs column where to store the id." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --make_observation_keys_unique" + echo " type: boolean_true" + echo " Join the id to the .obs index (.obs_names)." 
+} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "add_id 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id) + [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_ID="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_id. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_id=*) + [ -n "$VIASH_PAR_INPUT_ID" ] && ViashError Bad arguments for option \'--input_id=*\': \'$VIASH_PAR_INPUT_ID\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_INPUT_ID=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obs_output) + [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obs_output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obs_output=*) + [ -n "$VIASH_PAR_OBS_OUTPUT" ] && ViashError Bad arguments for option \'--obs_output=*\': \'$VIASH_PAR_OBS_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBS_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --make_observation_keys_unique) + [ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ] && ViashError Bad arguments for option \'--make_observation_keys_unique\': \'$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE=true + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_ID+x} ]; then + ViashError '--input_id' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then + VIASH_PAR_OBS_OUTPUT="sample_id" +fi +if [ -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then + VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE="false" +fi + +# check whether required files exist +if [ ! 
-z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" ]]; then + if ! [[ "$VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--make_observation_keys_unique' has to be a boolean_true. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-add_id-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from __future__ import annotations +import sys +from mudata import read_h5mu, MuData + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'obs_output': $( if [ ! -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBS_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'make_observation_keys_unique': $( if [ ! 
-z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then echo "r'${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: + """ + Make the observation keys unique across all samples. At input, + the observation keys are unique within a sample. By adding the sample name + (unique for a sample) to each observation key, the observation key is made + unique across all samples as well. + """ + logger.info('Making observation keys unique across all samples.') + sample.obs.index = f"{sample_id}_" + sample.obs.index + make_observation_keys_unique_per_mod(sample_id, sample) + + +def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: + """ + Updating MuData.obs_names is not allowed (it is read-only). + So the observation keys for each modality has to be updated manually. 
+ """ + for mod in sample.mod.values(): + mod.obs_names = f"{sample_id}_" + mod.obs_names + +def main(): + input_data = read_h5mu(par["input"]) + input_data.obs[par["obs_output"]] = par["input_id"] + for mod_data in input_data.mod.values(): + mod_data.obs[par["obs_output"]] = par["input_id"] + if par["make_observation_keys_unique"]: + make_observation_keys_unique(par["input_id"], input_data) + logger.info("Writing out data to '%s'.", par["output"]) + input_data.write_h5mu(par["output"], compression=par["output_compression"]) + +if __name__ == '__main__': + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/native/metadata/add_id/setup_logger.py b/target/native/metadata/add_id/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/metadata/add_id/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/metadata/grep_annotation_column/.config.vsh.yaml b/target/native/metadata/grep_annotation_column/.config.vsh.yaml new file mode 100644 index 00000000000..2570eb739ad --- /dev/null +++ b/target/native/metadata/grep_annotation_column/.config.vsh.yaml @@ -0,0 +1,244 @@ +functionality: + name: "grep_annotation_column" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + 
github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + description: "Arguments related to the input dataset." + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_column" + description: "Column to query. If not specified, use .var_names or .obs_names,\ + \ depending on the value of --matrix" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to get the annotation matrix from.\n" + info: null + example: + - "rna" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--matrix" + description: "Matrix to fetch the column from that will be searched." + info: null + example: + - "var" + required: false + choices: + - "var" + - "obs" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + description: "Arguments related to how the output will be written." + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_match_column" + description: "Name of the column to write the result to." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_fraction_column" + description: "For the opposite axis, name of the column to write the fraction\ + \ of \nobservations that matches to the pattern.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Query options" + description: "Options related to the query" + arguments: + - type: "string" + name: "--regex_pattern" + description: "Regex to use to match with the input column." + info: null + example: + - "^[mM][tT]-" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Perform a regex lookup on a column from the annotation matrices .obs\ + \ or .var.\nThe annotation matrix can originate from either a modality, or all\ + \ modalities (global .var or .obs).\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + 
- "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/grep_annotation_column" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/metadata/grep_annotation_column/grep_annotation_column" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/metadata/grep_annotation_column/grep_annotation_column 
b/target/native/metadata/grep_annotation_column/grep_annotation_column new file mode 100755 index 00000000000..6d2acbcc2db --- /dev/null +++ b/target/native/metadata/grep_annotation_column/grep_annotation_column @@ -0,0 +1,677 @@ +#!/usr/bin/env bash + +# grep_annotation_column 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to 
${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. 
+function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="grep_annotation_column" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "grep_annotation_column 0.12.4" + echo "" + echo "Perform a regex lookup on a column from the annotation matrices .obs or .var." + echo "The annotation matrix can originate from either a modality, or all modalities" + echo "(global .var or .obs)." + echo "" + echo "Inputs:" + echo " Arguments related to the input dataset." + echo "" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: sample_path" + echo " Path to the input .h5mu." + echo "" + echo " --input_column" + echo " type: string" + echo " Column to query. 
If not specified, use .var_names or .obs_names," + echo " depending on the value of --matrix" + echo "" + echo " --modality" + echo " type: string, required parameter" + echo " example: rna" + echo " Which modality to get the annotation matrix from." + echo "" + echo " --matrix" + echo " type: string" + echo " example: var" + echo " choices: [ var, obs ]" + echo " Matrix to fetch the column from that will be searched." + echo "" + echo "Outputs:" + echo " Arguments related to how the output will be written." + echo "" + echo " -o, --output" + echo " type: file, output, file must exist" + echo " example: output.h5mu" + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo " --output_match_column" + echo " type: string, required parameter" + echo " Name of the column to write the result to." + echo "" + echo " --output_fraction_column" + echo " type: string" + echo " For the opposite axis, name of the column to write the fraction of" + echo " observations that matches to the pattern." + echo "" + echo "Query options:" + echo " Options related to the query" + echo "" + echo " --regex_pattern" + echo " type: string, required parameter" + echo " example: ^[mM][tT]-" + echo " Regex to use to match with the input column." +} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "grep_annotation_column 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_column) + [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_COLUMN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_column. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_column=*) + [ -n "$VIASH_PAR_INPUT_COLUMN" ] && ViashError Bad arguments for option \'--input_column=*\': \'$VIASH_PAR_INPUT_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_COLUMN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --matrix) + [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MATRIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --matrix. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --matrix=*) + [ -n "$VIASH_PAR_MATRIX" ] && ViashError Bad arguments for option \'--matrix=*\': \'$VIASH_PAR_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MATRIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_match_column) + [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MATCH_COLUMN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_match_column. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_match_column=*) + [ -n "$VIASH_PAR_OUTPUT_MATCH_COLUMN" ] && ViashError Bad arguments for option \'--output_match_column=*\': \'$VIASH_PAR_OUTPUT_MATCH_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MATCH_COLUMN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_fraction_column) + [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FRACTION_COLUMN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_fraction_column. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_fraction_column=*) + [ -n "$VIASH_PAR_OUTPUT_FRACTION_COLUMN" ] && ViashError Bad arguments for option \'--output_fraction_column=*\': \'$VIASH_PAR_OUTPUT_FRACTION_COLUMN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FRACTION_COLUMN=$(ViashRemoveFlags "$1") + shift 1 + ;; + --regex_pattern) + [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REGEX_PATTERN="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --regex_pattern. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --regex_pattern=*) + [ -n "$VIASH_PAR_REGEX_PATTERN" ] && ViashError Bad arguments for option \'--regex_pattern=*\': \'$VIASH_PAR_REGEX_PATTERN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_REGEX_PATTERN=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + ViashError '--modality' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then + ViashError '--output_match_column' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then + ViashError '--regex_pattern' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_MATRIX" ]; then + VIASH_PAR_MATRIX_CHOICES=("var:obs") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_MATRIX_CHOICES[*]}:" =~ ":$VIASH_PAR_MATRIX:" ]]; then + ViashError '--matrix' specified value of \'$VIASH_PAR_MATRIX\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-grep_annotation_column-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import mudata as mu +from pathlib import Path +from operator import attrgetter +import re +import numpy as np + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_column': $( if [ ! -z ${VIASH_PAR_INPUT_COLUMN+x} ]; then echo "r'${VIASH_PAR_INPUT_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'matrix': $( if [ ! -z ${VIASH_PAR_MATRIX+x} ]; then echo "r'${VIASH_PAR_MATRIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_match_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MATCH_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_fraction_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FRACTION_COLUMN//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'regex_pattern': $( if [ ! -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then echo "r'${VIASH_PAR_REGEX_PATTERN//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +### VIASH END + +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(par): + input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] + try: + compiled_regex = re.compile(par["regex_pattern"]) + except (TypeError, re.error) as e: + raise ValueError(f"{par['regex_pattern']} is not a valid regular expression pattern.") from e + else: + if compiled_regex.groups: + raise NotImplementedError("Using match groups is not supported by this component.") + logger.info('Reading input file %s, modality %s.', input_file, mod_name) + + mudata = mu.read_h5mu(input_file) + modality_data = mudata[mod_name] + annotation_matrix = getattr(modality_data, par['matrix']) + default_column = { + "var": attrgetter("var_names"), + "obs": attrgetter("obs_names") + } + if 
par["input_column"]: + try: + annotation_column = annotation_matrix[par["input_column"]] + except KeyError as e: + raise ValueError(f"Column {par['input_column']} could not be found for modality " + f"{par['modality']}. Available columns: {','.join(annotation_matrix.columns.to_list())}") from e + else: + annotation_column = default_column[par['matrix']](modality_data) + grep_result = annotation_column.str.contains(par["regex_pattern"], regex=True) + + other_axis_attribute = { + "var": "obs", + "obs": "var" + } + if par['output_fraction_column']: + pct_matching = np.ravel(np.sum(modality_data[:, grep_result].X, axis=1) / np.sum(modality_data.X, axis=1)) + getattr(modality_data, other_axis_attribute[par['matrix']])[par['output_fraction_column']] = pct_matching + getattr(modality_data, par['matrix'])[par["output_match_column"]] = grep_result + mudata.write(output_file, compression=par["output_compression"]) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/native/transform/scale/.config.vsh.yaml b/target/native/transform/scale/.config.vsh.yaml new file mode 100644 index 00000000000..8d4174da81c --- /dev/null +++ b/target/native/transform/scale/.config.vsh.yaml @@ -0,0 +1,205 @@ +functionality: + name: "scale" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file." 
+ info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "List of modalities to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_value" + description: "Clip (truncate) to this value after scaling. Does not clip by default." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--zero_center" + description: "If False, omit zero-centering variables, which allows to handle\ + \ sparse input efficiently." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Scale data to unit variance and zero mean.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim-bullseye" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: 
"memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/transform/scale" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/transform/scale/scale" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/transform/scale/scale b/target/native/transform/scale/scale new file mode 100755 index 00000000000..ac5ad7f7631 --- /dev/null +++ b/target/native/transform/scale/scale @@ -0,0 +1,592 @@ +#!/usr/bin/env bash + +# scale 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# ViashQuote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory containing the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1
alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '.
+function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="scale" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "scale 0.12.4" + echo "" + echo "Scale data to unit variance and zero mean." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file." + echo "" + echo " --modality" + echo " type: string" + echo " default: rna" + echo " List of modalities to process." + echo "" + echo " --max_value" + echo " type: double" + echo " Clip (truncate) to this value after scaling. Does not clip by default." + echo "" + echo " --zero_center" + echo " type: boolean" + echo " default: true" + echo " If False, omit zero-centering variables, which allows to handle sparse" + echo " input efficiently." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " default: output.h5mu" + echo " Output h5mu file." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object."
+} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scale 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_value) + [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_VALUE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_value. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_value=*) + [ -n "$VIASH_PAR_MAX_VALUE" ] && ViashError Bad arguments for option \'--max_value=*\': \'$VIASH_PAR_MAX_VALUE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_VALUE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --zero_center) + [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ZERO_CENTER="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --zero_center. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --zero_center=*) + [ -n "$VIASH_PAR_ZERO_CENTER" ] && ViashError Bad arguments for option \'--zero_center=*\': \'$VIASH_PAR_ZERO_CENTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_ZERO_CENTER=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # memory string could not be parsed: unset memory_b by NAME so later ${VAR+x} checks treat it as absent + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus by NAME if it is empty, so later ${VAR+x} checks treat it as absent +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + VIASH_PAR_MODALITY="rna" +fi +if [ -z ${VIASH_PAR_ZERO_CENTER+x} ]; then + VIASH_PAR_ZERO_CENTER="true" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ !
-e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MAX_VALUE" ]]; then + if ! [[ "$VIASH_PAR_MAX_VALUE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--max_value' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_ZERO_CENTER" ]]; then + if ! [[ "$VIASH_PAR_ZERO_CENTER" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--zero_center' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scale-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +from mudata import read_h5mu +import scanpy + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'max_value': $( if [ ! -z ${VIASH_PAR_MAX_VALUE+x} ]; then echo "float(r'${VIASH_PAR_MAX_VALUE//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'zero_center': $( if [ ! -z ${VIASH_PAR_ZERO_CENTER+x} ]; then echo "r'${VIASH_PAR_ZERO_CENTER//\'/\'\"\'\"r\'}'.lower() in ('true', 'yes')"; else echo None; fi ), + 'output': $( if [ !
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(): + logger.info(f'Reading .h5mu file: {par["input"]}') + mudata = read_h5mu(par["input"]) + mod = par["modality"] + data = mudata.mod[mod] + + logger.info("Scaling modality: %s", mod) + scanpy.pp.scale(data, + zero_center=par["zero_center"], + max_value=par["max_value"]) + + logger.info("Writing to %s", par["output"]) + mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) + logger.info("Finished") + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/native/transform/scale/setup_logger.py b/target/native/transform/scale/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/transform/scale/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/velocity/scvelo/.config.vsh.yaml b/target/native/velocity/scvelo/.config.vsh.yaml new file mode 100644 index 00000000000..29da507ebb1 --- /dev/null +++ b/target/native/velocity/scvelo/.config.vsh.yaml @@ -0,0 +1,276 @@ +functionality: + name: "scvelo" + namespace: "velocity" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Velocyto loom file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output directory. If it does not exist, will be created." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Filtering and normalization" + description: "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize\ + \ function)" + arguments: + - type: "integer" + name: "--min_counts" + description: "Minimum number of counts required for a gene to pass filtering\ + \ (spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_counts_u" + description: "Minimum number of counts required for a gene to pass filtering\ + \ (unspliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells" + description: "Minimum number of cells expressed required to pass filtering (spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells_u" + description: "Minimum number of cells expressed required to pass filtering (unspliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_shared_counts" + description: "Minimum number of counts (both unspliced and spliced) required\ + \ for a gene." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_shared_cells" + description: "Minimum number of cells required to be expressed (both unspliced\ + \ and spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_top_genes" + description: "Number of genes to keep." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--log_transform" + description: "Do not log transform counts." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Fitting parameters" + description: "Arguments for fitting the data" + arguments: + - type: "integer" + name: "--n_principal_components" + description: "Number of principal components to use for calculating moments." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_neighbors" + description: "Number of neighbors to use. First/second-order moments are computed\ + \ for each\ncell across its nearest neighbors, where the neighbor graph is\ + \ obtained from\neuclidean distances in PCA space.\n" + info: null + default: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scvelo~=0.2.5" + - 
"numpy~=1.23.5" + - "matplotlib<3.8.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/scvelo" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/scvelo/scvelo" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/velocity/scvelo/scvelo b/target/native/velocity/scvelo/scvelo new file mode 100755 index 00000000000..164b1dfd685 --- /dev/null +++ b/target/native/velocity/scvelo/scvelo @@ -0,0 +1,801 @@ +#!/usr/bin/env bash + +# scvelo 0.12.4 +# +# This wrapper script 
is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dries Schaumont (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# ViashQuote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove a leading '--flag=' prefix, keeping only the value +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the directory containing a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the directory containing the bash file +function ViashSourceDir { + SOURCE="$1" + while [ -h "$SOURCE" ]; do + DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" + done + cd -P "$( dirname "$SOURCE" 
)" >/dev/null 2>&1 && pwd +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stderr: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stderr: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stderr: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stderr: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stderr: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stderr: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stderr: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stderr: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stderr: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# define meta fields +VIASH_META_FUNCTIONALITY_NAME="scvelo" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "scvelo 0.12.4" + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Velocyto loom file." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Output directory. If it does not exist, will be created." + echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " The compression format to be used on the output h5mu object." + echo "" + echo "Filtering and normalization:" + echo " Arguments for filtering, normalization an log transform (see" + echo " scvelo.pp.filter_and_normalize function)" + echo "" + echo " --min_counts" + echo " type: integer" + echo " Minimum number of counts required for a gene to pass filtering" + echo " (spliced)." 
+ echo "" + echo " --min_counts_u" + echo " type: integer" + echo " Minimum number of counts required for a gene to pass filtering" + echo " (unspliced)." + echo "" + echo " --min_cells" + echo " type: integer" + echo " Minimum number of cells expressed required to pass filtering (spliced)." + echo "" + echo " --min_cells_u" + echo " type: integer" + echo " Minimum number of cells expressed required to pass filtering" + echo " (unspliced)." + echo "" + echo " --min_shared_counts" + echo " type: integer" + echo " Minimum number of counts (both unspliced and spliced) required for a" + echo " gene." + echo "" + echo " --min_shared_cells" + echo " type: integer" + echo " Minimum number of cells required to be expressed (both unspliced and" + echo " spliced)." + echo "" + echo " --n_top_genes" + echo " type: integer" + echo " Number of genes to keep." + echo "" + echo " --log_transform" + echo " type: boolean" + echo " default: true" + echo " Do not log transform counts." + echo "" + echo "Fitting parameters:" + echo " Arguments for fitting the data" + echo "" + echo " --n_principal_components" + echo " type: integer" + echo " Number of principal components to use for calculating moments." + echo "" + echo " --n_neighbors" + echo " type: integer" + echo " default: 30" + echo " Number of neighbors to use. First/second-order moments are computed for" + echo " each" + echo " cell across its nearest neighbors, where the neighbor graph is obtained" + echo " from" + echo " euclidean distances in PCA space." 
+} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "scvelo 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_counts) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_counts=*) + [ -n "$VIASH_PAR_MIN_COUNTS" ] && ViashError Bad arguments for option \'--min_counts=*\': \'$VIASH_PAR_MIN_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_counts_u) + [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_COUNTS_U="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_counts_u. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_counts_u=*) + [ -n "$VIASH_PAR_MIN_COUNTS_U" ] && ViashError Bad arguments for option \'--min_counts_u=*\': \'$VIASH_PAR_MIN_COUNTS_U\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MIN_COUNTS_U=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells=*) + [ -n "$VIASH_PAR_MIN_CELLS" ] && ViashError Bad arguments for option \'--min_cells=*\': \'$VIASH_PAR_MIN_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cells_u) + [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_U="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cells_u. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cells_u=*) + [ -n "$VIASH_PAR_MIN_CELLS_U" ] && ViashError Bad arguments for option \'--min_cells_u=*\': \'$VIASH_PAR_MIN_CELLS_U\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELLS_U=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_shared_counts) + [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SHARED_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_counts. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --min_shared_counts=*) + [ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ] && ViashError Bad arguments for option \'--min_shared_counts=*\': \'$VIASH_PAR_MIN_SHARED_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SHARED_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_shared_cells) + [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SHARED_CELLS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_shared_cells. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_shared_cells=*) + [ -n "$VIASH_PAR_MIN_SHARED_CELLS" ] && ViashError Bad arguments for option \'--min_shared_cells=*\': \'$VIASH_PAR_MIN_SHARED_CELLS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_SHARED_CELLS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_top_genes) + [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_TOP_GENES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_top_genes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_top_genes=*) + [ -n "$VIASH_PAR_N_TOP_GENES" ] && ViashError Bad arguments for option \'--n_top_genes=*\': \'$VIASH_PAR_N_TOP_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_TOP_GENES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --log_transform) + [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_LOG_TRANSFORM="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --log_transform. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --log_transform=*) + [ -n "$VIASH_PAR_LOG_TRANSFORM" ] && ViashError Bad arguments for option \'--log_transform=*\': \'$VIASH_PAR_LOG_TRANSFORM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOG_TRANSFORM=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_principal_components) + [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_PRINCIPAL_COMPONENTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_principal_components. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_principal_components=*) + [ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ] && ViashError Bad arguments for option \'--n_principal_components=*\': \'$VIASH_PAR_N_PRINCIPAL_COMPONENTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_PRINCIPAL_COMPONENTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --n_neighbors) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_N_NEIGHBORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_neighbors. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --n_neighbors=*) + [ -n "$VIASH_PAR_N_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_neighbors=*\': \'$VIASH_PAR_N_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_N_NEIGHBORS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings: prints the size in bytes, or nothing when $1 does not match '<digits><unit>' +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units (quoted so values containing spaces, e.g. '2 GB', reach the parser intact) +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty (unset takes the variable name, not its expansion) + unset VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty (unset takes the variable name, not its expansion) +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then + VIASH_PAR_LOG_TRANSFORM="true" +fi +if [ -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then + VIASH_PAR_N_NEIGHBORS="30" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MIN_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MIN_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_COUNTS_U" ]]; then + if ! [[ "$VIASH_PAR_MIN_COUNTS_U" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_counts_u' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS" ]]; then + if ! [[ "$VIASH_PAR_MIN_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELLS_U" ]]; then + if ! 
[[ "$VIASH_PAR_MIN_CELLS_U" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cells_u' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SHARED_COUNTS" ]]; then + if ! [[ "$VIASH_PAR_MIN_SHARED_COUNTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_shared_counts' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_SHARED_CELLS" ]]; then + if ! [[ "$VIASH_PAR_MIN_SHARED_CELLS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_shared_cells' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_TOP_GENES" ]]; then + if ! [[ "$VIASH_PAR_N_TOP_GENES" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_top_genes' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_LOG_TRANSFORM" ]]; then + if ! [[ "$VIASH_PAR_LOG_TRANSFORM" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--log_transform' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" ]]; then + if ! [[ "$VIASH_PAR_N_PRINCIPAL_COMPONENTS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_principal_components' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_N_NEIGHBORS" ]]; then + if ! [[ "$VIASH_PAR_N_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--n_neighbors' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip:lzf") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]}:" =~ ":$VIASH_PAR_OUTPUT_COMPRESSION:" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-scvelo-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import sys +import scvelo +import mudata +from contextlib import redirect_stdout +from pathlib import Path +import matplotlib as mpl + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_counts_u': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cells_u': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_U//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_shared_counts': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_COUNTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_shared_cells': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_CELLS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_top_genes': $( if [ ! 
-z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'log_transform': $( if [ ! -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then echo "r'${VIASH_PAR_LOG_TRANSFORM//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'n_principal_components': $( if [ ! -z ${VIASH_PAR_N_PRINCIPAL_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_N_PRINCIPAL_COMPONENTS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +mpl.rcParams['savefig.dpi']=150 + +# Script must be wrapped into a main function because scvelo spawn subprocesses +# and this fails when the functions are not wrapped. 
+def main(): + # Create output directory + output_dir = Path(par['output']) + output_dir.mkdir(parents=True, exist_ok=True) + scvelo.settings.figdir = str(output_dir) + + + # Calculate the sample name + sample_name = par["output"].removesuffix(".loom") + sample_name = Path(sample_name).name + + # Read the input data + adata = scvelo.read(par['input']) + + # Save spliced vs unspliced proportions to file + with (output_dir / "proportions.txt").open('w') as target: + with redirect_stdout(target): + scvelo.utils.show_proportions(adata) + + # Plot piecharts of spliced vs unspliced proportions + scvelo.pl.proportions(adata, save=True, show=False) + + # Perform preprocessing + scvelo.pp.filter_and_normalize(adata, + min_counts=par["min_counts"], + min_counts_u=par["min_counts_u"], + min_cells=par["min_cells"], + min_cells_u=par["min_cells_u"], + min_shared_counts=par["min_shared_counts"], + min_shared_cells=par["min_shared_cells"], + n_top_genes=par["n_top_genes"], + log=par["log_transform"]) + + # Fitting + scvelo.pp.moments(adata, + n_pcs=par["n_principal_components"], + n_neighbors=par["n_neighbors"]) + + + # Second step in velocyto calculations + # Velocity calculation and visualization + # From the scvelo manual: + # The solution to the full dynamical model is obtained by setting mode='dynamical', + # which requires to run scv.tl.recover_dynamics(adata) beforehand + scvelo.tl.recover_dynamics(adata) + scvelo.tl.velocity(adata, mode="dynamical") + scvelo.tl.velocity_graph(adata) + scvelo.pl.velocity_graph(adata, save=str(output_dir / "scvelo_graph.pdf"), show=False) + + # Plotting + # TODO: add more here. + scvelo.pl.velocity_embedding_stream(adata, save=str(output_dir / "scvelo_embedding.pdf"), show=False) + + # Create output + ouput_data = mudata.MuData({'rna_velocity': adata}) + ouput_data.write_h5mu(output_dir / f"{sample_name}.h5mu", compression=par["output_compression"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" 
+ +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/native/velocity/scvelo/setup_logger.py b/target/native/velocity/scvelo/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/native/velocity/scvelo/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/native/velocity/velocyto/.config.vsh.yaml b/target/native/velocity/velocyto/.config.vsh.yaml new file mode 100644 index 00000000000..aa6436d9c6e --- /dev/null +++ b/target/native/velocity/velocyto/.config.vsh.yaml @@ -0,0 +1,225 @@ +functionality: + name: "velocyto" + namespace: "velocity" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to BAM file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome" + alternatives: + - "-t" + description: "Path to GTF file" + info: null + must_exist: true + 
create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--barcode" + alternatives: + - "-b" + description: "Valid barcodes file, to filter the bam. If --bcfile is not specified\ + \ all the cell barcodes will be included.\nCell barcodes should be specified\ + \ in the bcfile as the 'CB' tag for each read\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--without_umi" + description: "foo" + info: null + direction: "input" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Velocyto loom file" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--logic" + alternatives: + - "-l" + description: "The logic to use for the filtering." + info: null + default: + - "Default" + required: false + choices: + - "Default" + - "Permissive10X" + - "Intermediate10X" + - "ValidatedIntrons10X" + - "Stricter10X" + - "ObservedSpanning10X" + - "Discordant10X" + - "SmartSeq2" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Runs the velocity analysis on a BAM file, outputting a loom file." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + - type: "file" + path: "resources_test/rna_velocity" + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + - "file" + interactive: false + - type: "python" + user: false + pip: + - "numpy" + - "Cython" + upgrade: true + - type: "python" + user: false + pip: + - "velocyto" + upgrade: true + - type: "apt" + packages: + - "samtools" + interactive: false + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" 
+ cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml" + platform: "native" + output: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/velocyto" + executable: "/home/runner/work/openpipeline/openpipeline/target/native/velocity/velocyto/velocyto" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/native/velocity/velocyto/velocyto b/target/native/velocity/velocyto/velocyto new file mode 100755 index 00000000000..b23f53938a7 --- /dev/null +++ b/target/native/velocity/velocyto/velocyto @@ -0,0 +1,605 @@ +#!/usr/bin/env bash + +# velocyto 0.12.4 +# +# This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+# * Robrecht Cannoodt (maintainer)
+
+# abort the wrapper as soon as a command fails
+set -e
+
+# resolve the temporary directory: keep VIASH_TEMP when it is non-empty, otherwise
+# take the first non-empty value of VIASH_TMPDIR, VIASH_TEMPDIR, VIASH_TMP, TMPDIR,
+# TMP, TEMPDIR and TEMP, and finally fall back to /tmp
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1 : unquoted string
+# return : possibly quoted string (printed on stdout)
+# examples:
+# ViashQuote --foo # returns --foo
+# ViashQuote bar # returns 'bar'
+# ViashQuote --foo=bar # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1 : string with a possible leading flag
+# return : string without possible leading flag (printed on stdout)
+# examples:
+# ViashRemoveFlags --foo=bar # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the directory of a bash file, following symlinks
+# usage : ViashSourceDir ${BASH_SOURCE[0]}
+# $1 : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory that contains the bash file
+function ViashSourceDir {
+  SOURCE="$1"
+  # follow the chain of symlinks; a relative link target is resolved against
+  # the directory of the link itself
+  while [ -h "$SOURCE" ]; do
+    DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
+    SOURCE="$(readlink "$SOURCE")"
+    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
+  done
+  cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+# default verbosity: messages with a level up to and including 'notice' are shown
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] ', only when VIASH_VERBOSITY >= $1.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ $VIASH_VERBOSITY -ge $required_level ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
+
+# define meta fields
+VIASH_META_FUNCTIONALITY_NAME="velocyto"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_FUNCTIONALITY_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+# ViashHelp: Display helpful explanation about this executable
+# stdout: the component name and version, its description and one entry per argument
+function ViashHelp {
+  echo "velocyto 0.12.4"
+  echo ""
+  echo "Runs the velocity analysis on a BAM file, outputting a loom file."
+  echo ""
+  echo "Arguments:"
+  echo " -i, --input"
+  echo " type: file, required parameter, file must exist"
+  echo " Path to BAM file"
+  echo ""
+  echo " -t, --transcriptome"
+  echo " type: file, required parameter, file must exist"
+  echo " Path to GTF file"
+  echo ""
+  echo " -b, --barcode"
+  echo " type: file, file must exist"
+  echo " Valid barcodes file, to filter the bam. If --bcfile is not specified all"
+  echo " the cell barcodes will be included."
+  echo " Cell barcodes should be specified in the bcfile as the 'CB' tag for each"
+  echo " read"
+  echo ""
+  echo " --without_umi"
+  echo " type: boolean_true"
+  echo " foo"
+  echo ""
+  echo " -o, --output"
+  echo " type: file, required parameter, output, file must exist"
+  echo " Velocyto loom file"
+  echo ""
+  echo " -l, --logic"
+  echo " type: string"
+  echo " default: Default"
+  echo " choices: [ Default, Permissive10X, Intermediate10X, ValidatedIntrons10X,"
+  echo "Stricter10X, ObservedSpanning10X, Discordant10X, SmartSeq2 ]"
+  echo " The logic to use for the filtering."
+} + +# initialise array +VIASH_POSITIONAL_ARGS='' +VIASH_MODE='run' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "velocyto 0.12.4" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --transcriptome) + [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --transcriptome=*) + [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'--transcriptome=*\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME=$(ViashRemoveFlags "$1") + shift 1 + ;; + -t) + [ -n "$VIASH_PAR_TRANSCRIPTOME" ] && ViashError Bad arguments for option \'-t\': \'$VIASH_PAR_TRANSCRIPTOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_TRANSCRIPTOME="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -t. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --barcode) + [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BARCODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --barcode. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --barcode=*) + [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'--barcode=*\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BARCODE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -b) + [ -n "$VIASH_PAR_BARCODE" ] && ViashError Bad arguments for option \'-b\': \'$VIASH_PAR_BARCODE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_BARCODE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -b. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --without_umi) + [ -n "$VIASH_PAR_WITHOUT_UMI" ] && ViashError Bad arguments for option \'--without_umi\': \'$VIASH_PAR_WITHOUT_UMI\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_WITHOUT_UMI=true + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --logic) + [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOGIC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --logic. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --logic=*) + [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'--logic=*\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOGIC=$(ViashRemoveFlags "$1") + shift 1 + ;; + -l) + [ -n "$VIASH_PAR_LOGIC" ] && ViashError Bad arguments for option \'-l\': \'$VIASH_PAR_LOGIC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_LOGIC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -l. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. 
+ shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1024 )) ;; + mb|m) memory_b=$(( $number * 1024 * 1024 )) ;; + gb|g) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tb|t) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pb|p) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+1023) / 1024 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+1023) / 1024 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+1023) / 1024 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then + ViashError '--transcriptome' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then + VIASH_PAR_WITHOUT_UMI="false" +fi +if [ -z ${VIASH_PAR_LOGIC+x} ]; then + VIASH_PAR_LOGIC="Default" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_TRANSCRIPTOME" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME" ]; then + ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_BARCODE" ] && [ ! -e "$VIASH_PAR_BARCODE" ]; then + ViashError "Input file '$VIASH_PAR_BARCODE' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_WITHOUT_UMI" ]]; then + if ! [[ "$VIASH_PAR_WITHOUT_UMI" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--without_umi' has to be a boolean_true. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_LOGIC" ]; then + VIASH_PAR_LOGIC_CHOICES=("Default:Permissive10X:Intermediate10X:ValidatedIntrons10X:Stricter10X:ObservedSpanning10X:Discordant10X:SmartSeq2") + IFS=':' + set -f + if ! [[ ":${VIASH_PAR_LOGIC_CHOICES[*]}:" =~ ":$VIASH_PAR_LOGIC:" ]]; then + ViashError '--logic' specified value of \'$VIASH_PAR_LOGIC\' is not in the list of allowed values. 
Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +ViashDebug "Running command: bash" +cat << VIASHEOF | bash +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-velocyto-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome='&'#" ; else echo "# par_transcriptome="; fi ) +$( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "${VIASH_PAR_BARCODE}" | sed "s#'#'\"'\"'#g;s#.*#par_barcode='&'#" ; else echo "# par_barcode="; fi ) +$( if [ ! -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then echo "${VIASH_PAR_WITHOUT_UMI}" | sed "s#'#'\"'\"'#g;s#.*#par_without_umi='&'#" ; else echo "# par_without_umi="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_LOGIC+x} ]; then echo "${VIASH_PAR_LOGIC}" | sed "s#'#'\"'\"'#g;s#.*#par_logic='&'#" ; else echo "# par_logic="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=( ) + +if [ ! -z "\$par_barcode" ]; then + extra_params+=( "--bcfile=\$par_barcode" ) +fi +if [ "\$par_without_umi" == "true" ]; then + extra_params+=( "--without-umi" ) +fi +if [ ! 
-z "\$meta_cpus" ]; then + extra_params+=( "--samtools-threads" "\$meta_cpus" ) +fi +if [ ! -z "\$meta_memory_mb" ]; then + extra_params+=( "--samtools-memory" "\$meta_memory_mb" ) +fi + +output_dir=\`dirname "\$par_output"\` +sample_id=\`basename "\$par_output" .loom\` + +if (file \`readlink -f "\$par_transcriptome"\` | grep -q compressed ) ; then + # create temporary directory + tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") + function clean_up { + rm -rf "\$tmpdir" + } + trap clean_up EXIT + + zcat "\$par_transcriptome" > "\$tmpdir/genes.gtf" + par_transcriptome="\$tmpdir/genes.gtf" +fi + +velocyto run \\ + "\$par_input" \\ + "\$par_transcriptome" \\ + "\${extra_params[@]}" \\ + --outputfolder "\$output_dir" \\ + --sampleid "\$sample_id" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/target/nextflow/annotate/popv/.config.vsh.yaml b/target/nextflow/annotate/popv/.config.vsh.yaml new file mode 100644 index 00000000000..7d13e317a86 --- /dev/null +++ b/target/nextflow/annotate/popv/.config.vsh.yaml @@ -0,0 +1,346 @@ +functionality: + name: "popv" + namespace: "annotate" + version: "0.12.4" + authors: + - name: "Matthias Beyens" + roles: + - "author" + info: + role: "Contributor" + links: + github: "MatthiasBeyens" + orcid: "0000-0003-3304-0706" + email: "matthias.beyens@gmail.com" + linkedin: "mbeyens" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + description: "Arguments related to the input (aka query) dataset." + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file." + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Which layer to use. If no value is provided, the counts are assumed\ + \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obs_batch" + description: "Key in obs field of input adata for batch information. If no value\ + \ is provided, batch label is assumed to be unknown." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_var_subset" + description: "Subset the input object with this column." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obs_label" + description: "Key in obs field of input adata for label information. This is\ + \ only used for training scANVI. Unlabelled cells should be set to `\"unknown_celltype_label\"\ + `." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--unknown_celltype_label" + description: "If `input_obs_label` is specified, cells with this value will\ + \ be treated as unknown and will be predicted by the model." + info: null + default: + - "unknown" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference" + description: "Arguments related to the reference dataset." + arguments: + - type: "file" + name: "--reference" + description: "User-provided reference tissue. The data that will be used as\ + \ reference to call cell types." + info: null + example: + - "TS_Bladder_filtered.h5ad" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_layer" + description: "Which layer to use. If no value is provided, the counts are assumed\ + \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_label" + description: "Key in obs field of reference AnnData with cell-type information." + info: null + default: + - "cell_ontology_class" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_batch" + description: "Key in obs field of input adata for batch information." + info: null + default: + - "donor_assay" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + description: "Output arguments." + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + description: "Other arguments." + arguments: + - type: "string" + name: "--methods" + description: "Methods to call cell types. By default, runs to knn_on_scvi and\ + \ scanvi." + info: null + example: + - "knn_on_scvi" + - "scanvi" + required: true + choices: + - "celltypist" + - "knn_on_bbknn" + - "knn_on_scanorama" + - "knn_on_scvi" + - "onclass" + - "rf" + - "scanvi" + - "svm" + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs popular major vote cell typing on single cell sequence data\ + \ using multiple algorithms. Note that this is a one-shot version of PopV." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + - "build-essential" + - "wget" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "scvi-tools~=1.0.3" + - "popv~=0.3.2" + - "jax==0.4.10" + - "jaxlib==0.4.10" + - "ml-dtypes<0.3.0" + upgrade: true + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "docker" + run: + - "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \\\n\ + \ cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2\n" + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 
16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/annotate/popv/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/annotate/popv" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/annotate/popv/popv" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/annotate/popv/main.nf b/target/nextflow/annotate/popv/main.nf new file mode 100644 index 00000000000..e0bf5943414 --- /dev/null +++ b/target/nextflow/annotate/popv/main.nf @@ -0,0 +1,2958 @@ +// popv 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Matthias Beyens (author) +// * Robrecht Cannoodt (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "popv", + "namespace" : "annotate", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Matthias Beyens", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "MatthiasBeyens", + "orcid" : "0000-0003-3304-0706", + "email" : "matthias.beyens@gmail.com", + "linkedin" : "mbeyens" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "description" : "Arguments related to the input (aka query) dataset.", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file.", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "Which modality to process.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : 
"input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_layer", + "description" : "Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_obs_batch", + "description" : "Key in obs field of input adata for batch information. If no value is provided, batch label is assumed to be unknown.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_var_subset", + "description" : "Subset the input object with this column.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_obs_label", + "description" : "Key in obs field of input adata for label information. This is only used for training scANVI. Unlabelled cells should be set to `\\"unknown_celltype_label\\"`.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--unknown_celltype_label", + "description" : "If `input_obs_label` is specified, cells with this value will be treated as unknown and will be predicted by the model.", + "default" : [ + "unknown" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Reference", + "description" : "Arguments related to the reference dataset.", + "arguments" : [ + { + "type" : "file", + "name" : "--reference", + "description" : "User-provided reference tissue. 
The data that will be used as reference to call cell types.", + "example" : [ + "TS_Bladder_filtered.h5ad" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_layer", + "description" : "Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_obs_label", + "description" : "Key in obs field of reference AnnData with cell-type information.", + "default" : [ + "cell_ontology_class" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_obs_batch", + "description" : "Key in obs field of input adata for batch information.", + "default" : [ + "donor_assay" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "description" : "Output arguments.", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "description" : "Other arguments.", + "arguments" : [ + { + "type" : "string", + "name" : "--methods", + 
"description" : "Methods to call cell types. By default, runs to knn_on_scvi and scanvi.", + "example" : [ + "knn_on_scvi", + "scanvi" + ], + "required" : true, + "choices" : [ + "celltypist", + "knn_on_bbknn", + "knn_on_scanorama", + "knn_on_scvi", + "onclass", + "rf", + "scanvi", + "svm" + ], + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/annotate/popv/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Performs popular major vote cell typing on single cell sequence data using multiple algorithms. Note that this is a one-shot version of PopV.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/annotate/popv/" + }, + { + "type" : "file", + "path" : "resources_test/annotation_test_data/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "git", + 
"build-essential", + "wget" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "scanpy~=1.9.5", + "scvi-tools~=1.0.3", + "popv~=0.3.2", + "jax==0.4.10", + "jaxlib==0.4.10", + "ml-dtypes<0.3.0" + ], + "upgrade" : true + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \\\\\n cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2\n" + ] + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : 
"/home/runner/work/openpipeline/openpipeline/src/annotate/popv/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/annotate/popv", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import re +import tempfile +import typing +import numpy as np +import mudata as mu +import anndata as ad +import popv + +# todo: is this still needed? +from torch.cuda import is_available as cuda_is_available +try: + from torch.backends.mps import is_available as mps_is_available +except ModuleNotFoundError: + # Older pytorch versions + # MacOS GPUs + def mps_is_available(): + return False + +# where to find the obo files +cl_obo_folder = "/opt/PopV/ontology/" + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_obs_batch': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_var_subset': $( if [ ! -z ${VIASH_PAR_INPUT_VAR_SUBSET+x} ]; then echo "r'${VIASH_PAR_INPUT_VAR_SUBSET//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_obs_label': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_LABEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'unknown_celltype_label': $( if [ ! 
-z ${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL+x} ]; then echo "r'${VIASH_PAR_UNKNOWN_CELLTYPE_LABEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_layer': $( if [ ! -z ${VIASH_PAR_REFERENCE_LAYER+x} ]; then echo "r'${VIASH_PAR_REFERENCE_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_obs_label': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_LABEL+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_LABEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_obs_batch': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'methods': $( if [ ! -z ${VIASH_PAR_METHODS+x} ]; then echo "r'${VIASH_PAR_METHODS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +use_gpu = cuda_is_available() or mps_is_available() +logger.info("GPU enabled? %s", use_gpu) + +# Helper functions +def get_X(adata: ad.AnnData, layer: typing.Optional[str], var_index: typing.Optional[str]): + """Fetch the counts data from X or a layer. 
Subset columns by var_index if so desired.""" + if var_index: + adata = adata[:, var_index] + if layer: + return adata.layers[layer] + else: + return adata.X +def get_obs(adata: ad.AnnData, obs_par_names): + """Subset the obs dataframe to just the columns defined by the obs_label and obs_batch.""" + obs_columns = [par[x] for x in obs_par_names if par[x]] + return adata.obs[obs_columns] +def get_var(adata: ad.AnnData, var_index: list[str]): + """Fetch the var dataframe. Subset rows by var_index if so desired.""" + return adata.var.loc[var_index] + +def main(par, meta): + assert len(par["methods"]) >= 1, "Please, specify at least one method for cell typing." + logger.info("Cell typing methods: {}".format(par["methods"])) + + ### PREPROCESSING REFERENCE ### + logger.info("### PREPROCESSING REFERENCE ###") + + # take a look at reference data + logger.info("Reading reference data '%s'", par["reference"]) + reference = ad.read_h5ad(par["reference"]) + + logger.info("Setting reference var index to Ensembl IDs") + reference.var["gene_symbol"] = list(reference.var.index) + reference.var.index = [re.sub("\\\\\\\\.[0-9]+\\$", "", s) for s in reference.var["ensemblid"]] + + logger.info("Detect number of samples per label") + min_celltype_size = np.min(reference.obs.groupby(par["reference_obs_batch"]).size()) + n_samples_per_label = np.max((min_celltype_size, 100)) + + ### PREPROCESSING INPUT ### + logger.info("### PREPROCESSING INPUT ###") + logger.info("Reading '%s'", par["input"]) + input = mu.read_h5mu(par["input"]) + input_modality = input.mod[par["modality"]] + + # subset with var column + if par["input_var_subset"]: + logger.info("Subset input with .var['%s']", par["input_var_subset"]) + assert par["input_var_subset"] in input_modality.var, f"--input_var_subset='{par['input_var_subset']}' needs to be a column in .var" + input_modality = input_modality[:,input_modality.var[par["input_var_subset"]]] + + ### ALIGN REFERENCE AND INPUT ### + logger.info("### ALIGN REFERENCE 
AND INPUT ###") + + logger.info("Detecting common vars based on ensembl ids") + common_ens_ids = list(set(reference.var.index).intersection(set(input_modality.var.index))) + + logger.info(" reference n_vars: %i", reference.n_vars) + logger.info(" input n_vars: %i", input_modality.n_vars) + logger.info(" intersect n_vars: %i", len(common_ens_ids)) + assert len(common_ens_ids) >= 100, "The intersection of genes is too small." + + # subset input objects to make sure popv is using the data we expect + input_modality = ad.AnnData( + X = get_X(input_modality, par["input_layer"], common_ens_ids), + obs = get_obs(input_modality, ["input_obs_label", "input_obs_batch"]), + var = get_var(input_modality, common_ens_ids) + ) + reference = ad.AnnData( + X = get_X(reference, par["reference_layer"], common_ens_ids), + obs = get_obs(reference, ["reference_obs_label", "reference_obs_batch"]), + var = get_var(reference, common_ens_ids) + ) + + # remove layers that + + ### ALIGN REFERENCE AND INPUT ### + logger.info("### ALIGN REFERENCE AND INPUT ###") + + with tempfile.TemporaryDirectory(prefix="popv-", dir=meta["temp_dir"]) as temp_dir: + logger.info("Run PopV processing") + pq = popv.preprocessing.Process_Query( + # input + query_adata=input_modality, + query_labels_key=par["input_obs_label"], + query_batch_key=par["input_obs_batch"], + query_layers_key=None, # this is taken care of by subset + # reference + ref_adata=reference, + ref_labels_key=par["reference_obs_label"], + ref_batch_key=par["reference_obs_batch"], + # options + unknown_celltype_label=par["unknown_celltype_label"], + n_samples_per_label=n_samples_per_label, + # pretrained model + # Might need to be parameterized at some point + prediction_mode="retrain", + pretrained_scvi_path=None, + # outputs + # Might need to be parameterized at some point + save_path_trained_models=temp_dir, + # hardcoded values + cl_obo_folder=cl_obo_folder, + use_gpu=use_gpu + ) + method_kwargs = {} + if 'scanorama' in par['methods']: + 
method_kwargs['scanorama'] = {'approx': False} + logger.info("Annotate data") + popv.annotation.annotate_data( + adata=pq.adata, + methods=par["methods"], + methods_kwargs=method_kwargs + ) + + popv_input = pq.adata[input_modality.obs_names] + + # select columns starting with "popv_" + popv_obs_cols = popv_input.obs.columns[popv_input.obs.columns.str.startswith("popv_")] + + # create new data frame with selected columns + df_popv = popv_input.obs[popv_obs_cols] + + # remove prefix from column names + df_popv.columns = df_popv.columns.str.replace("popv_", "") + + # store output in mudata .obsm + input.mod[par["modality"]].obsm["popv_output"] = df_popv + + # copy important output in mudata .obs + for col in ["popv_prediction"]: + if col in popv_input.obs.columns: + input.mod[par["modality"]].obs[col] = popv_input.obs[col] + + # code to explore how the output differs from the original + # for attr in ["obs", "var", "uns", "obsm", "layers", "obsp"]: + # old_keys = set(getattr(pq_adata_orig, attr).keys()) + # new_keys = set(getattr(pq.adata, attr).keys()) + # diff_keys = list(new_keys.difference(old_keys)) + # diff_keys.sort() + # print(f"{attr}:", flush=True) + # for key in diff_keys: + # print(f" {key}", flush=True) + + # write output + logger.info("Writing %s", par["output"]) + input.write_h5mu(par["output"], compression=par["output_compression"]) + +if __name__ == "__main__": + main(par, meta) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/annotate_popv", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : 
false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. 
+ // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
*/
private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
  // The id is the first element of every parameter set.
  def ppIds = parameterSets.collect{ it[0] }
  // unique(false) returns a de-duplicated copy instead of mutating ppIds, so the
  // assertion message below still lists the duplicated ids when the check fails.
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

/**
 * Resolve the file paths in the parameters relative to a given path.
 *
 * Only values of arguments that the config declares as `type: file` with
 * `direction: input` are touched, and only when they are still Strings;
 * everything else is returned unchanged.
 *
 * @param paramList A Map containing parameters to process.
 *   This function assumes that files are still of type String.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *   using the readConfig() function.
 * @param relativeTo path of a file to resolve the parameter values to.
 *
 * @return A map of parameters where the location of the input file parameters has been
 *   resolved relative to the provided path.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  paramList.collectEntries { parName, parValue ->
    // `def` keeps the lookup local to this closure call instead of writing it
    // into the script binding.
    def argSettings = config.functionality.allArguments.find{ it.plainName == parName }
    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
      if (parValue instanceof Collection) {
        parValue = parValue.collect{ path ->
          path instanceof String ? file(getChild(relativeTo, path)) : path
        }
      } else if (parValue instanceof String) {
        // Non-String scalars are kept as they are. The previous code returned the
        // undefined name `path` here, which failed for any non-String value.
        parValue = file(getChild(relativeTo, parValue))
      }
    }
    [parName, parValue]
  }
}

/**
 * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
 * and return the resulting parameter sets.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *   using the readConfig() function.
 *
 * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format.
+  // Every parser returns a pair [sourceFile, parameterSets]; sourceFile is null
+  // for the formats that are not read from a file (yaml_blob, asis, none).
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list, converted element by element
+ * according to the argument's 'type', and unwrapped again unless the
+ * argument is declared with 'multiple'. Parameters that are not declared
+ * in the config (e.g. publish_dir) are returned as a list, unconverted.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these two local to the closure; without it they would be
+    // written to the script binding and shared between invocations.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // only String paths are turned into file objects; anything else is passed through
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+      parType == "boolean_true" ||
+      parType == "boolean_false") {
+      // Strings must be parsed explicitly: Groovy coerces every non-empty String
+      // to true, so "false" as Boolean would yield true.
+      parValue = parValue.collect{ it instanceof CharSequence ? it.toString().toBoolean() : (it as Boolean) }
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Run every parameter set of a list through applyConfigToOneParameterSet.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * Each set is a list [id, parameters, ...]; elements after the second one
+ * are appended to the result unchanged.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def result = parameterSets.collect { tuple ->
+    // element 0 is the id, element 1 the parameter map, the rest is passthrough
+    def converted = applyConfigToOneParameterSet(tuple[1], config)
+    [tuple[0], converted] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(result)
+  return result
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (values from 'param_list' win when both define the same key)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup result local instead of writing it to the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // parameterSettings is null for parameters that are accepted but not declared in
+      // the config (publish_dir, publishDir), hence the safe navigation. The one-argument
+      // Map.get is used because Groovy's get(key, defaultValue) inserts the key into the
+      // map when it is missing, which would modify the config.
+      def keep = paramValue != null || parameterSettings?.get("default") != null
+      // an empty map contributes no entry; avoids handing null to collectEntries
+      keep ? [(paramName): paramValue] : [:]
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose
+ *         first element is the ID of the event and whose second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, as a map of plainName -> default value
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Run the defaults and the incoming parameter sets through the same processing
+  def defaultValues = applyConfigToOneParameterSet(declaredDefaults, config)
+  def inputSets = applyConfig(params, config)
+
+  // Overlay each parameter set on the defaults (explicit values win);
+  // tuple elements after the second one are appended unchanged
+  def merged = inputSets.collect { tuple ->
+    [tuple[0], defaultValues + tuple[1]] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *   to a parsed config (see readConfig()). Optionally, a
+ *   'key' key can be provided which can be used to create a unique
+ *   name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def', so they are stored
+  // in the script binding; confirm whether the workflow block below relies on that.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map whose keys are all allowed and which
+ * contains every required key.
+ *
+ * @param map          The value to check.
+ * @param expectedKeys Keys that are allowed to occur in the map.
+ * @param requiredKeys Keys that must occur in the map.
+ * @param mapName      Name used in the assertion messages; may be empty or null.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must interpolate the closure parameter: there is no 'key' in this
+    // scope, so '$key' would fail with a missing property error instead of this message
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Null values are dropped first; afterwards each of the keys
+ * simplifyInput, simplifyOutput, transcript and publish must be
+ * present and hold a Boolean.
+ *
+ * @param auto Map with the auto settings.
+ * @return A map restricted to the four expected keys.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local instead of storing it in the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Error in auto settings: missing required key '$key'"
+    assert auto[key] instanceof Boolean : "Error in auto settings: expected '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/annotate/popv/nextflow.config b/target/nextflow/annotate/popv/nextflow.config new file mode 100644 index 00000000000..d618d269b9b --- 
/dev/null +++ b/target/nextflow/annotate/popv/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'popv' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs popular major vote cell typing on single cell sequence data using multiple algorithms. Note that this is a one-shot version of PopV.' + author = 'Matthias Beyens, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: 
mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/annotate/popv/nextflow_params.yaml b/target/nextflow/annotate/popv/nextflow_params.yaml new file mode 100644 index 00000000000..e58b114fd16 --- /dev/null +++ b/target/nextflow/annotate/popv/nextflow_params.yaml @@ -0,0 +1,25 @@ +# Inputs +input: # please fill in - example: "input.h5mu" +modality: "rna" +# input_layer: "foo" +# input_obs_batch: "foo" +# input_var_subset: "foo" +# input_obs_label: "foo" +unknown_celltype_label: "unknown" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Arguments +methods: # please fill in - example: ["knn_on_scvi", "scanvi"] + +# Reference +reference: # please fill in - example: "TS_Bladder_filtered.h5ad" +# reference_layer: "foo" +reference_obs_label: "cell_ontology_class" +reference_obs_batch: "donor_assay" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/annotate/popv/nextflow_schema.json b/target/nextflow/annotate/popv/nextflow_schema.json new file mode 100644 index 00000000000..0749ce0b0fd --- /dev/null +++ 
b/target/nextflow/annotate/popv/nextflow_schema.json @@ -0,0 +1,251 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "popv", +"description": "Performs popular major vote cell typing on single cell sequence data using multiple algorithms. Note that this is a one-shot version of PopV.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "Arguments related to the input (aka query) dataset.", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file." + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. Which modality to process", + "help_text": "Type: `string`, default: `rna`. Which modality to process." + , + "default": "rna" + } + + + , + "input_layer": { + "type": + "string", + "description": "Type: `string`. Which layer to use", + "help_text": "Type: `string`. Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`." + + } + + + , + "input_obs_batch": { + "type": + "string", + "description": "Type: `string`. Key in obs field of input adata for batch information", + "help_text": "Type: `string`. Key in obs field of input adata for batch information. If no value is provided, batch label is assumed to be unknown." + + } + + + , + "input_var_subset": { + "type": + "string", + "description": "Type: `string`. Subset the input object with this column", + "help_text": "Type: `string`. Subset the input object with this column." + + } + + + , + "input_obs_label": { + "type": + "string", + "description": "Type: `string`. Key in obs field of input adata for label information", + "help_text": "Type: `string`. Key in obs field of input adata for label information. 
This is only used for training scANVI. Unlabelled cells should be set to `\"unknown_celltype_label\"`." + + } + + + , + "unknown_celltype_label": { + "type": + "string", + "description": "Type: `string`, default: `unknown`. If `input_obs_label` is specified, cells with this value will be treated as unknown and will be predicted by the model", + "help_text": "Type: `string`, default: `unknown`. If `input_obs_label` is specified, cells with this value will be treated as unknown and will be predicted by the model." + , + "default": "unknown" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "Output arguments.", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "Other arguments.", + "properties": { + + + "methods": { + "type": + "string", + "description": "Type: List of `string`, required, example: `knn_on_scvi:scanvi`, multiple_sep: `\":\"`, choices: ``celltypist`, `knn_on_bbknn`, `knn_on_scanorama`, `knn_on_scvi`, `onclass`, `rf`, `scanvi`, `svm``. Methods to call cell types", + "help_text": "Type: List of `string`, required, example: `knn_on_scvi:scanvi`, multiple_sep: `\":\"`, choices: ``celltypist`, `knn_on_bbknn`, `knn_on_scanorama`, `knn_on_scvi`, `onclass`, `rf`, `scanvi`, `svm``. Methods to call cell types. 
By default, runs to knn_on_scvi and scanvi.", + "enum": ["celltypist", "knn_on_bbknn", "knn_on_scanorama", "knn_on_scvi", "onclass", "rf", "scanvi", "svm"] + + + } + + +} +}, + + + "reference" : { + "title": "Reference", + "type": "object", + "description": "Arguments related to the reference dataset.", + "properties": { + + + "reference": { + "type": + "string", + "description": "Type: `file`, required, example: `TS_Bladder_filtered.h5ad`. User-provided reference tissue", + "help_text": "Type: `file`, required, example: `TS_Bladder_filtered.h5ad`. User-provided reference tissue. The data that will be used as reference to call cell types." + + } + + + , + "reference_layer": { + "type": + "string", + "description": "Type: `string`. Which layer to use", + "help_text": "Type: `string`. Which layer to use. If no value is provided, the counts are assumed to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`." + + } + + + , + "reference_obs_label": { + "type": + "string", + "description": "Type: `string`, default: `cell_ontology_class`. Key in obs field of reference AnnData with cell-type information", + "help_text": "Type: `string`, default: `cell_ontology_class`. Key in obs field of reference AnnData with cell-type information." + , + "default": "cell_ontology_class" + } + + + , + "reference_obs_batch": { + "type": + "string", + "description": "Type: `string`, default: `donor_assay`. Key in obs field of input adata for batch information", + "help_text": "Type: `string`, default: `donor_assay`. Key in obs field of input adata for batch information." + , + "default": "donor_assay" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/reference" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/annotate/popv/setup_logger.py b/target/nextflow/annotate/popv/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/annotate/popv/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/cluster/leiden/.config.vsh.yaml b/target/nextflow/cluster/leiden/.config.vsh.yaml new file mode 100644 index 00000000000..447c044a7a5 --- /dev/null +++ b/target/nextflow/cluster/leiden/.config.vsh.yaml @@ -0,0 +1,219 @@ +functionality: + name: "leiden" + namespace: "cluster" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input file." 
+ info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_connectivities" + description: "In which .obsp slot the neighbor connectivities can be found." + info: null + default: + - "connectivities" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_name" + description: "Name of the .obsm key under which to add the cluster labels.\nThe\ + \ name of the columns in the matrix will correspond to the resolutions.\n" + info: null + default: + - "leiden" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--resolution" + description: "A parameter value controlling the coarseness of the clustering.\ + \ Higher values lead to more clusters.\nMultiple values will result in clustering\ + \ being performed multiple times.\n" + info: null + default: + - 1.0 + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Cluster cells using the Leiden algorithm [Traag18] implemented in\ + \ the 
Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain\ + \ algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15].\ + \ \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\ + \nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks,\ + \ J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection\ + \ of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n\ + Traag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected\ + \ communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell\ + \ gene expression data analysis, Genome Biology. \n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.8-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "cmake" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "leidenalg~=0.8.9" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory 
= 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/cluster/leiden" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/cluster/leiden/leiden" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/cluster/leiden/main.nf b/target/nextflow/cluster/leiden/main.nf new file mode 100644 index 00000000000..50eb4aec860 --- /dev/null +++ b/target/nextflow/cluster/leiden/main.nf @@ -0,0 +1,2631 @@ +// leiden 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "leiden", + "namespace" : "cluster", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input file.", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsp_connectivities", + "description" : "In which .obsp slot the neighbor connectivities can be found.", + "default" : [ + "connectivities" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : 
"--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_name", + "description" : "Name of the .obsm key under which to add the cluster labels.\nThe name of the columns in the matrix will correspond to the resolutions.\n", + "default" : [ + "leiden" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--resolution", + "description" : "A parameter value controlling the coarseness of the clustering. Higher values lead to more clusters.\nMultiple values will result in clustering being performed multiple times.\n", + "default" : [ + 1.0 + ], + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \nTraag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \nWolf18: Wolf et al. 
(2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "run_test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.8-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "cmake", + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "leidenalg~=0.8.9" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highcpu", + "midmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", 
+ "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/cluster/leiden/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/cluster/leiden", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import mudata as mu +import pandas as pd +import scanpy as sc + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_name': $( if [ ! 
-z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +mdata = mu.read_h5mu(par["input"]) + + +def run_single_resolution(adata, resolution): + adata_out = sc.tl.leiden( + adata, + resolution=resolution, + key_added=str(resolution), + obsp=par['obsp_connectivities'], + copy=True + ) + return adata_out.obs[str(resolution)] + +logger.info("Processing modality '%s'.", par['modality']) +data = mdata.mod[par['modality']] +results = {str(resolution): run_single_resolution(data, resolution) for resolution in par["resolution"]} +data.obsm[par["obsm_name"]] = pd.DataFrame(results) +logger.info("Writing to %s.", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +logger.info("Finished.") +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/cluster_leiden", + "tag" : "0.12.0" + }, + "label" : [ + "highcpu", + "midmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: 
jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. 
+ // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]} // unique(false) below keeps this list intact so the message shows the duplicates
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  Only values that are still of type String are resolved; other values are kept as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of the file relative to which the parameter values are resolved.
+ *
+ * @return A map of parameters in which the String values of input file arguments have been
+ *         resolved relative to the provided path. All other entries are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && (argSettings.direction != null ? argSettings.direction : "input") == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue)) // non-String values pass through
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' argument based on settings defined in a viash config
+ * and return the resulting parameter sets as a list.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration (see readConfig()).
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be (only for arguments known to the config)
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config (or be 'publishDir' / 'publish_dir').
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @return The input parameters that have been processed; tuple elements after the first two are passed through.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the global parameters with each parameter set.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list' (param_list values win)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) // null for e.g. publish_dir
+      if ( paramValue != null || (parameterSettings != null && parameterSettings.get("default", null) != null) ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Each event is formatted as a tuple that
+ *         contains the ID of the event first and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameter sets as tuples: [id, parameters with defaults applied, ...extra elements].
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the declared default value of every argument that has a 'default' key
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // Apply config to default parameters, so defaults go through the same processing as input values
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Apply config to input parameters
+  def parsedInputParamSets = applyConfig(params, config)
+
+  // Merge defaults into every parameter set; on a key clash the input value (right operand of +) wins
+  def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet ->
+    def id = parsedInputParamSet[0]
+    def parValues = parsedInputParamSet[1]
+    def passthrough = parsedInputParamSet.drop(2)
+    def parValuesWithDefault = parsedDefaultValues + parValues
+    [id, parValuesWithDefault] + passthrough
+  })
+  _checkUniqueIds(parsedArgs)
+
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { // checks map only has expectedKeys and has all requiredKeys
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // fall back to an empty list when no publishDir directive has been set yet + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + // declared before assignment so the closure can call itself recursively + def valueToStr + valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // NOTE: parenthesised because '!' binds tighter than 'instanceof' + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/cluster/leiden/nextflow.config b/target/nextflow/cluster/leiden/nextflow.config new file mode 100644 index 00000000000..a8c7150d552 --- 
/dev/null +++ b/target/nextflow/cluster/leiden/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'leiden' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \nTraag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n' + author = 'Dries De Maeyer' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } 
+ shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/cluster/leiden/nextflow_params.yaml b/target/nextflow/cluster/leiden/nextflow_params.yaml new file mode 100644 index 00000000000..b051a6181b3 --- /dev/null +++ b/target/nextflow/cluster/leiden/nextflow_params.yaml @@ -0,0 +1,12 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +obsp_connectivities: "connectivities" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +obsm_name: "leiden" +resolution: # please fill in - example: [1] + +# Nextflow input-output 
arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/cluster/leiden/nextflow_schema.json b/target/nextflow/cluster/leiden/nextflow_schema.json new file mode 100644 index 00000000000..cc9d8382dae --- /dev/null +++ b/target/nextflow/cluster/leiden/nextflow_schema.json @@ -0,0 +1,137 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "leiden", +"description": "Cluster cells using the Leiden algorithm [Traag18] implemented in the Scanpy framework [Wolf18]. \nLeiden is an improved version of the Louvain algorithm [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\nBlondel08: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \nLevine15: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \nTraag18: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \nWolf18: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input file." + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "obsp_connectivities": { + "type": + "string", + "description": "Type: `string`, default: `connectivities`. In which ", + "help_text": "Type: `string`, default: `connectivities`. 
In which .obsp slot the neighbor connectivities can be found." + , + "default": "connectivities" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + + , + "obsm_name": { + "type": + "string", + "description": "Type: `string`, default: `leiden`. Name of the ", + "help_text": "Type: `string`, default: `leiden`. Name of the .obsm key under which to add the cluster labels.\nThe name of the columns in the matrix will correspond to the resolutions.\n" + , + "default": "leiden" + } + + + , + "resolution": { + "type": + "string", + "description": "Type: List of `double`, required, default: `1`, multiple_sep: `\":\"`. A parameter value controlling the coarseness of the clustering", + "help_text": "Type: List of `double`, required, default: `1`, multiple_sep: `\":\"`. A parameter value controlling the coarseness of the clustering. Higher values lead to more clusters.\nMultiple values will result in clustering being performed multiple times.\n" + , + "default": "1" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. 
Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/cluster/leiden/setup_logger.py b/target/nextflow/cluster/leiden/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/cluster/leiden/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/compression/compress_h5mu/.config.vsh.yaml b/target/nextflow/compression/compress_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..15d89236355 --- /dev/null +++ b/target/nextflow/compression/compress_h5mu/.config.vsh.yaml @@ -0,0 +1,167 @@ +functionality: + name: "compress_h5mu" + namespace: "compression" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "location of output file." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--compression" + description: "Compression type." + info: null + default: + - "gzip" + required: false + choices: + - "lzf" + - "gzip" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../../utils/compress_h5mu.py" + description: "Compress a MuData file. \n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: 
"memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/compression/compress_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/compression/compress_h5mu/compress_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/compression/compress_h5mu/compress_h5mu.py b/target/nextflow/compression/compress_h5mu/compress_h5mu.py new file mode 100644 index 00000000000..9d92395a573 --- /dev/null +++ b/target/nextflow/compression/compress_h5mu/compress_h5mu.py @@ -0,0 +1,49 @@ +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + 
name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = input_bytes.read(100) + # The metadata is padded with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\0" * (512 - nbytes)) diff --git a/target/nextflow/compression/compress_h5mu/main.nf b/target/nextflow/compression/compress_h5mu/main.nf new file mode 100644 index 00000000000..b3d9bb5ab84 --- /dev/null +++ b/target/nextflow/compression/compress_h5mu/main.nf @@ -0,0 +1,2596 @@ +// compress_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "compress_h5mu", + "namespace" : "compression", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to the input .h5mu.", + "example" : [ + "sample_path" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "location of output file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--compression", + "description" : "Compression type.", + "default" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "lzf", + "gzip" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + 
"is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/" + }, + { + "type" : "file", + "path" : "../../utils/compress_h5mu.py", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/" + } + ], + "description" : "Compress a MuData file. \n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "run_test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + 
"mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/compression/compress_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/compression/compress_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion +# from compress_h5mu import compress_h5mu +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function 
+ starting_metadata = input_bytes.read(100) + # The metadata is padded with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\\\\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\\\\0" * (512 - nbytes)) +# END TEMPORARY WORKAROUND compress_h5mu + +if __name__ == "__main__": + compress_h5mu(par["input"], par["output"], compression=par["compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/compression_compress_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) works on a copy: the plain unique() call removes the duplicates
+  // from ppIds itself, so the failure message below would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only values of arguments declared in the config with type "file" and
+ * direction "input" are touched; every other entry is returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *   This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *   resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared locally so the lookup result is not stored in the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // values that are not Strings are passed through untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns [ path of the param_list file (or null), parsed parameter sets ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is wrapped in a list, cast element by element, and unwrapped again
+ * when the argument is known and does not accept multiple values. Values of
+ * parameters that are not described in the config are not cast.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these closure-local; without it they are written to the script
+    // binding and shared by every invocation.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of each set is the id,
+ *                      element 1 the parameter map, further elements are passed through.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll is used so that dropping an entry does not depend on how
+    // collectEntries treats a closure that returns nothing.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      // Declared locally (not in the script binding). The settings are null for
+      // parameters such as publish_dir that are accepted but not part of allArguments.
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // plain get(key): the two-argument Groovy get(key, default) would insert a
+      // 'default' entry into the argument settings as a side effect.
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // local variable: an undeclared assignment would end up in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Get different parameter types (used throughout this function)
+  // Only arguments that declare a 'default' key contribute a default value.
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // Apply config to default parameters
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Apply config to input parameters
+  def parsedInputParamSets = applyConfig(params, config)
+
+  // Merge two parameter sets together
+  def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet ->
+    def id = parsedInputParamSet[0]
+    def parValues = parsedInputParamSet[1]
+    // everything after the id and the parameter map is handed back unchanged
+    def passthrough = parsedInputParamSet.drop(2)
+    // defaults on the left: values supplied by the caller win on key clashes
+    def parValuesWithDefault = parsedDefaultValues + parValues
+    [id, parValuesWithDefault] + passthrough
+  })
+  _checkUniqueIds(parsedArgs)
+
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only has expected keys and has every required key.
+ *
+ * @param map The object to check; the first assertion fails if it is not a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name used in the assertion messages (may be empty or null).
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing; 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // the warning is built by concatenation: extra arguments to log.warn are treated as format args and dropped + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // closure-local, so it does not leak into the script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!' binds tighter than instanceof + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/compression/compress_h5mu/nextflow.config b/target/nextflow/compression/compress_h5mu/nextflow.config new file mode 100644 index 
00000000000..f5e1490099d --- /dev/null +++ b/target/nextflow/compression/compress_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'compress_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Compress a MuData file. \n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: 
mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/compression/compress_h5mu/nextflow_params.yaml b/target/nextflow/compression/compress_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..740452fec8b --- /dev/null +++ b/target/nextflow/compression/compress_h5mu/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Arguments +input: # please fill in - example: "sample_path" +# output: "$id.$key.output.output" +compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/compression/compress_h5mu/nextflow_schema.json b/target/nextflow/compression/compress_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..1160871a2a7 --- /dev/null +++ b/target/nextflow/compression/compress_h5mu/nextflow_schema.json @@ -0,0 +1,94 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "compress_h5mu", +"description": "Compress a MuData file. \n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `sample_path`. 
Path to the input ", + "help_text": "Type: `file`, required, example: `sample_path`. Path to the input .h5mu." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. location of output file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. location of output file." + , + "default": "$id.$key.output.output" + } + + + , + "compression": { + "type": + "string", + "description": "Type: `string`, default: `gzip`, choices: ``lzf`, `gzip``. Compression type", + "help_text": "Type: `string`, default: `gzip`, choices: ``lzf`, `gzip``. Compression type.", + "enum": ["lzf", "gzip"] + + , + "default": "gzip" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..63290facddc --- /dev/null +++ b/target/nextflow/convert/from_10xh5_to_h5mu/.config.vsh.yaml @@ -0,0 +1,272 @@ +functionality: + name: "from_10xh5_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "A 10x h5 file as generated by Cell Ranger." + info: null + example: + - "raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input_metrics_summary" + description: "A metrics summary csv file as generated by Cell Ranger." + info: null + example: + - "metrics_cellranger.h5" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: + slots: + mod: + - name: "rna" + required: true + description: "Gene expression counts." 
+ slots: + var: + - name: "gene_symbol" + type: "string" + description: "Identification of the gene." + required: true + - name: "feature_types" + type: "string" + description: "The full name of the modality." + required: true + - name: "genome" + type: "string" + description: "Reference that was used to generate the data." + required: true + - name: "prot" + required: false + description: "Protein abundancy" + slots: + var: + - name: "gene_symbol" + type: "string" + description: "Identification of the gene." + required: true + - name: "feature_types" + type: "string" + description: "The full name of the modality." + required: true + - name: "genome" + type: "string" + description: "Reference that was used to generate the data." + required: true + - name: "vdj" + required: false + description: "VDJ transcript counts" + slots: + var: + - name: "gene_symbol" + type: "string" + description: "Identification of the gene." + required: true + - name: "feature_types" + type: "string" + description: "The full name of the modality." + required: true + - name: "genome" + type: "string" + description: "Reference that was used to generate the data." + required: true + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_metrics" + description: "Name of the .uns slot under which to QC metrics (if any)." + info: null + default: + - "metrics_cellranger" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--min_genes" + description: "Minimum number of counts required for a cell to pass filtering." 
+ info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_counts" + description: "Minimum number of genes expressed required for a cell to pass\ + \ filtering." + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a 10x h5 into an h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 
8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xh5_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/main.nf b/target/nextflow/convert/from_10xh5_to_h5mu/main.nf new file mode 100644 index 00000000000..d79cb70ce42 --- /dev/null +++ b/target/nextflow/convert/from_10xh5_to_h5mu/main.nf @@ -0,0 +1,2767 @@ +// from_10xh5_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "from_10xh5_to_h5mu", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "A 10x h5 file as generated by Cell Ranger.", + "example" : [ + "raw_feature_bc_matrix.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input_metrics_summary", + "description" : "A metrics summary csv file as generated by Cell Ranger.", + "example" : [ + "metrics_cellranger.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "info" : { + "slots" : { + "mod" : [ + { + "name" : "rna", + "required" : true, + 
"description" : "Gene expression counts.", + "slots" : { + "var" : [ + { + "name" : "gene_symbol", + "type" : "string", + "description" : "Identification of the gene.", + "required" : true + }, + { + "name" : "feature_types", + "type" : "string", + "description" : "The full name of the modality.", + "required" : true + }, + { + "name" : "genome", + "type" : "string", + "description" : "Reference that was used to generate the data.", + "required" : true + } + ] + } + }, + { + "name" : "prot", + "required" : false, + "description" : "Protein abundancy", + "slots" : { + "var" : [ + { + "name" : "gene_symbol", + "type" : "string", + "description" : "Identification of the gene.", + "required" : true + }, + { + "name" : "feature_types", + "type" : "string", + "description" : "The full name of the modality.", + "required" : true + }, + { + "name" : "genome", + "type" : "string", + "description" : "Reference that was used to generate the data.", + "required" : true + } + ] + } + }, + { + "name" : "vdj", + "required" : false, + "description" : "VDJ transcript counts", + "slots" : { + "var" : [ + { + "name" : "gene_symbol", + "type" : "string", + "description" : "Identification of the gene.", + "required" : true + }, + { + "name" : "feature_types", + "type" : "string", + "description" : "The full name of the modality.", + "required" : true + }, + { + "name" : "genome", + "type" : "string", + "description" : "Reference that was used to generate the data.", + "required" : true + } + ] + } + } + ] + } + }, + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" 
: "--uns_metrics", + "description" : "Name of the .uns slot under which to QC metrics (if any).", + "default" : [ + "metrics_cellranger" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "integer", + "name" : "--min_genes", + "description" : "Minimum number of counts required for a cell to pass filtering.", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_counts", + "description" : "Minimum number of genes expressed required for a cell to pass filtering.", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Converts a 10x h5 into an h5mu file.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + 
"resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xh5_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xh5_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : 
"a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata +import scanpy as sc +import sys +import pandas as pd + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_metrics_summary': $( if [ ! -z ${VIASH_PAR_INPUT_METRICS_SUMMARY+x} ]; then echo "r'${VIASH_PAR_INPUT_METRICS_SUMMARY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'min_genes': $( if [ ! -z ${VIASH_PAR_MIN_GENES+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +adata = sc.read_10x_h5(par["input"], gex_only=False) + +# set the gene ids as var_names +logger.info("Renaming var columns") +adata.var = adata.var\\\\ + .rename_axis("gene_symbol")\\\\ + .reset_index()\\\\ + .set_index("gene_ids") + +# parse metrics summary file and store in .obsm or .obs +if par["input_metrics_summary"] and par["uns_metrics"]: + logger.info("Reading metrics summary file '%s'", par['input_metrics_summary']) + + def read_percentage(val): + try: + return float(val.strip('%')) / 100 + except AttributeError: + return val + + metrics_summary = pd.read_csv(par["input_metrics_summary"], decimal=".", quotechar='"', thousands=",").applymap(read_percentage) + + logger.info("Storing metrics summary in .uns['%s']", par['uns_metrics']) + adata.uns[par["uns_metrics"]] = metrics_summary +else: + is_none = "input_metrics_summary" if not par["input_metrics_summary"] else "uns_metrics" + logger.info("Not storing metrics summary because par['%s'] is None", is_none) + +# might perform basic filtering to get rid of some data +# applicable when starting from the raw counts +if par["min_genes"]: + logger.info("Filtering with min_genes=%d", par['min_genes']) + sc.pp.filter_cells(adata, 
min_genes=par["min_genes"]) + +if par["min_counts"]: + logger.info("Filtering with min_counts=%d", par['min_counts']) + sc.pp.filter_cells(adata, min_counts=par["min_counts"]) + +# generate output +logger.info("Convert to mudata") +mdata = mudata.MuData(adata) + +# override root .obs +mdata.obs = adata.obs + +# write output +logger.info("Writing %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/convert_from_10xh5_to_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // The id is the first element of every parameter set.
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The mutating default would strip the
+  // duplicates out of ppIds itself, so the message below could never show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares with type "file" and direction "input"
+ * are touched, and only values that are still Strings are resolved. Every other
+ * entry is returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local instead of writing it into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // non-String values (e.g. already resolved paths) are passed through as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format.
+  // Each parser returns a pair: [param_list file location or null, parsed content].
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2.
+  // Plain get(key) is used on purpose: Groovy's get(key, default) would insert the
+  // default into the map that is being read.
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // both lookups are declared with 'def' so they stay local to this closure
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // (plain property read: get("type", null) would write a 'type' key into the config)
+    def parType = paramSettings ? paramSettings.type : null
+    // work on a list so single and multiple values share the casting code below
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters unknown to the config stay wrapped in a list here — confirm this is intended
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. Element 0 of each set is the id,
+ *                      element 1 the parameter map; further elements are passed through as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *    - A filtering of the params which can be found in the config file.
+ *    - Process the param_list argument which allows a user to initialise
+ *      a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *      csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *      which correspond to the different arguments of this pipeline. A json or a yaml
+ *      file should be a list of maps, each of which has keys corresponding to the
+ *      arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *      When passing a csv, json or yaml, relative path names are relativized to the
+ *      location of the parameter file.
+ *    - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  // plain get(key): Groovy's get(key, default) would add an 'id' entry to params when absent
+  def globalID = params.get("id")
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll keeps the entries for which the closure is true. The settings lookup is
+    // null-safe (publish_dir / publishDir are not in the config) and reads 'default'
+    // as a property, so no 'default: null' key is written into the shared config.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.default != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of publishing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { arg ->
+    if (arg.containsKey("default")) {
+      declaredDefaults[arg.plainName] = arg.default
+    }
+  }
+
+  // Run both the defaults and the incoming sets through the config
+  def defaults = applyConfigToOneParameterSet(declaredDefaults, config)
+  def inputSets = applyConfig(params, config)
+
+  // Overlay each set's own values on the defaults; extra tuple elements ride along untouched
+  def merged = inputSets.collect { tuple ->
+    def overlaid = defaults + tuple[1]
+    [tuple[0], overlaid] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *   to a parsed config (see readConfig()). Optionally, a
+ *   'key' key can be provided which can be used to create a unique
+ *   name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *   and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def', so they are stored
+  // in the script binding rather than as locals -- confirm whether that is intentional.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events in one list so they can be processed (and checked for unique ids) together
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and holds all required keys.
+ *
+ * @param map The object to check; must be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing ('key' is not defined in this closure)
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Entries with a null value are dropped first. Every expected setting
+ * must then be present and must be a Boolean.
+ *
+ * @param auto Map with the auto settings.
+ *
+ * @return A map containing only the expected settings.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local to this call instead of storing it in the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected auto setting '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // build ONE message string; with a comma the advice text would be passed as a
+    // formatter argument to log.warn and never be printed
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // The variable is declared before the closure is assigned so that the recursive
+  // calls for List and Map values resolve to this closure.
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      // strings of the form "{ ... }" are treated as closure source and left unquoted
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      // emit one directive line per element
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  // one 'path(viash_par_<name>)' input per input file argument
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config new file mode 100644 index 
00000000000..c1b6673bca3 --- /dev/null +++ b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'from_10xh5_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Converts a 10x h5 into an h5mu file.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 
256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..cd471c4544c --- /dev/null +++ b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_params.yaml @@ -0,0 +1,16 @@ +# Inputs +input: # please fill in - example: "raw_feature_bc_matrix.h5" +# input_metrics_summary: "metrics_cellranger.h5" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +uns_metrics: "metrics_cellranger" + +# Arguments +# min_genes: 100 +# min_counts: 1000 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..adaaccceefd --- /dev/null +++ b/target/nextflow/convert/from_10xh5_to_h5mu/nextflow_schema.json @@ -0,0 +1,162 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "from_10xh5_to_h5mu", +"description": "Converts a 10x h5 into an h5mu file.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + 
"title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `raw_feature_bc_matrix.h5`. A 10x h5 file as generated by Cell Ranger", + "help_text": "Type: `file`, required, example: `raw_feature_bc_matrix.h5`. A 10x h5 file as generated by Cell Ranger." + + } + + + , + "input_metrics_summary": { + "type": + "string", + "description": "Type: `file`, example: `metrics_cellranger.h5`. A metrics summary csv file as generated by Cell Ranger", + "help_text": "Type: `file`, example: `metrics_cellranger.h5`. A metrics summary csv file as generated by Cell Ranger." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + + , + "uns_metrics": { + "type": + "string", + "description": "Type: `string`, default: `metrics_cellranger`. Name of the .uns slot under which to store QC metrics (if any)", + "help_text": "Type: `string`, default: `metrics_cellranger`. Name of the .uns slot under which to store QC metrics (if any)." + , + "default": "metrics_cellranger" + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "min_genes": { + "type": + "integer", + "description": "Type: `integer`, example: `100`. Minimum number of genes expressed required for a cell to pass filtering", + "help_text": "Type: `integer`, example: `100`.
Minimum number of genes expressed required for a cell to pass filtering." + + } + + + , + "min_counts": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. Minimum number of counts required for a cell to pass filtering", + "help_text": "Type: `integer`, example: `1000`. Minimum number of counts required for a cell to pass filtering." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py b/target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/convert/from_10xh5_to_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..f3ef72393c7 --- /dev/null +++ b/target/nextflow/convert/from_10xmtx_to_h5mu/.config.vsh.yaml @@ -0,0 +1,166 @@ +functionality: + name: "from_10xmtx_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + 
roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input mtx folder" + info: null + example: + - "input_dir_containing_gz_files" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a 10x mtx into an h5mu file.\n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.8-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + 
packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xmtx_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/main.nf b/target/nextflow/convert/from_10xmtx_to_h5mu/main.nf new file mode 100644 index 00000000000..58116551ddd --- /dev/null +++ b/target/nextflow/convert/from_10xmtx_to_h5mu/main.nf 
@@ -0,0 +1,2577 @@ +// from_10xmtx_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "from_10xmtx_to_h5mu", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input mtx folder", + "example" : [ + "input_dir_containing_gz_files" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : 
true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Converts a 10x mtx into an h5mu file.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "run_test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.8-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + 
"lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_10xmtx_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_10xmtx_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +import scanpy as sc +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +adata = sc.read_10x_mtx(par["input"], gex_only=False) + +logger.info("Renaming keys.") +adata.var = adata.var\\\\ + .rename_axis("gene_symbol")\\\\ + .reset_index()\\\\ + .set_index("gene_ids") + +# generate output +logger.info("Convert to mudata") +mdata = mu.MuData(adata) + +# override root .obs +mdata.obs = adata.obs + +# write output +logger.info("Writing %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/convert_from_10xmtx_to_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: 
tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
+ // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config new file mode 100644 index 
00000000000..d4664dd39df --- /dev/null +++ b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'from_10xmtx_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Converts a 10x mtx into an h5mu file.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 
256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..8087527e8a2 --- /dev/null +++ b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Arguments +input: # please fill in - example: "input_dir_containing_gz_files" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..0ddcb2e45f3 --- /dev/null +++ b/target/nextflow/convert/from_10xmtx_to_h5mu/nextflow_schema.json @@ -0,0 +1,93 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "from_10xmtx_to_h5mu", +"description": "Converts a 10x mtx into an h5mu file.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", 
+ "description": "Type: `file`, required, example: `input_dir_containing_gz_files`. Input mtx folder", + "help_text": "Type: `file`, required, example: `input_dir_containing_gz_files`. Input mtx folder" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py b/target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/convert/from_10xmtx_to_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml new file mode 100644 index 00000000000..f8957c86293 --- /dev/null +++ b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml @@ -0,0 +1,159 @@ +functionality: + name: "from_bd_to_10x_molecular_barcode_tags" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input SAM or BAM file." 
+ info: null + example: + - "input.bam" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output alignment file." + info: null + example: + - "output.sam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--bam" + description: "Output a BAM file." + info: null + direction: "input" + dest: "par" + - type: "integer" + name: "--threads" + alternatives: + - "-t" + description: "Number of threads" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Convert the molecular barcode sequence SAM tag from BD format (MA)\ + \ to 10X format (UB).\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + - type: "file" + path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:latest" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "samtools" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + 
mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf new file mode 100644 index 00000000000..db8ae9f8601 --- /dev/null +++ b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/main.nf @@ -0,0 +1,2586 @@ +// from_bd_to_10x_molecular_barcode_tags 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "from_bd_to_10x_molecular_barcode_tags", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input SAM or BAM file.", + "example" : [ + "input.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output alignment file.", + "example" : [ + "output.sam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--bam", + "description" : "Output a BAM file.", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--threads", + "alternatives" : [ + "-t" + ], + "description" : "Number of threads", + "required" : false, + 
"direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/" + } + ], + "description" : "Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format (UB).\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "run_test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/" + }, + { + "type" : "file", + "path" : "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:latest", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "samtools" + ], + "interactive" : false + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + 
"mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_bd_to_10x_molecular_barcode_tags/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) +$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +# Sam tags added by BD Rhapsody Pipeline +# From: https://www.bd.com/documents/guides/user-guides/GMX_BD-Rhapsody-genomics-informatics_UG_EN.pdf +# +# ========================================================================================= +# | | Definition | +# ========================================================================================= +# | CB | A number between 1 and 96 3 (884,736) representing a unique cell label sequence | +# | | (CB = 0 when no cell label sequence is detected) | +# ----------------------------------------------------------------------------------------- +# | MR | Raw molecular identifier sequence | +# ----------------------------------------------------------------------------------------- +# | MA | RSEC-adjusted molecular identifier sequence. If not a true cell, the raw UMI is | +# | | repeated in this tag. | +# ----------------------------------------------------------------------------------------- +# | PT | T if a poly(T) tail was found in the expected position on R1, or F if poly(T) | +# | | was not found | +# ----------------------------------------------------------------------------------------- +# | CN | Indicates if a sequence is derived from a putative cell, as determined by the | +# | | cell label filtering algorithm (T: putative cell; x: invalid cell label or noise | +# | | cell) Note: You can distinguish between an invalid cell label and a noise cell | +# | | with the CB tag (invalid cell labels are 0). | +# ----------------------------------------------------------------------------------------- +# | ST | The value is 1-12, indicating the Sample Tag of the called putative cell, or M | +# | | for multiplet, or x for undetermined. 
| +# ========================================================================================= + + +# SAM tags added by 10X +# https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/output/bam +# ========================================================================================= +# | | Definition | +# ========================================================================================= +# | CB | Chromium cellular barcode sequence that is error-corrected and confirmed against | +# | | a list of known-good barcode sequences. For multiplex Fixed RNA Profiling, the | +# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | +# | | sequences. | +# ----------------------------------------------------------------------------------------- +# | CR | Chromium cellular barcode sequence as reported by the sequencer. For multiplex | +# | | Fixed RNA Profiling, the cellular barcode is a combination of the 10x GEM | +# | | Barcode and Probe Barcode sequences. | +# ----------------------------------------------------------------------------------------- +# | CY | Chromium cellular barcode read quality. For multiplex Fixed RNA Profiling, the | +# | | cellular barcode is a combination of the 10x GEM Barcode and Probe Barcode | +# | | sequences. Phred scores as reported by sequencer. | +# ----------------------------------------------------------------------------------------- +# | UB | Chromium molecular barcode sequence that is error-corrected among other | +# | | molecular barcodes with the same cellular barcode and gene alignment. | +# ----------------------------------------------------------------------------------------- +# | UR | Chromium molecular barcode sequence as reported by the sequencer. | +# ----------------------------------------------------------------------------------------- +# | UY | Chromium molecular barcode read quality. Phred scores as reported by sequencer. 
| +# ----------------------------------------------------------------------------------------- +# | TR | Trimmed sequence. For the Single Cell 3' v1 chemistry, this is trailing sequence | +# | | following the UMI on Read 2. For the Single Cell 3' v2 chemistry, this is | +# | | trailing sequence following the cell and molecular barcodes on Read 1. | +# ========================================================================================= + +extra_params=() + +if [ "\\$par_bam" == "true" ]; then + extra_params+=("--bam") +fi + +cat \\\\ + <(samtools view -SH "\\$par_input") \\\\ + <(samtools view "\\$par_input" | grep "MA:Z:*" | sed "s/MA:Z:/UB:Z:/" ) | \\\\ +samtools view -Sh "\\${extra_params[@]}" -@"\\$par_threads" - > "\\$par_output" +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/convert_from_bd_to_10x_molecular_barcode_tags", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) de-duplicates a copy. The mutating default would remove the duplicates
+  // from ppIds itself, so the message below could no longer show which id was repeated.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only arguments that the config declares with type "file" and direction "input" are
+ * touched. String values are resolved with getChild() and turned into file objects;
+ * values that are not Strings are returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameter values to.
+ *
+ * @return A map of parameters where the locations of the input file parameters have been
+ *         resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of creating a script-global variable
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single value that is not a String is kept as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // guess the format of the value passed to param_list
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected param_list format;
+  // every parser returns [location of the param_list file or null, parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list (unless it already is a Collection), its elements
+ * are cast, and the list is reduced to a single value again when the config says the
+ * argument does not accept multiple values.
+ * NOTE: parameters that are not in the config (e.g. publish_dir) are not cast and are
+ * returned still wrapped in a list.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to the closure instead of creating script-global variables
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+      parType == "boolean_true" ||
+      parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config, with the exception of
+ *                    'publishDir' and 'publish_dir'.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of each set is its id,
+ *                      the second element the Map of parameters; any further elements
+ *                      are passed through unchanged.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the param_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file,
+ *               a json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process param_list arguments */
+  /********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a name clash the value from 'param_list' wins
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      if (paramValue != null) {
+        return true
+      }
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // Plain property access on purpose: Map.get(key, default) would insert a 'default'
+      // entry into the config, and parameters that are not in the config (e.g. publish_dir)
+      // have no settings at all.
+      return parameterSettings?.default != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) are unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file,
+ *               a json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of publishing it as a script-global variable
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of each tuple
+ *               must be a unique id of the parameter set, the second element
+ *               must hold the parameters themselves. Any further elements
+ *               are passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameter sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Declared defaults, keyed by plain argument name (only arguments that carry a 'default' key)
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ (arg.plainName): arg["default"] ] }
+
+  // Defaults and inputs both go through the same config-driven formatting
+  def formattedDefaults = applyConfigToOneParameterSet(declaredDefaults, config)
+  def formattedEvents = applyConfig(params, config)
+
+  // Overlay each event's own values on top of the defaults (right operand of '+' wins),
+  // keeping the id in front and the passthrough elements behind
+  def mergedEvents = formattedEvents.collect { event ->
+    [event[0], formattedDefaults + event[1]] + event.drop(2)
+  }
+  _checkUniqueIds(mergedEvents)
+
+  return mergedEvents
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *   - Gather default parameters from the Viash config and make
+ *     sure that they are correctly formatted (see applyConfig method).
+ *   - Format the input parameters (also using the applyConfig method).
+ *   - Apply the default parameter to the input parameters.
+ *   - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def'; confirm this is
+  // intended before changing it, the workflow body below reads 'config'.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map, that it only uses allowed keys and that
+ * every mandatory key is present.
+ *
+ * @param map          The value to validate.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName      Label used in the assertion messages; may be empty.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must name the closure parameter; 'key' is not defined in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config 
b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config new file mode 100644 index 00000000000..d2d37f774ed --- /dev/null +++ b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'from_bd_to_10x_molecular_barcode_tags' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format (UB).\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + 
withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml new file mode 100644 index 00000000000..547450d3a74 --- /dev/null +++ b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_params.yaml @@ -0,0 +1,9 @@ +# Arguments +input: # please fill in - example: "input.bam" +# output: "$id.$key.output.sam" +bam: false +# threads: 123 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json new file mode 100644 index 00000000000..14124b81b14 --- /dev/null +++ b/target/nextflow/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_schema.json @@ -0,0 +1,102 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": 
"from_bd_to_10x_molecular_barcode_tags", +"description": "Convert the molecular barcode sequence SAM tag from BD format (MA) to 10X format (UB).\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.bam`. Input SAM or BAM file", + "help_text": "Type: `file`, required, example: `input.bam`. Input SAM or BAM file." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.sam`, example: `output.sam`. Output alignment file", + "help_text": "Type: `file`, default: `$id.$key.output.sam`, example: `output.sam`. Output alignment file." + , + "default": "$id.$key.output.sam" + } + + + , + "bam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output a BAM file", + "help_text": "Type: `boolean_true`, default: `false`. Output a BAM file." + , + "default": "False" + } + + + , + "threads": { + "type": + "integer", + "description": "Type: `integer`. Number of threads", + "help_text": "Type: `integer`. Number of threads" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..6b153d04dbf --- /dev/null +++ b/target/nextflow/convert/from_bdrhap_to_h5mu/.config.vsh.yaml @@ -0,0 +1,181 @@ +functionality: + name: "from_bdrhap_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "string" + name: "--id" + description: "A sample ID." + info: null + example: + - "my_id" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "The output of a BD Rhapsody workflow." + info: null + example: + - "input_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "r_script" + path: "script.R" + is_executable: true + description: "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/randpy:r4.2_py3.9" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "r" + cran: + - "anndata" + bioc_force_install: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + 
mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bdrhap_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/main.nf b/target/nextflow/convert/from_bdrhap_to_h5mu/main.nf new file mode 100644 index 00000000000..b17ff3b49c0 --- /dev/null +++ b/target/nextflow/convert/from_bdrhap_to_h5mu/main.nf @@ -0,0 +1,2801 @@ +// from_bdrhap_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "from_bdrhap_to_h5mu", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "A sample ID.", + "example" : [ + "my_id" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "The output of a BD Rhapsody workflow.", + "example" : [ + "input_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : 
"--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "r_script", + "path" : "script.R", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/" + } + ], + "description" : "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "run_test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/bdrhap_5kjrt/processed/WTA.bd_rhapsody.output_raw", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/randpy:r4.2_py3.9", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt update && apt upgrade -y" + ] + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + }, + { + "type" : "r", + "cran" : [ + "anndata" + ], + "bioc_force_install" : false + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + 
"publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_bdrhap_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_bdrhap_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +cat("Loading libraries\\\\n") +options(tidyverse.quiet = TRUE) +library(tidyverse) +requireNamespace("anndata", quietly = TRUE) +requireNamespace("reticulate", quietly = TRUE) +library(assertthat) +mudata <- reticulate::import("mudata") + +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "id" = $( if [ ! 
-z ${VIASH_PAR_ID+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "input" = $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "output_compression" = $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT_COMPRESSION" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +read_metrics <- function(file) { + metric_lines <- readr::read_lines(file) + metric_lines_no_header <- metric_lines[!grepl("^##", metric_lines)] + + # parse sub data frames + group_title_regex <- "^#([^#]*)#" + group_title_ix <- grep(group_title_regex, metric_lines_no_header) + group_titles <- gsub(group_title_regex, "\\\\\\\\1", metric_lines_no_header[group_title_ix]) + group_ix_from <- group_title_ix+1 + group_ix_to <- c(group_title_ix[-1]-1, length(metric_lines_no_header)) + metric_dfs <- pmap( + list( + from = group_ix_from, + to = group_ix_to + ), + function(from, to) { + lines <- metric_lines_no_header[from:to] + lines <- lines[lines != ""] + readr::read_csv(paste0(lines, collapse = "\\\\n")) %>% + mutate(run_id = par\\$id) %>% + select(run_id, everything()) + } + ) + names(metric_dfs) <- gsub(" ", "_", tolower(group_titles)) + metric_dfs +} +cat("Reading in metric summaries\\\\n") +metrics_file <- list.files(par\\$input, pattern = 
"_Metrics_Summary.csv\\$", full.names = TRUE) +assert_that( + length(metrics_file) == 1, + msg = paste0("Exactly one *_Metrics_Summary.csv should be found, found ", length(metrics_file), " files instead.") +) +metric_dfs <- read_metrics(metrics_file) + +cat("Reading in count data\\\\n") +counts_file <- list.files(par\\$input, pattern = "_DBEC_MolsPerCell.csv\\$", full.names = TRUE) +if (length(counts_file) == 0) { + cat("Warning: could not find DBEC file, looking for RSEC file instead.\\\\n") + counts_file <- list.files(par\\$input, pattern = "_RSEC_MolsPerCell.csv\\$", full.names = TRUE) +} +assert_that( + length(counts_file) == 1, + msg = paste0("Exactly one *_(RSEC|DBEC)_MolsPerCell.csv should be found, found ", length(counts_file), " files instead.") +) +counts <- + readr::read_csv( + counts_file, + col_types = cols(.default = col_integer()), + comment = "#" + ) %>% + tibble::column_to_rownames("Cell_Index") %>% + as.matrix %>% + Matrix::Matrix(sparse = TRUE) + +# processing VDJ data +vdj_file <- list.files(par\\$input, pattern = "_VDJ_perCell.csv\\$", full.names = TRUE) +vdj_data <- + if (length(vdj_file) == 1) { + cat("Reading in VDJ data\\\\n") + readr::read_csv( + vdj_file, + comment = "#" + ) + } else { + NULL + } + +cat("Reading in VDJ metric summaries\\\\n") +vdj_metrics_file <- list.files(par\\$input, pattern = "_VDJ_metrics.csv\\$", full.names = TRUE) +vdj_metric_dfs <- + if (length(vdj_metrics_file) == 1) { + read_metrics(vdj_metrics_file) + } else { + list() + } + +# processing SMK data +smk_file <- list.files(par\\$input, pattern = "_Sample_Tag_Calls.csv\\$", full.names = TRUE) +smk_calls <- + if (length(smk_file) == 1) { + cat("Processing sample tags\\\\n") + readr::read_csv( + smk_file, + comment = "#" + ) + } else { + NULL + } +smk_metrics_file <- list.files(par\\$input, pattern = "_Sample_Tag_Metrics.csv\\$", full.names = TRUE) +smk_metrics <- + if (length(smk_metrics_file) == 1) { + readr::read_csv( + smk_metrics_file, + comment = "#" + ) + } 
else { + NULL + } + +cat("Constructing obs\\\\n") +library_id <- metric_dfs[["sequencing_quality"]]\\$Library +if (length(library_id) > 1) { + library_id <- paste(library_id[library_id != "Combined_stats"], collapse = " & ") +} + +obs <- tibble( + cell_id = rownames(counts), + run_id = par\\$id, + library_id = library_id +) + +if (!is.null(smk_calls)) { + obs <- left_join( + obs, + smk_calls %>% transmute( + cell_id = as.character(Cell_Index), + sample_tag = Sample_Tag, + sample_id = Sample_Name + ), + by = "cell_id" + ) +} else { + obs <- obs %>% mutate(sample_id = library_id) +} + +obs <- obs %>% + mutate(sample_id = ifelse(!is.na(sample_id), sample_id, run_id)) %>% + as.data.frame() %>% + column_to_rownames("cell_id") + +cat("Constructing var\\\\n") +# determine feature types of genes +var0 <- tryCatch({ + feature_types_file <- list.files(par\\$input, pattern = "feature_types.tsv\\$", full.names = TRUE) + + # abseq fasta reference has trailing info which apparently gets stripped off by the bd rhapsody pipeline + readr::read_tsv(feature_types_file) %>% + mutate( + trimmed_feature_id = gsub(" .*", "", feature_id), + i = match(feature_id, colnames(counts)), + j = match(trimmed_feature_id, colnames(counts)), + ij = ifelse(is.na(i), j, i), + final_feature_id = ifelse(!is.na(i), feature_id, trimmed_feature_id) + ) %>% + filter(!is.na(ij)) %>% + select(feature_id = final_feature_id, feature_type, reference_file) +}, error = function(e) { + cat("Feature matching error: ", e\\$message, "\\\\n", sep = "") + tibble( + feature_id = character() + ) +}) + +# in case the feature types are missing +missing_features <- tibble( + feature_id = setdiff(colnames(counts), var0\\$feature_id), + feature_type = "Gene Expression", + reference_file = NA_character_, + note = "Feature annotation file missing, assuming type is Gene Expression" +) + +var1 <- + if (nrow(missing_features) > 0) { + cat("Feature annotation file missing, assuming type is Gene Expression\\\\n") + bind_rows(var0, 
missing_features) %>% + slice(match(colnames(counts), feature_id)) + # Avoid nullable string columnns https://github.com/scverse/anndata/issues/679 + missing_features %>% mutate(across(reference_file, as.factor)) + } else { + var0 + } + +# create var +var <- var1 %>% + transmute(gene_ids = feature_id, gene_name = feature_id, feature_types = feature_type, reference_file) %>% + as.data.frame() %>% + column_to_rownames("gene_ids") + +cat("Constructing uns\\\\n") +names(metric_dfs) <- paste0("mapping_qc_", names(metric_dfs)) +smk_metric_dfs <- + if (!is.null(smk_metrics)) { + list(mapping_qc_smk_metrics = smk_metrics) + } else { + NULL + } +uns <- c(metric_dfs, smk_metric_dfs) + +cat("Constructing RNA (&ABC?) AnnData") +adata <- anndata::AnnData( + X = counts, + obs = obs, + var = var, + uns = uns +) + +adata_prot <- adata[, adata\\$var\\$feature_types == "Antibody Capture"] +if (ncol(adata_prot) == 0) { + adata_prot <- NULL +} +adata_rna <- adata[, adata\\$var\\$feature_types != "Antibody Capture"] + +adata_vdj <- + if (!is.null(vdj_data)) { + cat("Constructing VDJ AnnData\\\\n") + names(vdj_metric_dfs) <- paste0("mapping_qc_", names(vdj_metric_dfs)) + anndata::AnnData( + obs = vdj_data, + uns = vdj_metric_dfs, + shape = c(nrow(vdj_data), 0L) + ) + } else { + NULL + } + +cat("Constructing MuData object\\\\n") +modalities <- + list( + rna = adata_rna, + prot = adata_prot, + vdj = adata_vdj + ) +mdata <- mudata\\$MuData(modalities[!sapply(modalities, is.null)]) + +cat("Writing to h5mu file\\\\n") +mdata\\$write(par\\$output, compression=par\\$output_compression) +VIASHMAIN +Rscript "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/convert_from_bdrhap_to_h5mu", + "tag" : 
"0.12.0" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. 
+ // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
              'example': 'my_params.yaml',
              'multiple': false,
              'hidden': true
            ],
          ]
        ]
      ]
    ]
  ]

  return processConfig(mergeMap(config, localConfig))
}

// helper functions for generating help //

/**
 * Greedily wrap a string into lines of at most `maxLength` characters.
 * Based on io.viash.helpers.Format.wordWrap.
 *
 * Words are the pieces obtained by splitting on single whitespace characters; a word that
 * is longer than `maxLength` is put on a line of its own.
 *
 * @param str The text to wrap.
 * @param maxLength The maximum line length.
 *
 * @return The list of wrapped lines.
 */
def formatWordWrap(str, maxLength) {
  def words = str.split("\\s").toList()

  def lines = []
  // `null` means "no line started yet", so the first word is not prefixed with a space
  // and an over-long first word does not produce an empty leading line
  def line = null
  words.each{ word ->
    if (line == null) {
      line = word
    } else if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  if (line != null) {
    lines.add(line)
  }
  return lines
}

/**
 * Wrap a multi-paragraph string, treating every newline-separated piece as a paragraph.
 * Based on Format.paragraphWrap.
 *
 * @param str The text to wrap.
 * @param maxLength The maximum line length.
 *
 * @return The list of wrapped lines of all paragraphs, in order.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    // a paragraph consisting only of whitespace splits into zero words;
    // keep it as an empty line
    def line = words.isEmpty() ? "" : words[0]
    words.drop(1).each{ word ->
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    outLines.add(line)
  }
  return outLines
}

def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) {
          // strings are turned into file objects; when the values came from a param_list
          // file (multiFile is set), they are first resolved against that file's location
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 : 
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): this filters map entries, which are never null themselves --
      // confirm whether `par.value != null` was intended
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: wrap the output of paramsToList() in a Nextflow channel.
 * Prints a deprecation warning to stderr the first time any of the deprecated
 * helpers is called.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration.
 *
 * @return A channel emitting one processed parameter set per item.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: like paramsToChannel(), but every item is emitted as an
 * `[id, parameterSet]` pair.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration.
 *
 * @return A channel of `[id, parameterSet]` pairs.
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not described in the config are returned
 *         unchanged.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        // only the String elements are split; other elements are kept as they are
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      // splitting the elements of a collection yields nested results; make it one flat list
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 : 
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // build ONE message string: extra arguments to log.warn are format arguments and are dropped when the message has no '{}' placeholder + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr; valueToStr = { val -> // declared before assignment so the closure can call itself for nested lists/maps + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); instanceof is parenthesised because '!out instanceof List' parses as '(!out) instanceof List', which is always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config new file mode 100644 index 
00000000000..d7e032ca5aa --- /dev/null +++ b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'from_bdrhap_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + 
withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..1ceae40b890 --- /dev/null +++ b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Inputs +id: # please fill in - example: "my_id" +input: # please fill in - example: "input_dir/" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..58616764b07 --- /dev/null +++ b/target/nextflow/convert/from_bdrhap_to_h5mu/nextflow_schema.json @@ -0,0 +1,117 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "from_bdrhap_to_h5mu", +"description": "Convert the output of a BD Rhapsody WTA pipeline to a MuData h5 file.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + 
"description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`, required, example: `my_id`. A sample ID", + "help_text": "Type: `string`, required, example: `my_id`. A sample ID." + + } + + + , + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input_dir/`. The output of a BD Rhapsody workflow", + "help_text": "Type: `file`, required, example: `input_dir/`. The output of a BD Rhapsody workflow." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..ab95a739da8 --- /dev/null +++ b/target/nextflow/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml @@ -0,0 +1,190 @@ +functionality: + name: "from_cellranger_multi_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input folder. Must contain the output from a cellranger multi run." + info: null + example: + - "input_dir_containing_modalities" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_metrics" + description: "Name of the .uns slot under which to QC metrics (if any)." + info: null + default: + - "metrics_cellranger" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts the output from cellranger multi to a single .h5mu file.\n\ + By default, will map the following library type names to modality names:\n -\ + \ Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ:\ + \ vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n\ + \ - Multiplexing Capture: hashing\n\nOther library types have their whitepace\ + \ removed and dashes replaced by\nunderscores to generate the modality name.\n\ + \nCurrently does not allow parsing the output from cell barcode demultiplexing.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/10x_5k_anticmv" + - type: "file" + path: "resources_test/10x_5k_lung_crispr" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scirpy~=0.11.1" + - "pandas~=2.0.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_cellranger_multi_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: 
"0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf b/target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf new file mode 100644 index 00000000000..7dd685c75ea --- /dev/null +++ b/target/nextflow/convert/from_cellranger_multi_to_h5mu/main.nf @@ -0,0 +1,2744 @@ +// from_cellranger_multi_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "from_cellranger_multi_to_h5mu", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input folder. 
Must contain the output from a cellranger multi run.", + "example" : [ + "input_dir_containing_modalities" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--uns_metrics", + "description" : "Name of the .uns slot under which to QC metrics (if any).", + "default" : [ + "metrics_cellranger" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Converts the output from cellranger multi to a single .h5mu file.\nBy default, will map the following library type names to modality names:\n - Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ: vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n - Multiplexing Capture: hashing\n\nOther library types have their whitepace removed and dashes replaced by\nunderscores to generate the modality name.\n\nCurrently does not allow parsing the output from 
cell barcode demultiplexing.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/10x_5k_anticmv", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/10x_5k_lung_crispr", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "scirpy~=0.11.1", + "pandas~=2.0.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : 
"memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_cellranger_multi_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_cellranger_multi_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from pathlib import Path +import sys +import scanpy +import pandas as pd +import mudata +from scirpy.io import read_10x_vdj +from collections import defaultdict +from functools import partial + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'uns_metrics': $( if [ ! -z ${VIASH_PAR_UNS_METRICS+x} ]; then echo "r'${VIASH_PAR_UNS_METRICS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +POSSIBLE_LIBRARY_TYPES = ('vdj_t', 'vdj_b', 'vdj_t_gd', 'count') + +FEATURE_TYPES_NAMES = { + "Gene Expression": "rna", + "Peaks": "atac", + "Antibody Capture": "prot", + "VDJ": "vdj", + "VDJ-T": "vdj_t", + "VDJ-B": "vdj_b", + "CRISPR Guide Capture": "gdo", + "Multiplexing Capture": "hto" + } + +def gather_input_data(dir: Path): + if not dir.is_dir(): + raise ValueError("Specified input is not a directory.") + folder_contents = list(dir.iterdir()) + config = dir / 'config.csv' + if config not in folder_contents: + logger.warning('Config.csv not found in input directory, this folder might not be a valid cellranger multi output.') + + required_subfolders = [dir / subfolder_name for subfolder_name in ('multi', 'per_sample_outs')] + found_input = {key_: None for key_ in POSSIBLE_LIBRARY_TYPES + ('metrics_summary',)} + for required_subfolder in required_subfolders: + if not required_subfolder in folder_contents: + raise ValueError(f"Input folder must contain the subfolder {required_subfolder} please make " + "sure that the specified input folder is a valid cellranger multi output.") + + multi_dir = dir / 'multi' + for library_type in multi_dir.iterdir(): + if not library_type.is_dir(): + logger.warning("%s is not a directory. 
Contents of the multi folder " + "must be directories to be recognized as valid input data", + library_type) + continue + if library_type.name not in POSSIBLE_LIBRARY_TYPES: + raise ValueError(f"Contents of the 'multi' folder must be found one of the following: {','.join(POSSIBLE_LIBRARY_TYPES)}.") + + found_input[library_type.name] = library_type + + per_sample_outs_dir = dir / 'per_sample_outs' + for file_glob in ('*/metrics_summary.csv', '*/count/feature_reference.csv', + '*/count/crispr_analysis/perturbation_efficiencies_by_feature.csv', + '*/count/crispr_analysis/perturbation_efficiencies_by_target.csv'): + found_files = list(per_sample_outs_dir.glob(file_glob)) + if len(found_files) > 1: + raise ValueError(f"Found more than one file for glob '{file_glob}' file. " + "This component currently only supports parsing cellranger multi output for one sample.") + file_name = Path(file_glob).name.removesuffix('.csv') + found_input[file_name] = found_files[0] if found_files else None + + return found_input + + +def proces_perturbation(key_name: str, mudata: mudata.MuData, efficiency_file: Path): + assert 'gdo' in mudata.mod + eff_df = pd.read_csv(efficiency_file, index_col="Perturbation", sep=",", decimal=".", quotechar='"') + mudata.mod['gdo'].uns[key_name] = eff_df + return mudata + +def process_feature_reference(mudata: mudata.MuData, efficiency_file: Path): + df = pd.read_csv(efficiency_file, index_col="id", sep=",", decimal=".", quotechar='"') + assert 'feature_type' in df.columns, "Columns 'feature_type' should be present in features_reference file." 
+ feature_types = df['feature_type'] + if set(feature_types) - set(FEATURE_TYPES_NAMES): + raise ValueError("Not all feature types present in the features_reference file are supported by this component.") + for feature_type in feature_types: + modality = FEATURE_TYPES_NAMES[feature_type] + subset_df = df.loc[df['feature_type'] == feature_type] + mudata.mod[modality].uns['feature_reference'] = subset_df + return mudata + +def process_counts(counts_folder: Path): + counts_matrix_file = counts_folder / "raw_feature_bc_matrix.h5" + logger.info("Reading %s.", counts_matrix_file) + adata = scanpy.read_10x_h5(counts_matrix_file, gex_only=False) + + # set the gene ids as var_names + logger.info("Renaming var columns") + adata.var = adata.var\\\\ + .rename_axis("gene_symbol")\\\\ + .reset_index()\\\\ + .set_index("gene_ids") + + # generate output + logger.info("Convert to mudata") + + def modality_name_factory(library_type): + return ("".join(library_type.replace("-", "_").split())).lower() + + feature_types = defaultdict(modality_name_factory, FEATURE_TYPES_NAMES) + return mudata.MuData(adata, feature_types_names=feature_types) + +def process_metrics_summary(mudata: mudata.MuData, metrics_file: Path): + def read_percentage(val): + try: + return float(val.strip('%')) / 100 + except (AttributeError, ValueError): + return val + + metrics_summary = pd.read_csv(metrics_file, + decimal=".", + quotechar='"', + thousands=",").applymap(read_percentage) + + mudata.uns[par["uns_metrics"]] = metrics_summary + for colname, coldata in metrics_summary.items(): + try: + new_column = coldata.astype(str, copy=True).astype({colname: "category"}) + metrics_summary[colname] = new_column + except (ValueError, TypeError): + logger.warning(f"Could not store column {colname} from metrics.") + pass + return mudata + +def process_vdj(mudata: mudata.MuData, vdj_folder_path: Path): + # https://scverse.org/scirpy/latest/generated/scirpy.io.read_10x_vdj.html#scirpy-io-read-10x-vdj + # According to docs, 
using the json is preferred as this file includes intron info. + all_config_json_file = vdj_folder_path / "all_contig_annotations.json" + vdj_anndata = read_10x_vdj(all_config_json_file) + vdj_type = vdj_folder_path.name + mudata.mod[vdj_type] = vdj_anndata + return mudata + +def get_modalities(input_data): + dispatcher = { + 'vdj_t': process_vdj, + 'vdj_b': process_vdj, + 'vdj_t_gd': process_vdj, + 'metrics_summary': process_metrics_summary, + 'feature_reference': process_feature_reference, + 'perturbation_efficiencies_by_feature': partial(proces_perturbation, 'perturbation_efficiencies_by_feature'), + 'perturbation_efficiencies_by_target': partial(proces_perturbation, 'perturbation_efficiencies_by_target'), + } + mudata_file = process_counts(input_data['count']) + for modality_name, modality_data_path in input_data.items(): + if modality_name == "count" or not modality_data_path: + continue + try: + parser_function = dispatcher[modality_name] + except KeyError as e: + raise ValueError("This component does not support the " + f"parsing of the '{modality_name}' yet.") from e + mudata_file = parser_function(mudata_file, modality_data_path) + return mudata_file + +def main(): + cellranger_multi_dir = Path(par["input"]) + input_data = gather_input_data(cellranger_multi_dir) + result = get_modalities(input_data) + logger.info("Writing %s", par["output"]) + result.write_h5mu(par["output"], compression=par["output_compression"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/convert_from_cellranger_multi_to_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // 
auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the help text of a component and stop the run when 'help' was requested.
 *
 * Does nothing unless paramExists("help") is true. Otherwise the config is
 * merged with the global params, rendered with generateHelp(), printed, and
 * the script exits with status 0.
 *
 * @param config A parsed Viash config.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Determine the format of the value passed as 'param_list'.
 *
 * @param params Parameter map, optionally holding 'param_list' and 'param_list_format'.
 *
 * @return "none" when 'param_list' is absent or null. Otherwise the value of
 *         'param_list_format' when that is set, else a guess: "asis" for a
 *         non-String value, "csv" / "json" / "yaml" based on the file
 *         extension, and "yaml_blob" for any other String.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    return "none"
  }

  // Honour an explicitly requested format. The callers' assertion message tells
  // users to pass '--param_list_format', and validates the returned value.
  if (params.containsKey("param_list_format") && params.param_list_format != null) {
    return params.param_list_format
  }

  def param_list = params.param_list

  if (param_list !instanceof String) {
    "asis"
  } else if (param_list.endsWith(".csv")) {
    "csv"
  } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
    "json"
  } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
    "yaml"
  } else {
    "yaml_blob"
  }
}

// Shared by paramsToList, paramsToChannel and viashChannel so that the
// deprecation warning is printed at most once per run.
viashChannelDeprecationWarningPrinted = false

/**
 * Turn Nextflow params into a list of fully processed argument maps.
 *
 * Deprecated (see the warning printed below): use channelFromParams and
 * preprocessInputs instead.
 *
 * Defaults from the config, values found in params and every entry of
 * 'param_list' are merged (later sources win), split on 'multiple_sep', cast to
 * the configured type and checked for a unique 'id'.
 *
 * @param params Nextflow parameters, optionally containing 'param_list'.
 * @param config A parsed Viash config.
 *
 * @return A list of maps, one per parameter set, each containing an 'id'.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // each parser returns [ path of the param_list file or null, list of parameter maps ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally so the value does not leak into the script binding
        def parData

        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              // paths listed in a param_list file are resolved relative to that file
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): on a Map this closure receives entries, which are never
      // null, so this looks like a no-op -- confirm the intent.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) leaves ppIds untouched so the message lists the duplicated ids
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: same as paramsToList, but wrapped in a channel with Channel.fromList.
 *
 * @param params Nextflow parameters, optionally containing 'param_list'.
 * @param config A parsed Viash config.
 *
 * @return A channel emitting one argument map per parameter set.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: same as paramsToChannel, with every event mapped to [id, argument map].
 *
 * @param params Nextflow parameters, optionally containing 'param_list'.
 * @param config A parsed Viash config.
 *
 * @return A channel emitting [id, argument map] tuples.
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not described in the config are returned unchanged.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
*/
private void _checkUniqueIds(List parameterSets) {
  // the id is the first element of every parameter set
  def ppIds = parameterSets.collect{it[0]}
  // unique(false) returns a de-duplicated copy. A plain unique() would remove
  // the duplicates from ppIds itself, so the message below could never show them.
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

/**
 * Resolve the file paths in the parameters relative to given path
 *
 * @param paramList A Map containing parameters to process.
 *                  This function assumes that files are still of type String.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * @param relativeTo path of a file to resolve the parameters values to.
 *
 * @return A map of parameters where the locations of the input file parameters have been
 *         resolved relative to the provided path. Values that are not Strings, and
 *         parameters that are not input files, are returned unchanged.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  paramList.collectEntries { parName, parValue ->
    // declared locally so the settings do not leak into the script binding
    def argSettings = config.functionality.allArguments.find{it.plainName == parName}

    // an unset direction counts as "input", matching _castParamTypes
    def isInputFile = argSettings &&
      argSettings.type == "file" &&
      (argSettings.direction != null ? argSettings.direction : "input") == "input"

    if (isInputFile) {
      // only Strings are resolved; anything else is passed through as is
      def resolvePath = { value ->
        value !instanceof String ? value : file(getChild(relativeTo, value))
      }
      parValue = parValue instanceof Collection ? parValue.collect(resolvePath) : resolvePath(parValue)
    }
    [parName, parValue]
  }
}

/**
 * Parse the 'param_list' argument based on settings defined in a viash config
 * and return the parameter sets it describes.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of parameter sets that were parsed from the 'param_list' argument value.
*/
private List _parseParamListArguments(Map params, Map config){
  // first try to guess the format (if not set in params)
  def paramListFormat = _guessParamListFormat(params)

  // get the correct parser function for the detected params_list format;
  // each parser returns [ path of the param_list file or null, list of parameter maps ]
  def paramListParsers = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert paramListParsers.containsKey(paramListFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
    "'yaml', 'yaml_blob', 'asis' or 'none'"
  def paramListParser = paramListParsers.get(paramListFormat)

  // fetch multi param inputs
  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
  // If the value was indeed a file, paramListFile contains the location of that file (used later).
  def paramListFile = paramListOut[0]
  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file

  // data checks
  assert paramSets instanceof List: "--param_list should contain a list of maps"
  for (value in paramSets) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // Reformat from a list of maps to a list of [id, map] tuples by moving the ID
  // out of the map and into the first element of the tuple
  paramSets = paramSets.collect({ paramValues ->
    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
  })
  // Split parameters with 'multiple: true'
  paramSets = paramSets.collect({ id, paramValues ->
    def splitParamValues = _splitParams(paramValues, config)
    [id, splitParamValues]
  })

  // The paths of input files inside a param_list file may have been specified relatively to the
  // location of the param_list file. These paths must be made absolute.
  if (paramListFile){
    paramSets = paramSets.collect({ id, paramValues ->
      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
      [id, relativeParamValues]
    })
  }

  return paramSets
}

/**
 * Cast parameters to the correct type as defined in the Viash config
 *
 * @param parValues A Map of input arguments.
 * @param config A Map of the Viash configuration.
 *
 * @return The input arguments that have been cast to the type from the viash config.
 *         Arguments declared without 'multiple' are returned as a single value,
 *         all other values are returned as a list.
 */

private Map _castParamTypes(Map parValues, Map config) {
  // Cast the input to the correct type according to viash config
  def castParValues = parValues.collectEntries({ parName, parValue ->
    // declared locally so the lookups do not leak into the script binding
    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
    // dont parse parameters like publish_dir ( in which case paramSettings = null)
    // Plain property access is used here: Map.get(key, default) would write the
    // default back into the config map.
    def parType = paramSettings ? paramSettings.type : null
    // NOTE(review): values of parameters that are not in the config are wrapped
    // in a list here and never unwrapped below -- confirm this is intended.
    if (parValue !instanceof Collection) {
      parValue = [parValue]
    }
    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
      parValue = parValue.collect{ path ->
        if (path !instanceof String) {
          path
        } else {
          file(path)
        }
      }
    } else if (parType == "integer") {
      parValue = parValue.collect{it as Integer}
    } else if (parType == "double") {
      parValue = parValue.collect{it as Double}
    } else if (parType == "boolean" ||
               parType == "boolean_true" ||
               parType == "boolean_false") {
      parValue = parValue.collect{ value ->
        // Groovy truth turns every non-empty String into true, so the text
        // "false" (e.g. read from a csv param_list) needs explicit handling.
        if (value instanceof CharSequence && value.toString().equalsIgnoreCase("false")) {
          false
        } else {
          value as Boolean
        }
      }
    }

    // simplify list to value if need be
    if (paramSettings && !paramSettings.multiple) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  })
  return castParValues
}

/**
 * Apply the argument settings specified in a Viash config to a single parameter set.
 *    - Split the parameter values according to their separator if
 *       the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *
 * @param paramValues A Map of parameters to be processed. All parameters must
 *                    also be specified in the Viash config; only 'publishDir'
 *                    and 'publish_dir' are accepted in addition.
 * @param config A Map of the Viash configuration. This Map can be generated from
 *               the config file using the readConfig() function.
 * @return The input parameters that have been processed.
 */
Map applyConfigToOneParameterSet(Map paramValues, Map config){
  def splitParamValues = _splitParams(paramValues, config)
  def castParamValues = _castParamTypes(splitParamValues, config)

  // Check if any unexpected arguments were passed
  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
  castParamValues.each({parName, parValue ->
    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
  })
  return castParamValues
}

/**
 * Apply the argument settings specified in a Viash config to a list of parameter sets.
 *    - Split the parameter values according to their separator if
 *       the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *        ~ Check if the ID of the parameter set is unique across all sets.
 *
 * @param parameterSets A list of tuples: id first, parameter map second. Any
 *                      further elements are appended to the output tuple as is.
 * @param config A Map of the Viash configuration.
 *
 * @return The input parameters that have been processed.
 */

List applyConfig(List parameterSets, Map config){
  def processedparameterSets = parameterSets.collect({ parameterSet ->
    def id = parameterSet[0]
    def paramValues = parameterSet[1]
    def passthrough = parameterSet.drop(2)
    def processedSet = applyConfigToOneParameterSet(paramValues, config)
    [id, processedSet] + passthrough
  })

  _checkUniqueIds(processedparameterSets)
  return processedparameterSets
}

/**
 * Parse nextflow parameters based on settings defined in a viash config.
 * Return a list of parameter sets, each parameter set corresponding to
 * an event in a nextflow channel. The output from this function can be used
 * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
 * events.
 *
 * This function performs:
 *   - A filtering of the params which can be found in the config file.
 *   - Process the params_list argument which allows a user to initialise
 *     a Vdsl3 channel with multiple parameter sets. Possible formats are
 *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
 *     which correspond to the different arguments of this pipeline. A json or a yaml
 *     file should be a list of maps, each of which has keys corresponding to the
 *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
 *     When passing a csv, json or yaml, relative path names are relativized to the
 *     location of the parameter file.
*   - Combine the parameter sets into a vdsl3 Channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of parameters with the first element of the event being
 *         the event ID and the second element containing a map of the parsed parameters.
 */

private List _paramsToParamSets(Map params, Map config){
  /* parse regular parameters (not in param_list)  */
  /*************************************************/
  def globalParams = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  def globalID = params.get("id", null)
  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)

  /* process params_list arguments */
  /*********************************/
  def paramSets = _parseParamListArguments(params, config)
  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)

  /* combine arguments into channel */
  /**********************************/
  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
    def id = paramSet[0]
    def parValues = paramSet[1]
    // an id from the param_list entry wins over the global --id
    id = [id, globalID].find({it != null}) // first non-null element

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      id = id ? id : "stub" + index
    }
    assert id != null: "Each parameter set should have at least an ID."

    // Add regular parameters together with parameters passed with 'param_list'
    def combinedArgsValues = globalParamsValues + parValues

    // Remove parameters which are null, if the default is also null
    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
      // Declared locally so the settings do not leak into the script binding.
      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
      // '?.' covers parameters without settings (publish_dir / publishDir).
      // The one-argument get() is used because Map.get(key, default) would
      // write 'default: null' into the config, which other helpers would then
      // mistake for a declared default.
      paramValue != null || parameterSettings?.get("default") != null
    }
    [id, combinedArgsValues]
  }

  // Check if ids (first element of each list) is unique
  _checkUniqueIds(processedParams)
  return processedParams
}

/**
 * Parse nextflow parameters based on settings defined in a viash config
 * and return a nextflow channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A nextflow Channel with events. Events are formatted as a tuple that
 *         contains the ID of the event as first element and a parameter map as second element.
 */
def channelFromParams(Map params, Map config) {
  // declared locally so the list does not leak into the script binding
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}

/**
 * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
 *    - Gather default parameters from the Viash config and make
 *      sure that they are correctly formatted (see applyConfig method).
 *    - Format the input parameters (also using the applyConfig method).
 *    - Apply the default parameter to the input parameters.
*    - Do some assertions:
 *        ~ Check if the event IDs in the channel are unique.
 *
 * @param params A list of parameter sets as Tuples. The first element of the tuples
 *                must be a unique id of the parameter set, and the second element
 *                must contain the parameters themselves. Optional extra elements
 *                of the tuples will be passed to the output as is.
 * @param config A Map of the Viash configuration. This Map can be generated from
 *                the config file using the readConfig() function.
 *
 * @return A list of processed parameters sets as tuples.
 */

private List _preprocessInputsList(List params, Map config) {
  // Collect the defaults declared in the config, keyed by plain argument name
  def declaredDefaults = [:]
  config.functionality.allArguments.each { argument ->
    if (argument.containsKey("default")) {
      declaredDefaults[argument.plainName] = argument.default
    }
  }

  // Run the defaults and the incoming parameter sets through the same
  // split / cast / validate steps
  def defaults = applyConfigToOneParameterSet(declaredDefaults, config)
  def inputSets = applyConfig(params, config)

  // Layer the supplied values over the defaults; tuple elements after the
  // parameter map are appended untouched
  def mergedSets = inputSets.collect { tuple ->
    def suppliedValues = tuple[1]
    [tuple[0], defaults + suppliedValues] + tuple.drop(2)
  }
  _checkUniqueIds(mergedSets)

  return mergedSets
}

/**
 * Generate a nextflow Workflow that allows processing a channel of
 * Vdsl3 formatted events and apply a Viash config to them:
 *    - Gather default parameters from the Viash config and make
 *      sure that they are correctly formatted (see applyConfig method).
 *    - Format the input parameters (also using the applyConfig method).
 *    - Apply the default parameter to the input parameters.
 *    - Do some assertions:
 *        ~ Check if the event IDs in the channel are unique.
*
 * The events in the channel are formatted as tuples, with the
 * first element of the tuples being a unique id of the parameter set,
 * and the second element containing the parameters themselves.
 * Optional extra elements of the tuples will be passed to the output as is.
 *
 * @param args A map that must contain a 'config' key that points
 *              to a parsed config (see readConfig()). Optionally, a
 *              'key' key can be provided which can be used to create a unique
 *              name for the workflow process.
 *
 * @return A workflow that allows processing a channel of Vdsl3 formatted events
 *         and apply a Viash config to them.
 */
def preprocessInputs(Map args) {
  // NOTE(review): wfKey and config are assigned without 'def', so they are
  // script-level variables shared by every call of this function -- confirm
  // that this is required for the workflow body below to see them.
  wfKey = args.key != null ? args.key : "preprocessInputs"
  config = args.config
  workflow preprocessInputsInstance {
    take:
    input_ch

    main:
    assert config instanceof Map :
      "Error in preprocessInputs: config must be a map. " +
      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"

    // gather all events first, because ids must be checked across the whole
    // list, then emit them one by one again
    output_ch = input_ch
      | toSortedList
      | map { paramList -> _preprocessInputsList(paramList, config) }
      | flatMap
    emit:
    output_ch
  }

  return preprocessInputsInstance.cloneWithName(wfKey)
}

////////////////////////////
// VDSL3 helper functions //
////////////////////////////

import nextflow.Nextflow
import nextflow.script.IncludeDef
import nextflow.script.ScriptBinding
import nextflow.script.ScriptMeta
import nextflow.script.ScriptParser

// retrieve resourcesDir here to make sure the correct path is found
resourcesDir = ScriptMeta.current().getScriptPath().getParent()

/**
 * Assert that a value is a Map with only expected keys and all required keys.
 *
 * @param map The value to check.
 * @param expectedKeys Keys that are allowed to occur in the map.
 * @param requiredKeys Keys that must occur in the map.
 * @param mapName Name of the map, only used in the assertion messages.
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // The message must name the closure parameter: 'key' does not exist in
    // this scope, so referring to it would replace the assertion failure with
    // a missing-property error.
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}

/**
 * Validate and normalise a map of Nextflow process directives.
 *
 * Entries with a null value are dropped first. The directives handled below
 * are then type-checked with assertions and, where a shorthand is accepted,
 * rewritten: lists are joined into one string (conda, containerOptions,
 * module, queue), a container map is rendered to an image string, and single
 * values are wrapped in a list (pod, publishDir, label).
 *
 * @param drctv Map of directive name to value.
 *
 * @return A new map with the validated and normalised directives.
 */
// TODO: unit test processDirectives
def processDirectives(Map drctv) {
  // remove null values
  drctv = drctv.findAll{k, v -> v != null}

  /* DIRECTIVE accelerator
    accepted examples:
    - [ limit: 4, type: "nvidia-tesla-k80" ]
  */
  if (drctv.containsKey("accelerator")) {
    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
  }

  /* DIRECTIVE afterScript
    accepted examples:
    - "source /cluster/bin/cleanup"
  */
  if (drctv.containsKey("afterScript")) {
    assert drctv["afterScript"] instanceof CharSequence
  }

  /* DIRECTIVE beforeScript
    accepted examples:
    - "source /cluster/bin/setup"
  */
  if (drctv.containsKey("beforeScript")) {
    assert drctv["beforeScript"] instanceof CharSequence
  }

  /* DIRECTIVE cache
    accepted examples:
    - true
    - false
    - "deep"
    - "lenient"
  */
  if (drctv.containsKey("cache")) {
    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
    if (drctv["cache"] instanceof CharSequence) {
      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
    }
  }

  /* DIRECTIVE conda
    accepted examples:
    - "bwa=0.7.15"
    - "bwa=0.7.15 fastqc=0.11.5"
    - ["bwa=0.7.15", "fastqc=0.11.5"]
  */
  if (drctv.containsKey("conda")) {
    if (drctv["conda"] instanceof List) {
      drctv["conda"] = drctv["conda"].join(" ")
    }
    assert drctv["conda"] instanceof CharSequence
  }

  /* DIRECTIVE container
    accepted examples:
    - "foo/bar:tag"
    - [ registry: "reg", image: "im", tag: "ta" ]
      is transformed to "reg/im:ta"
    - [ image: "im" ]
      is transformed to "im:latest"
  */
  if (drctv.containsKey("container")) {
    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
    if (drctv["container"] instanceof Map) {
      def m = drctv["container"]
      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
      // registry precedence: environment variable, then the
      // 'override_container_registry' param, then the registry in the map
      def part1 =
        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
        m.registry ? m.registry + "/" :
        ""
      def part2 = m.image
      def part3 = m.tag ? ":" + m.tag : ":latest"
      drctv["container"] = part1 + part2 + part3
    }
  }

  /* DIRECTIVE containerOptions
    accepted examples:
    - "--foo bar"
    - ["--foo bar", "-f b"]
  */
  if (drctv.containsKey("containerOptions")) {
    if (drctv["containerOptions"] instanceof List) {
      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
    }
    assert drctv["containerOptions"] instanceof CharSequence
  }

  /* DIRECTIVE cpus
    accepted examples:
    - 1
    - 10
  */
  if (drctv.containsKey("cpus")) {
    assert drctv["cpus"] instanceof Integer
  }

  /* DIRECTIVE disk
    accepted examples:
    - "1 GB"
    - "2TB"
    - "3.2KB"
    - "10.B"
  */
  if (drctv.containsKey("disk")) {
    assert drctv["disk"] instanceof CharSequence
    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
    // ^ does not allow closures
  }

  /* DIRECTIVE echo
    accepted examples:
    - true
    - false
  */
  if (drctv.containsKey("echo")) {
    assert drctv["echo"] instanceof Boolean
  }

  /* DIRECTIVE errorStrategy
    accepted examples:
    - "terminate"
    - "finish"
  */
  if (drctv.containsKey("errorStrategy")) {
    assert drctv["errorStrategy"] instanceof CharSequence
    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
  }

  /* DIRECTIVE executor
    accepted examples:
    - "local"
    - "sge"
  */
  if (drctv.containsKey("executor")) {
    assert drctv["executor"] instanceof CharSequence
    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
  }

  /* DIRECTIVE machineType
    accepted examples:
    - "n1-highmem-8"
  */
  if (drctv.containsKey("machineType")) {
    assert drctv["machineType"] instanceof CharSequence
  }

  /* DIRECTIVE maxErrors
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxErrors")) {
    assert drctv["maxErrors"] instanceof Integer
  }

  /* DIRECTIVE maxForks
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxForks")) {
    assert drctv["maxForks"] instanceof Integer
  }

  /* DIRECTIVE maxRetries
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxRetries")) {
    assert drctv["maxRetries"] instanceof Integer
  }

  /* DIRECTIVE memory
    accepted examples:
    - "1 GB"
    - "2TB"
    - "3.2KB"
    - "10.B"
  */
  if (drctv.containsKey("memory")) {
    assert drctv["memory"] instanceof CharSequence
    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
    // ^ does not allow closures
  }

  /* DIRECTIVE module
    accepted examples:
    - "ncbi-blast/2.2.27"
    - "ncbi-blast/2.2.27:t_coffee/10.0"
    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
  */
  if (drctv.containsKey("module")) {
    if (drctv["module"] instanceof List) {
      drctv["module"] = drctv["module"].join(":")
    }
    assert drctv["module"] instanceof CharSequence
  }

  /* DIRECTIVE penv
    accepted examples:
    - "smp"
  */
  if (drctv.containsKey("penv")) {
    assert drctv["penv"] instanceof CharSequence
  }

  /* DIRECTIVE pod
    accepted examples:
    - [ label: "key", value: "val" ]
    - [ annotation: "key", value: "val" ]
    - [ env: "key", value: "val" ]
    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
  */
  if (drctv.containsKey("pod")) {
    if (drctv["pod"] instanceof Map) {
      drctv["pod"] = [ drctv["pod"] ]
    }
    assert drctv["pod"] instanceof List
    drctv["pod"].forEach { pod ->
      assert pod instanceof Map
      // TODO: should more checks be added?
      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
    }
  }

  /* DIRECTIVE publishDir
    accepted examples:
    - []
    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
    - "/path/to/dir"
      is transformed to [[ path: "/path/to/dir" ]]
    - [ path: "/path/to/dir", mode: "cache" ]
      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
  */
  // TODO: should we also look at params["publishDir"]?
  if (drctv.containsKey("publishDir")) {
    def pblsh = drctv["publishDir"]

    // check different options
    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence

    // turn into list if not already so
    // 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work because '!'
    // binds tighter than 'instanceof': it tests '(!pblsh) instanceof List'.
    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]

    // check elements of publishDir
    pblsh = pblsh.collect{ elem ->
      // turn into map if not already so
      elem = elem instanceof CharSequence ? [ path: elem ] : elem

      // check types and keys
      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")

      // check elements in map
      assert elem.containsKey("path")
      assert elem["path"] instanceof CharSequence
      if (elem.containsKey("mode")) {
        assert elem["mode"] instanceof CharSequence
        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
      }
      if (elem.containsKey("overwrite")) {
        assert elem["overwrite"] instanceof Boolean
      }
      if (elem.containsKey("pattern")) {
        assert elem["pattern"] instanceof CharSequence
      }
      if (elem.containsKey("saveAs")) {
        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
      }
      if (elem.containsKey("enabled")) {
        assert elem["enabled"] instanceof Boolean
      }

      // return final result
      elem
    }
    // store final directive
    drctv["publishDir"] = pblsh
  }

  /* DIRECTIVE queue
    accepted examples:
    - "long"
    - "short,long"
    - ["short", "long"]
  */
  if (drctv.containsKey("queue")) {
    if (drctv["queue"] instanceof List) {
      drctv["queue"] = drctv["queue"].join(",")
    }
    assert drctv["queue"] instanceof CharSequence
  }

  /* DIRECTIVE label
    accepted examples:
    - "big_mem"
    - "big_cpu"
    - ["big_mem", "big_cpu"]
  */
  if (drctv.containsKey("label")) {
    if (drctv["label"] instanceof CharSequence) {
      drctv["label"] = [ drctv["label"] ]
    }
    assert drctv["label"] instanceof List
    drctv["label"].forEach { label ->
      assert label instanceof CharSequence
      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
      // ^ does not allow closures
    }
  }

  /* DIRECTIVE scratch
    accepted examples:
    - true
    - "/path/to/scratch"
    - '$MY_PATH_TO_SCRATCH'
    - "ram-disk"
  */
  if (drctv.containsKey("scratch")) {
    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
  }

  /* DIRECTIVE storeDir
    accepted examples:
    - "/path/to/storeDir"
  */
  if (drctv.containsKey("storeDir")) {
    assert drctv["storeDir"] instanceof CharSequence
  }

  /* DIRECTIVE stageInMode
    accepted examples:
    - "copy"
    - "link"
  */
  if (drctv.containsKey("stageInMode")) {
    assert drctv["stageInMode"] instanceof CharSequence
    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
  }

  /* DIRECTIVE stageOutMode
    accepted examples:
    - "copy"
    - "link"
  */
  if (drctv.containsKey("stageOutMode")) {
    assert drctv["stageOutMode"] instanceof CharSequence
    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
  }

  /* DIRECTIVE tag
    accepted examples:
    - "foo"
    - '$id'
  */
  if (drctv.containsKey("tag")) {
    assert drctv["tag"] instanceof CharSequence
  }

  /* DIRECTIVE time
    accepted examples:
    - "1h"
    - "2days"
    - "1day 6hours 3minutes 30seconds"
  */
  if (drctv.containsKey("time")) {
    assert drctv["time"] instanceof CharSequence
    // todo: validation regex?
  }

  return drctv
}

/**
 * Validate the 'auto' settings of a module.
 *
 * @param auto Map that must hold a Boolean for each of 'simplifyInput',
 *             'simplifyOutput', 'transcript' and 'publish'. Entries with a
 *             null value are treated as missing.
 *
 * @return A map restricted to those four keys.
 */
// TODO: unit test processAuto
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // declared locally so the list does not leak into the script binding
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key)
    assert auto[key] instanceof Boolean
  }

  return auto.subMap(expectedKeys)
}

def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config new file mode 
100644 index 00000000000..59e1bd5b075 --- /dev/null +++ b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'from_cellranger_multi_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Converts the output from cellranger multi to a single .h5mu file.\nBy default, will map the following library type names to modality names:\n - Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ: vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n - Multiplexing Capture: hashing\n\nOther library types have their whitepace removed and dashes replaced by\nunderscores to generate the modality name.\n\nCurrently does not allow parsing the output from cell barcode demultiplexing.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + 
charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..c336d480ec8 --- /dev/null +++ b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_params.yaml @@ -0,0 +1,9 @@ +# Arguments +input: # please fill in - example: "input_dir_containing_modalities" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +uns_metrics: "metrics_cellranger" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git 
a/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..bbe7a4d4618 --- /dev/null +++ b/target/nextflow/convert/from_cellranger_multi_to_h5mu/nextflow_schema.json @@ -0,0 +1,104 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "from_cellranger_multi_to_h5mu", +"description": "Converts the output from cellranger multi to a single .h5mu file.\nBy default, will map the following library type names to modality names:\n - Gene Expression: rna\n - Peaks: atac\n - Antibody Capture: prot\n - VDJ: vdj\n - VDJ-T: vdj_t\n - VDJ-B: vdj_b\n - CRISPR Guide Capture: crispr\n - Multiplexing Capture: hashing\n\nOther library types have their whitepace removed and dashes replaced by\nunderscores to generate the modality name.\n\nCurrently does not allow parsing the output from cell barcode demultiplexing.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input_dir_containing_modalities`. Input folder", + "help_text": "Type: `file`, required, example: `input_dir_containing_modalities`. Input folder. Must contain the output from a cellranger multi run." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
", + "enum": ["gzip", "lzf"] + + + } + + + , + "uns_metrics": { + "type": + "string", + "description": "Type: `string`, default: `metrics_cellranger`. Name of the ", + "help_text": "Type: `string`, default: `metrics_cellranger`. Name of the .uns slot under which to QC metrics (if any)." + , + "default": "metrics_cellranger" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py b/target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/convert/from_cellranger_multi_to_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..93c279f3d96 --- /dev/null +++ b/target/nextflow/convert/from_h5ad_to_h5mu/.config.vsh.yaml @@ -0,0 +1,177 @@ +functionality: + name: "from_h5ad_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + 
email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5ad files" + info: null + default: + - "input.h5ad" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output MuData file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a single layer h5ad file into a single MuData object\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - 
"procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5ad_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/main.nf b/target/nextflow/convert/from_h5ad_to_h5mu/main.nf new file mode 100644 index 00000000000..03e89999922 --- /dev/null +++ 
b/target/nextflow/convert/from_h5ad_to_h5mu/main.nf @@ -0,0 +1,2596 @@ +// from_h5ad_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries De Maeyer (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "from_h5ad_to_h5mu", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5ad files", + "default" : [ + "input.h5ad" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output MuData 
file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Converts a single layer h5ad file into a single MuData object\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + 
"test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5ad_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5ad_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +import anndata +import sys + +## VIASH START +# The following code 
has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +assert len(par["input"]) == len(par["modality"]), "Number of input files should be the same length as the number of modalities" + +logger.info("Reading input files") +data = { key: anndata.read_h5ad(path) for key, path in zip(par["modality"], par["input"]) } + +try: + data.var_names_make_unique() +except: + pass + +logger.info("Converting to mudata") +mudata = mu.MuData(data) + +try: + mudata.var_names_make_unique() +except: + pass + +logger.info("Writing to %s.", par['output']) +mudata.write_h5mu(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : 
"openpipelines-bio/convert_from_h5ad_to_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. 
+ // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The no-arg unique() strips the
+  // duplicates from ppIds itself, which hid them from the message below.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only values of arguments declared in the config with type "file" and
+ * direction "input" are touched. Values that are not Strings (both inside
+ * collections and as single values) are passed through unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path.
+ */
+private Map<String, Object> _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared locally so the lookup does not leak into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is kept as is (was: reference to undefined 'path')
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns a pair: [location of the param_list file or null, parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped into a list, cast element by element, and
+ * unwrapped again when the argument is known and does not accept multiple
+ * values. Values of parameters that are not found in the config (like
+ * publish_dir) are not cast, but do stay wrapped in a list.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared locally so these lookups do not leak into the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      // 'as Boolean' applies Groovy truth, under which every non-empty String
+      // (including "false") is true. Textual booleans, e.g. read from a csv
+      // param_list, are therefore parsed explicitly.
+      parValue = parValue.collect{
+        def isBoolText = it instanceof CharSequence &&
+          it.toString().trim().toLowerCase() in ["true", "false"]
+        isBoolText ? it.toString().trim().toBoolean() : (it as Boolean)
+      }
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config
+ *                    ('publishDir' and 'publish_dir' are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of each set is
+ *                      its ID, the second element the Map of parameters; any
+ *                      further elements are passed through unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // an id from the param_list entry takes precedence over the global id
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a key clash the param_list value wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // declared locally so the lookup does not leak into the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // '?.' because parameters such as publish_dir are accepted without being
+      // declared as arguments in the config, in which case the lookup yields null
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // declared locally so the result does not leak into the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *   - Gather default parameters from the Viash config and make
+ *     sure that they are correctly formatted (see applyConfig method).
+ *   - Format the input parameters (also using the applyConfig method).
+ *   - Apply the default parameter to the input parameters.
+ *   - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults: every argument of the config that declares a 'default' key,
+  // run through the same splitting and casting as regular input values.
+  def argsWithDefault = config.functionality.allArguments.findAll { it.containsKey("default") }
+  def defaultArgs = argsWithDefault.collectEntries { [ it.plainName, it.default ] }
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Inputs: apply the config to each incoming tuple, then lay its values over
+  // the defaults (input values win). Elements past the second are appended untouched.
+  def parsedArgs = applyConfig(params, config).collect({ tuple ->
+    [tuple[0], parsedDefaultValues + tuple[1]] + tuple.drop(2)
+  })
+
+  // ids (first element of every tuple) must be unique
+  _checkUniqueIds(parsedArgs)
+
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *   - Gather default parameters from the Viash config and make
+ *     sure that they are correctly formatted (see applyConfig method).
+ *   - Format the input parameters (also using the applyConfig method).
+ *   - Apply the default parameter to the input parameters.
+ *   - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique.
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// Validate the 'auto' settings map: drops null entries, asserts that every +// expected key is present and holds a Boolean, and returns only the expected keys. +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // declared with 'def' so the list stays local to this function instead of being written to the script binding + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // start from the existing publishDir entries (empty list when none are set) and append the transcripts entry + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build one message string; a second argument would be taken as a format argument and, without a placeholder, never printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + // declared before assignment so the closure can call itself for nested lists and maps + def valueToStr + valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // local to this closure; an undeclared assignment would go to the shared script binding + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // parentheses are required: '!out instanceof List' negates 'out' first and is therefore always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config new file mode 100644 index 
00000000000..fef430246d9 --- /dev/null +++ b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'from_h5ad_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Converts a single layer h5ad file into a single MuData object\n' + author = 'Dries De Maeyer' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: 
mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..56e4002b284 --- /dev/null +++ b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_params.yaml @@ -0,0 +1,9 @@ +# Arguments +input: # please fill in - example: ["input.h5ad"] +modality: ["rna"] +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..8c302ca4a51 --- /dev/null +++ b/target/nextflow/convert/from_h5ad_to_h5mu/nextflow_schema.json @@ -0,0 +1,105 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "from_h5ad_to_h5mu", +"description": "Converts a single layer h5ad file into a single MuData object\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + 
"input": { + "type": + "string", + "description": "Type: List of `file`, required, default: `input.h5ad`, multiple_sep: `\":\"`. Input h5ad files", + "help_text": "Type: List of `file`, required, default: `input.h5ad`, multiple_sep: `\":\"`. Input h5ad files" + , + "default": "input.h5ad" + } + + + , + "modality": { + "type": + "string", + "description": "Type: List of `string`, default: `rna`, multiple_sep: `\":\"`. ", + "help_text": "Type: List of `string`, default: `rna`, multiple_sep: `\":\"`. " + , + "default": "rna" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`. Output MuData file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`. Output MuData file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py b/target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/convert/from_h5ad_to_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml b/target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml new file mode 100644 index 00000000000..4bf16bd4493 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_h5ad/.config.vsh.yaml @@ -0,0 +1,182 @@ +functionality: + name: "from_h5mu_to_h5ad" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input MuData file" + info: null + default: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: 
"par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output AnnData file." + info: null + default: + - "output.h5ad" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the final h5ad object." + info: null + default: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Converts a h5mu file into a h5ad file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: 
false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5mu_to_h5ad" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/main.nf b/target/nextflow/convert/from_h5mu_to_h5ad/main.nf new file mode 100644 index 00000000000..2ede870aeee --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_h5ad/main.nf @@ -0,0 +1,2592 @@ +// from_h5mu_to_h5ad 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "from_h5mu_to_h5ad", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input MuData file", + "default" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output AnnData file.", + "default" : [ + "output.h5ad" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + 
"type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the final h5ad object.", + "default" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Converts a h5mu file into a h5ad file.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + 
"id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/convert/from_h5mu_to_h5ad/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/from_h5mu_to_h5ad", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +# TODO: Merge modalities into one layer + +logger.info("Reading input h5mu file") +dat = mu.read_h5mu(par["input"]) + +logger.info("Converting to h5ad") +adat = dat.mod[par["modality"]] + +logger.info("Writing to %s.", par['output']) +adat.write_h5ad(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/convert_from_h5mu_to_h5ad", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : 
false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
+ // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+              'example': 'my_params.yaml',
+              'multiple': false,
+              'hidden': true
+            ],
+          ]
+        ]
+      ]
+    ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+/**
+ * Greedily wrap the words of a string into lines (based on
+ * io.viash.helpers.Format.wordWrap).
+ *
+ * Words are never broken: a word is appended to the current line while the
+ * line stays within maxLength, otherwise it starts a new line.
+ *
+ * @param str The text to wrap; it is split on single whitespace characters.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list of lines.
+ */
+def formatWordWrap(str, maxLength) {
+  def words = str.split("\\s").toList()
+  def lines = []
+
+  // Seed the current line with the first word (if there is one) so that the
+  // first line is not prefixed with a separator space.
+  // NOTE: this relies on List.pop() removing the FIRST element (Groovy >= 2.5).
+  def line = words.isEmpty() ? "" : words.pop()
+  while (!words.isEmpty()) {
+    def word = words.pop()
+    if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  lines.add(line)
+  return lines
+}
+
+/**
+ * Wrap a text consisting of newline separated paragraphs (based on
+ * Format.paragraphWrap). Every paragraph is wrapped separately.
+ *
+ * @param str The text to wrap.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list with the wrapped lines of all paragraphs.
+ */
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    // a paragraph that only contains whitespace yields a single empty line
+    outLines.addAll(formatWordWrap(par, maxLength))
+  }
+  return outLines
+}
+
+/**
+ * Generate the help text for one argument.
+ *
+ * @param param A Map with the settings of the argument.
+ *
+ * @return A string with the argument name, its properties and its description.
+ */
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    // null-safe: an argument without a direction is not flagged as output
+    ["output", param.direction?.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  // render a (possibly multi-valued) default or example as one string
+  def valueToString = { value ->
+    if (value == null) {
+      null
+    } else if (value instanceof List) {
+      value.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      value.toString()
+    }
+  }
+  def dflt = valueToString(param.default)
+  def example = valueToString(param.example)
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  // escape newlines and double quotes; quote the choice if it was altered or contains a comma
+  def escapeChoice = { choice ->
+    def s1 = choice.replaceAll("\\n", "\\\\n")
+    def s2 = s1.replaceAll("\"", """\\\"""")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  // only properties with a (truthy) value are printed
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+/**
+ * Generate the help message of a component
+ * (based on Helper.generateHelp() in Helper.scala).
+ *
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The help message as a string.
+ */
+def generateHelp(config) {
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // arguments referred to by name are looked up; unknown names are dropped
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+/**
+ * Print the help message and exit when the 'help' parameter exists.
+ *
+ * @param config A Map of the Viash configuration.
+ */
+def helpMessage(config) {
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+/**
+ * Guess the format of the 'param_list' parameter.
+ *
+ * @param params Input parameters.
+ *
+ * @return "none" if param_list is missing or null, "asis" if it is not a String,
+ *         "csv", "json" or "yaml" depending on the file extension,
+ *         and "yaml_blob" for any other String.
+ */
+def _guessParamListFormat(params) {
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+// shared by the deprecated functions below so that only one warning is printed
+viashChannelDeprecationWarningPrinted = false
+
+/**
+ * Parse the parameters into a list of argument sets (one Map per set).
+ * Deprecated: a warning refers to channelFromParams and preprocessInputs.
+ *
+ * @param params Input parameters, optionally containing a 'param_list'.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A list of Maps. Every Map must contain an 'id', and ids must be unique.
+ */
+def paramsToList(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  // every reader returns a pair [file the list was read from (or null), list of maps]
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
+  def paramList = multiOptionOut[1]
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        // declared locally so the value does not leak into the script binding
+        def parData
+        // split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              // paths are resolved relative to the file the param_list was read from
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      // NOTE(review): the closure receives a Map.Entry, which is never null, so
+      // this filter keeps every entry; left unchanged in this deprecated function.
+      .findAll{ par -> par != null }
+  }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  // unique(false) returns a copy, so the message still lists the duplicated ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+/**
+ * Create a channel from the output of paramsToList().
+ * Deprecated: a warning refers to channelFromParams and preprocessInputs.
+ */
+def paramsToChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+/**
+ * Create a channel of [id, argument set] tuples from the output of paramsToChannel().
+ * Deprecated: a warning refers to channelFromParams and preprocessInputs.
+ */
+def viashChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A Map of parameters where the values of parameters that accept multiple values
+ *         have been split into a list using their separator. Parameters that are not
+ *         found in the config are returned unaltered.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(parameterSettings.multiple_sep)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ *                      The first element of every set must be its id.
+ */
+private void _checkUniqueIds(List parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy instead of mutating ppIds, so
+  // the failure message below still shows the ids that were duplicated
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relatively to the provided path. Values that are not a String
+ *         and parameters that are not input files are returned unaltered.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared locally so the settings do not leak into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single value that is not a String is kept as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ *         Every parameter set is a list of two elements: the id of the set (or null)
+ *         and a Map with the remaining parameters.
+ */
+private List _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns a pair [file the list was read from (or null), parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of Maps to a list of [id, Map] pairs by moving the ID to the first element.
+  // The plain get() is used on purpose: Groovy's two-argument Map.get(key, default)
+  // inserts the default into the map when the key is missing.
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared locally so the values do not leak into the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // read-only lookup: Groovy's two-argument Map.get(key, default) would add
+    // the default to the config when the key is missing
+    def parType = paramSettings?.type
+    // NOTE(review): a value of a parameter that is not in the config is wrapped
+    // in a list here and is not simplified again below - confirm this is intended.
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      // NOTE(review): presumably a String such as "false" is cast using Groovy
+      // truth (non-empty means true) - verify for values read from a csv file.
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config
+ *                    (or be one of 'publishDir' and 'publish_dir').
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of every set is its id,
+ *                      the second element a Map of parameters. Extra elements are passed
+ *                      to the output as is.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  // plain get(): Groovy's two-argument Map.get(key, default) would insert the
+  // default into 'params' when the key is missing
+  def globalID = params.get("id")
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // The lookup is read-only and null-safe: the config is not modified, and
+    // parameters that are not in the config (e.g. publish_dir) do not fail.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ */
+def channelFromParams(Map params, Map config) {
+  // declared locally so the list does not leak into the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the Viash config, keyed by the plain argument name,
+  // and processed in the same way as the input parameters
+  def configDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ arg.plainName, arg.default ] }
+  def processedDefaults = applyConfigToOneParameterSet(configDefaults, config)
+
+  // Overlay every processed input parameter set on top of the defaults;
+  // tuple elements after [id, parameters] are passed through untouched
+  def mergedSets = applyConfig(params, config).collect { tuple ->
+    [tuple[0], processedDefaults + tuple[1]] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(mergedSets)
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and contains all required keys.
+ *
+ * @param map The object to check; it must be a Map.
+ * @param expectedKeys The keys that are allowed in the map.
+ * @param requiredKeys The keys that must be present in the map.
+ * @param mapName A name for the map, used in the error messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must refer to the closure parameter: 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether auto exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config new file mode 100644 index 
00000000000..d73324a8309 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'from_h5mu_to_h5ad' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Converts a h5mu file into a h5ad file.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 
256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml new file mode 100644 index 00000000000..5a88b692a83 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_params.yaml @@ -0,0 +1,9 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# output: "$id.$key.output.h5ad" +output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json new file mode 100644 index 00000000000..b192426b3e3 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_h5ad/nextflow_schema.json @@ -0,0 +1,106 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "from_h5mu_to_h5ad", +"description": "Converts a h5mu file into a h5ad file.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": 
"Type: `file`, required, default: `input.h5mu`. Input MuData file", + "help_text": "Type: `file`, required, default: `input.h5mu`. Input MuData file" + , + "default": "input.h5mu" + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5ad`. Output AnnData file", + "help_text": "Type: `file`, default: `$id.$key.output.h5ad`. Output AnnData file." + , + "default": "$id.$key.output.h5ad" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the final h5ad object", + "help_text": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the final h5ad object.", + "enum": ["gzip", "lzf"] + + , + "default": "gzip" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py b/target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_h5ad/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..61a1cb5d5ff --- /dev/null +++ b/target/nextflow/convert/velocyto_to_h5mu/.config.vsh.yaml @@ -0,0 +1,255 @@ +functionality: + name: "velocyto_to_h5mu" + namespace: "convert" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Robrecht Cannoodt" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open 
Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "contributor" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input_loom" + description: "Path to the input loom file." + info: null + example: + - "input.loom" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input_h5mu" + description: "If a MuData file is provided," + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "The name of the modality to operate on." + info: null + default: + - "rna_velocity" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Path to the output MuData file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_spliced" + description: "Output layer for the spliced reads." 
+ info: null + default: + - "velo_spliced" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_unspliced" + description: "Output layer for the unspliced reads." + info: null + default: + - "velo_unspliced" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_ambiguous" + description: "Output layer for the ambiguous reads." + info: null + default: + - "velo_ambiguous" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file\ + \ is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "loompy" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 
4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/velocyto_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/velocyto_to_h5mu/velocyto_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/convert/velocyto_to_h5mu/main.nf b/target/nextflow/convert/velocyto_to_h5mu/main.nf new file mode 100644 index 00000000000..42a73828637 --- /dev/null +++ b/target/nextflow/convert/velocyto_to_h5mu/main.nf @@ -0,0 +1,2693 @@ +// velocyto_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (maintainer, author) +// * Robrecht Cannoodt (author) +// * Angela Oliveira Pisco (contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "velocyto_to_h5mu", + "namespace" : "convert", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer", + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + 
{ + "type" : "file", + "name" : "--input_loom", + "description" : "Path to the input loom file.", + "example" : [ + "input.loom" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input_h5mu", + "description" : "If a MuData file is provided,", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "The name of the modality to operate on.", + "default" : [ + "rna_velocity" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Path to the output MuData file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer_spliced", + "description" : "Output layer for the spliced reads.", + "default" : [ + "velo_spliced" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer_unspliced", + "description" : "Output layer for the unspliced reads.", + "default" : [ + "velo_unspliced" + ], + "required" : false, + "direction" : 
"input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer_ambiguous", + "description" : "Output layer for the ambiguous reads.", + "default" : [ + "velo_ambiguous" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/" + } + ], + "description" : "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "loompy" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "lowcpu" + ], + 
"tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/convert/velocyto_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import anndata as ad +import mudata as mu + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_loom': $( if [ ! -z ${VIASH_PAR_INPUT_LOOM+x} ]; then echo "r'${VIASH_PAR_INPUT_LOOM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_h5mu': $( if [ ! 
-z ${VIASH_PAR_INPUT_H5MU+x} ]; then echo "r'${VIASH_PAR_INPUT_H5MU//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer_spliced': $( if [ ! -z ${VIASH_PAR_LAYER_SPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_SPLICED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer_unspliced': $( if [ ! -z ${VIASH_PAR_LAYER_UNSPLICED+x} ]; then echo "r'${VIASH_PAR_LAYER_UNSPLICED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer_ambiguous': $( if [ ! -z ${VIASH_PAR_LAYER_AMBIGUOUS+x} ]; then echo "r'${VIASH_PAR_LAYER_AMBIGUOUS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +print("Parameters:", par, flush=True) + +print("Reading AnnData from loom", flush=True) +adata_in = ad.read_loom(par["input_loom"]) +adata_in.var_names = adata_in.var["Accession"] + +print("Creating clean AnnData", flush=True) +adata = ad.AnnData( + obs=adata_in.obs[[]], + var=adata_in.var[[]], + layers={ + par["layer_spliced"]: adata_in.layers["spliced"], + par["layer_unspliced"]: adata_in.layers["unspliced"], + par["layer_ambiguous"]: adata_in.layers["ambiguous"] + } +) + +if par["input_h5mu"]: + print("Received input h5mu to read", flush=True) + mdata = mu.read_h5mu(par["input_h5mu"]) + + print(f"Storing AnnData in modality {par['modality']}", flush=True) + mdata.mod[par["modality"]] = adata +else: + print("Creating h5mu from scratch", flush=True) + mdata = mu.MuData({par["modality"]: adata}) + +print("Resulting mudata:", mdata, flush=True) + +print("Writing h5mu to file", flush=True) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: 
thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/convert_velocyto_to_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "lowcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = 
Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. 
+ def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? 
config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. A plain unique() removes the
+  // duplicates from ppIds itself, so the message below could never show the
+  // ids that actually caused the failure.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares as type "file" with direction "input"
+ * are touched, and only values that are still Strings are resolved.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing it to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      // values that are not Strings are passed through untouched
+      def resolvePath = { path ->
+        path !instanceof String ? path : file(getChild(relativeTo, path))
+      }
+      // a single value is resolved the same way as each element of a collection
+      parValue = parValue instanceof Collection ? parValue.collect(resolvePath) : resolvePath(parValue)
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' argument of the nextflow parameters based on
+ * settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // every parser returns a pair [location of the param_list file or null, parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared with 'def' so the lookups stay local to this closure
+    // instead of being written to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so single and multiple values can be cast the same way
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // only input files are turned into file objects; non-String values are kept as is
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+                parType == "boolean_true" ||
+                parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters without settings (e.g. publish_dir) are not
+    // unwrapped here and stay a single-element list; confirm callers expect this.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      "  Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config
+ *                    (apart from 'publishDir' and 'publish_dir').
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of each set is its id,
+ *                      the second element the parameter map; any further elements are
+ *                      passed through unchanged.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // an id from the param_list entry takes precedence over the global id
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (values from 'param_list' override the regular ones)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll states the filter explicitly instead of relying on a
+    // collectEntries closure that yields nothing for dropped entries.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      // local lookup; it is null for parameters without settings (e.g. publish_dir),
+      // hence the null-safe access below
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the parameter sets local instead of writing them to the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Map every argument that declares a default to that default value
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      declaredDefaults[argument.plainName] = argument.default
+    }
+  }
+
+  // Run the defaults through the same split/cast steps as user supplied values
+  def defaultValues = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Split, cast and validate the incoming parameter sets
+  def inputSets = applyConfig(params, config)
+
+  // Lay the input values over the defaults; tuple elements after the
+  // parameter map are appended unchanged
+  def mergedSets = inputSets.collect { inputSet ->
+    def valuesWithDefaults = defaultValues + inputSet[1]
+    [inputSet[0], valuesWithDefaults] + inputSet.drop(2)
+  }
+  _checkUniqueIds(mergedSets)
+
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map whose keys are all expected and which
+ * contains every required key.
+ *
+ * @param map The value to check.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used in the error messages. May be null or empty.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; there is no 'key' variable in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "move" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether auto exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/convert/velocyto_to_h5mu/nextflow.config b/target/nextflow/convert/velocyto_to_h5mu/nextflow.config new file mode 100644 index 
00000000000..520bd2417d6 --- /dev/null +++ b/target/nextflow/convert/velocyto_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'velocyto_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n' + author = 'Dries Schaumont, Robrecht Cannoodt, Angela Oliveira Pisco' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + 
withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml b/target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..a3f43db0942 --- /dev/null +++ b/target/nextflow/convert/velocyto_to_h5mu/nextflow_params.yaml @@ -0,0 +1,15 @@ +# Inputs +input_loom: # please fill in - example: "input.loom" +# input_h5mu: "input.h5mu" +modality: "rna_velocity" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +layer_spliced: "velo_spliced" +layer_unspliced: "velo_unspliced" +layer_ambiguous: "velo_ambiguous" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json b/target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..33e9b9f9059 --- /dev/null +++ b/target/nextflow/convert/velocyto_to_h5mu/nextflow_schema.json @@ -0,0 +1,161 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", 
+"title": "velocyto_to_h5mu", +"description": "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input_loom": { + "type": + "string", + "description": "Type: `file`, required, example: `input.loom`. Path to the input loom file", + "help_text": "Type: `file`, required, example: `input.loom`. Path to the input loom file." + + } + + + , + "input_h5mu": { + "type": + "string", + "description": "Type: `file`, example: `input.h5mu`. If a MuData file is provided,", + "help_text": "Type: `file`, example: `input.h5mu`. If a MuData file is provided," + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna_velocity`. The name of the modality to operate on", + "help_text": "Type: `string`, default: `rna_velocity`. The name of the modality to operate on." + , + "default": "rna_velocity" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Path to the output MuData file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Path to the output MuData file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "layer_spliced": { + "type": + "string", + "description": "Type: `string`, default: `velo_spliced`. Output layer for the spliced reads", + "help_text": "Type: `string`, default: `velo_spliced`. Output layer for the spliced reads." + , + "default": "velo_spliced" + } + + + , + "layer_unspliced": { + "type": + "string", + "description": "Type: `string`, default: `velo_unspliced`. Output layer for the unspliced reads", + "help_text": "Type: `string`, default: `velo_unspliced`. Output layer for the unspliced reads." + , + "default": "velo_unspliced" + } + + + , + "layer_ambiguous": { + "type": + "string", + "description": "Type: `string`, default: `velo_ambiguous`. Output layer for the ambiguous reads", + "help_text": "Type: `string`, default: `velo_ambiguous`. Output layer for the ambiguous reads." + , + "default": "velo_ambiguous" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml b/target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml new file mode 100644 index 00000000000..cb466880229 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background/.config.vsh.yaml @@ -0,0 +1,637 @@ +functionality: + name: "cellbender_remove_background" + namespace: "correction" + version: "0.12.4" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file. Data file on which to run tool. Data must be\ + \ un-filtered: it should include empty droplets." + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "List of modalities to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Full count matrix as an h5mu file, with background RNA removed.\ + \ This file contains all the original droplet barcodes." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_output" + description: "Output layer" + info: null + default: + - "cellbender_corrected" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_background_fraction" + info: null + default: + - "cellbender_background_fraction" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_cell_probability" + info: null + default: + - "cellbender_cell_probability" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_cell_size" + info: null + default: + - "cellbender_cell_size" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_droplet_efficiency" + info: null + default: + - "cellbender_droplet_efficiency" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_latent_scale" + info: null + default: + - "cellbender_latent_scale" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_ambient_expression" + info: null + default: + - "cellbender_ambient_expression" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_gene_expression_encoding" + info: null + default: + - "cellbender_gene_expression_encoding" + required: false + direction: "input" + multiple: false + multiple_sep: ":" 
+ dest: "par" + - name: "Arguments" + arguments: + - type: "boolean" + name: "--expected_cells_from_qc" + description: "Will use the Cell Ranger QC to determine the estimated number\ + \ of cells" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--expected_cells" + description: "Number of cells expected in the dataset (a rough estimate within\ + \ a factor of 2 is sufficient)." + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--total_droplets_included" + description: "The number of droplets from the rank-ordered UMI plot\nthat will\ + \ have their cell probabilities inferred as an\noutput. Include the droplets\ + \ which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should\ + \ be\n'surely empty' droplets.\n" + info: null + example: + - 25000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--force_cell_umi_prior" + description: "Ignore CellBender's heuristic prior estimation, and use this prior\ + \ for UMI counts in cells." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--force_empty_umi_prior" + description: "Ignore CellBender's heuristic prior estimation, and use this prior\ + \ for UMI counts in empty droplets." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--model" + description: "Which model is being used for count data.\n\n* 'naive' subtracts\ + \ the estimated ambient profile.\n* 'simple' does not model either ambient\ + \ RNA or random barcode swapping (for debugging purposes -- not recommended).\n\ + * 'ambient' assumes background RNA is incorporated into droplets.\n* 'swapping'\ + \ assumes background RNA comes from random barcode swapping (via PCR chimeras).\n\ + * 'full' uses a combined ambient and swapping model.\n" + info: null + default: + - "full" + required: false + choices: + - "naive" + - "simple" + - "ambient" + - "swapping" + - "full" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--epochs" + description: "Number of epochs to train." + info: null + default: + - 150 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--low_count_threshold" + description: "Droplets with UMI counts below this number are completely \nexcluded\ + \ from the analysis. 
This can help identify the correct \nprior for empty\ + \ droplet counts in the rare case where empty \ncounts are extremely high\ + \ (over 200).\n" + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_dim" + description: "Dimension of latent variable z.\n" + info: null + default: + - 64 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_layers" + description: "Dimension of hidden layers in the encoder for z.\n" + info: null + default: + - 512 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--training_fraction" + description: "Training detail: the fraction of the data used for training.\n\ + The rest is never seen by the inference algorithm. Speeds up learning.\n" + info: null + default: + - 0.9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--empty_drop_training_fraction" + description: "Training detail: the fraction of the training data each epoch\ + \ that \nis drawn (randomly sampled) from surely empty droplets.\n" + info: null + default: + - 0.2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--ignore_features" + description: "Integer indices of features to ignore entirely. In the output\n\ + count matrix, the counts for these features will be unchanged.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--fpr" + description: "Target 'delta' false positive rate in [0, 1). 
Use 0 for a cohort\n\ + of samples which will be jointly analyzed for differential expression.\nA\ + \ false positive is a true signal count that is erroneously removed.\nMore\ + \ background removal is accompanied by more signal removal at\nhigh values\ + \ of FPR. You can specify multiple values, which will\ncreate multiple output\ + \ files.\n" + info: null + default: + - 0.01 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--exclude_feature_types" + description: "Feature types to ignore during the analysis. These features will\n\ + be left unchanged in the output file.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--projected_ambient_count_threshold" + description: "Controls how many features are included in the analysis, which\n\ + can lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD\ + \ counts total in all cells\n(summed), then that gene is excluded, and it\ + \ will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD\ + \ = 0 will include all features\nwhich have even a single count in any empty\ + \ droplet.\n" + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--learning_rate" + description: "Training detail: lower learning rate for inference.\nA OneCycle\ + \ learning rate schedule is used, where the\nupper learning rate is ten times\ + \ this value. 
(For this\nvalue, probably do not exceed 1e-3).\n" + info: null + default: + - 1.0E-4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--final_elbo_fail_fraction" + description: "Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO\ + \ - initial_test_ELBO) > FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically\ + \ re-run if --num-training-tries > 1.\nBy default, will not fail training\ + \ based on final_training_ELBO.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--epoch_elbo_fail_fraction" + description: "Training is considered to have failed if \n(previous_epoch_test_ELBO\ + \ - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO)\ + \ > EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries\ + \ > 1.\nBy default, will not fail training based on epoch_training_ELBO.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_training_tries" + description: "Number of times to attempt to train the model. At each subsequent\ + \ attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--learning_rate_retry_mult" + description: "Learning rate is multiplied by this amount each time a new training\n\ + attempt is made. (This parameter is only used if training fails based\non\ + \ EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES\ + \ is > 1.) 
\n" + info: null + default: + - 0.2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--posterior_batch_size" + description: "Training detail: size of batches when creating the posterior.\n\ + Reduce this to avoid running out of GPU memory creating the posterior\n(will\ + \ be slower).\n" + info: null + default: + - 128 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--posterior_regulation" + description: "Posterior regularization method. (For experts: not required for\ + \ normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n\ + * PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n\ + * PRmu_gene is approximate mean-targeting per gene.\n" + info: null + required: false + choices: + - "PRq" + - "PRmu" + - "PRmu_gene" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alpha" + description: "Tunable parameter alpha for the PRq posterior regularization method\n\ + (not normally used: see documentation).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--q" + description: "Tunable parameter q for the CDF threshold estimation method (not\n\ + normally used: see documentation).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--estimator" + description: "Output denoised count estimation method. 
(For experts: not required\n\ + for normal usage, see documentation).\n" + info: null + default: + - "mckp" + required: false + choices: + - "map" + - "mean" + - "cdf" + - "sample" + - "mckp" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--estimator_multiple_cpu" + description: "Including the flag --estimator-multiple-cpu will use more than\ + \ one\nCPU to compute the MCKP output count estimator in parallel (does nothing\n\ + for other estimators).\n" + info: null + direction: "input" + dest: "par" + - type: "boolean" + name: "--constant_learning_rate" + description: "Including the flag --constant-learning-rate will use the ClippedAdam\n\ + optimizer instead of the OneCycleLR learning rate schedule, which is\nthe\ + \ default. Learning is faster with the OneCycleLR schedule.\nHowever, training\ + \ can easily be continued from a checkpoint for more\nepochs than the initial\ + \ command specified when using ClippedAdam. On\nthe other hand, if using the\ + \ OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick\ + \ up from that final checkpoint\nand continue training until 250 epochs.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--debug" + description: "Including the flag --debug will log extra messages useful for\ + \ debugging.\n" + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--cuda" + description: "Including the flag --cuda will run the inference on a\nGPU.\n" + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Eliminating technical artifacts from high-throughput single-cell RNA\ + \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ + \ and random barcode swapping from (raw) 
UMI-based scRNA-seq count matrices. \n\ + At the moment, only the count matrices produced by the CellRanger count pipeline\ + \ is supported. Support for additional tools and protocols \nwill be added in\ + \ the future. A quick start tutorial can be found here.\n\nFleming et al. 2022,\ + \ bioRxiv.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential\ + \ libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates\ + \ curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev\ + \ liblzma-dev mecab-ipadic-utf8 git \\\n&& curl https://pyenv.run | bash \\\n\ + && pyenv update \\\n&& pyenv install $PYTHON_VERSION \\\n&& pyenv global $PYTHON_VERSION\ + \ \\\n&& apt-get clean\n" + env: + - "PYENV_ROOT=\"/root/.pyenv\"" + - "PATH=\"$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH\"" + - "PYTHON_VERSION=3.7.16" + - type: "python" + user: false + packages: + - "mudata~=0.2.1" + - "cellbender~=0.3.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "muon" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + 
transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background/cellbender_remove_background" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/correction/cellbender_remove_background/main.nf b/target/nextflow/correction/cellbender_remove_background/main.nf new file mode 100644 index 00000000000..cb704b48201 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background/main.nf @@ -0,0 +1,3212 @@ +// cellbender_remove_background 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. 
+// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "cellbender_remove_background", + "namespace" : "correction", + "version" : "0.12.4", + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file. Data file on which to run tool. Data must be un-filtered: it should include empty droplets.", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "List of modalities to process.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Full count matrix as an h5mu file, with background RNA removed. 
This file contains all the original droplet barcodes.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer_output", + "description" : "Output layer", + "default" : [ + "cellbender_corrected" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_background_fraction", + "default" : [ + "cellbender_background_fraction" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_cell_probability", + "default" : [ + "cellbender_cell_probability" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_cell_size", + "default" : [ + "cellbender_cell_size" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_droplet_efficiency", + "default" : [ + "cellbender_droplet_efficiency" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_latent_scale", + "default" : [ + "cellbender_latent_scale" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_ambient_expression", + "default" : [ + 
"cellbender_ambient_expression" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_gene_expression_encoding", + "default" : [ + "cellbender_gene_expression_encoding" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "boolean", + "name" : "--expected_cells_from_qc", + "description" : "Will use the Cell Ranger QC to determine the estimated number of cells", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--expected_cells", + "description" : "Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient).", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--total_droplets_included", + "description" : "The number of droplets from the rank-ordered UMI plot\nthat will have their cell probabilities inferred as an\noutput. 
Include the droplets which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should be\n'surely empty' droplets.\n", + "example" : [ + 25000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--force_cell_umi_prior", + "description" : "Ignore CellBender's heuristic prior estimation, and use this prior for UMI counts in cells.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--force_empty_umi_prior", + "description" : "Ignore CellBender's heuristic prior estimation, and use this prior for UMI counts in empty droplets.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--model", + "description" : "Which model is being used for count data.\n\n* 'naive' subtracts the estimated ambient profile.\n* 'simple' does not model either ambient RNA or random barcode swapping (for debugging purposes -- not recommended).\n* 'ambient' assumes background RNA is incorporated into droplets.\n* 'swapping' assumes background RNA comes from random barcode swapping (via PCR chimeras).\n* 'full' uses a combined ambient and swapping model.\n", + "default" : [ + "full" + ], + "required" : false, + "choices" : [ + "naive", + "simple", + "ambient", + "swapping", + "full" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--epochs", + "description" : "Number of epochs to train.", + "default" : [ + 150 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--low_count_threshold", + "description" : "Droplets with UMI counts below this number are completely \nexcluded from the analysis. 
This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n", + "default" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--z_dim", + "description" : "Dimension of latent variable z.\n", + "default" : [ + 64 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--z_layers", + "description" : "Dimension of hidden layers in the encoder for z.\n", + "default" : [ + 512 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--training_fraction", + "description" : "Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n", + "default" : [ + 0.9 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--empty_drop_training_fraction", + "description" : "Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n", + "default" : [ + 0.2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--ignore_features", + "description" : "Integer indices of features to ignore entirely. In the output\ncount matrix, the counts for these features will be unchanged.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--fpr", + "description" : "Target 'delta' false positive rate in [0, 1). 
Use 0 for a cohort\nof samples which will be jointly analyzed for differential expression.\nA false positive is a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal removal at\nhigh values of FPR. You can specify multiple values, which will\ncreate multiple output files.\n", + "default" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--exclude_feature_types", + "description" : "Feature types to ignore during the analysis. These features will\nbe left unchanged in the output file.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--projected_ambient_count_threshold", + "description" : "Controls how many features are included in the analysis, which\ncan lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD counts total in all cells\n(summed), then that gene is excluded, and it will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD = 0 will include all features\nwhich have even a single count in any empty droplet.\n", + "default" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--learning_rate", + "description" : "Training detail: lower learning rate for inference.\nA OneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. 
(For this\nvalue, probably do not exceed 1e-3).\n", + "default" : [ + 1.0E-4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--final_elbo_fail_fraction", + "description" : "Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO) > FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries > 1.\nBy default, will not fail training based on final_training_ELBO.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--epoch_elbo_fail_fraction", + "description" : "Training is considered to have failed if \n(previous_epoch_test_ELBO - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO) > EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries > 1.\nBy default, will not fail training based on epoch_training_ELBO.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--num_training_tries", + "description" : "Number of times to attempt to train the model. At each subsequent attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n", + "default" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--learning_rate_retry_mult", + "description" : "Learning rate is multiplied by this amount each time a new training\nattempt is made. (This parameter is only used if training fails based\non EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES is > 1.) 
\n", + "default" : [ + 0.2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--posterior_batch_size", + "description" : "Training detail: size of batches when creating the posterior.\nReduce this to avoid running out of GPU memory creating the posterior\n(will be slower).\n", + "default" : [ + 128 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--posterior_regulation", + "description" : "Posterior regularization method. (For experts: not required for normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n* PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n* PRmu_gene is approximate mean-targeting per gene.\n", + "required" : false, + "choices" : [ + "PRq", + "PRmu", + "PRmu_gene" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--alpha", + "description" : "Tunable parameter alpha for the PRq posterior regularization method\n(not normally used: see documentation).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--q", + "description" : "Tunable parameter q for the CDF threshold estimation method (not\nnormally used: see documentation).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--estimator", + "description" : "Output denoised count estimation method. 
(For experts: not required\nfor normal usage, see documentation).\n", + "default" : [ + "mckp" + ], + "required" : false, + "choices" : [ + "map", + "mean", + "cdf", + "sample", + "mckp" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--estimator_multiple_cpu", + "description" : "Including the flag --estimator-multiple-cpu will use more than one\nCPU to compute the MCKP output count estimator in parallel (does nothing\nfor other estimators).\n", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--constant_learning_rate", + "description" : "Including the flag --constant-learning-rate will use the ClippedAdam\noptimizer instead of the OneCycleLR learning rate schedule, which is\nthe default. Learning is faster with the OneCycleLR schedule.\nHowever, training can easily be continued from a checkpoint for more\nepochs than the initial command specified when using ClippedAdam. 
On\nthe other hand, if using the OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick up from that final checkpoint\nand continue training until 250 epochs.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--debug", + "description" : "Including the flag --debug will log extra messages useful for debugging.\n", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--cuda", + "description" : "Including the flag --cuda will run the inference on a\nGPU.\n", + "direction" : "input", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022, bioRxiv.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev mecab-ipadic-utf8 git \\\\\n&& curl https://pyenv.run | bash \\\\\n&& pyenv update \\\\\n&& pyenv install $PYTHON_VERSION \\\\\n&& pyenv global $PYTHON_VERSION \\\\\n&& apt-get clean\n" + ], + "env" : [ + "PYENV_ROOT=\\"/root/.pyenv\\"", + "PATH=\\"$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH\\"", + "PYTHON_VERSION=3.7.16" + ] + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.1", + "cellbender~=0.3.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "muon" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem", + 
"gpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +import tempfile +import subprocess +import os +import sys +import numpy as np +from scipy.sparse import csr_matrix +from cellbender.remove_background.downstream import anndata_from_h5 +## VIASH START +# The following code has been auto-generated by Viash. 
// Shell script executed by the Nextflow process. It writes the Python program
// below to a temporary file through an unquoted bash here-document and runs it.
// Because the here-document is unquoted, bash expands dollar signs, backticks
// and backslashes inside the Python text: keep those characters out of the
// Python code and comments (the generated par/meta section relies on them).
thisScript = '''set -e
tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
import mudata as mu
import tempfile
import subprocess
import os
import sys
import numpy as np
from scipy.sparse import csr_matrix
from cellbender.remove_background.downstream import anndata_from_h5
## VIASH START
# The following code has been auto-generated by Viash.
par = {
  'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'obs_background_fraction': $( if [ ! -z ${VIASH_PAR_OBS_BACKGROUND_FRACTION+x} ]; then echo "r'${VIASH_PAR_OBS_BACKGROUND_FRACTION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'obs_cell_probability': $( if [ ! -z ${VIASH_PAR_OBS_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_PROBABILITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'obs_cell_size': $( if [ ! -z ${VIASH_PAR_OBS_CELL_SIZE+x} ]; then echo "r'${VIASH_PAR_OBS_CELL_SIZE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'obs_droplet_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_DROPLET_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_DROPLET_EFFICIENCY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'obsm_gene_expression_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_GENE_EXPRESSION_ENCODING//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'expected_cells_from_qc': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
  'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'force_cell_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_CELL_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_CELL_UMI_PRIOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'force_empty_umi_prior': $( if [ ! -z ${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR+x} ]; then echo "int(r'${VIASH_PAR_FORCE_EMPTY_UMI_PRIOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'epochs': $( if [ ! -z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ),
  'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'ignore_features': $( if [ ! -z ${VIASH_PAR_IGNORE_FEATURES+x} ]; then echo "list(map(int, r'${VIASH_PAR_IGNORE_FEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ),
  'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ),
  'exclude_feature_types': $( if [ ! -z ${VIASH_PAR_EXCLUDE_FEATURE_TYPES+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_FEATURE_TYPES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ),
  'projected_ambient_count_threshold': $( if [ ! -z ${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD+x} ]; then echo "float(r'${VIASH_PAR_PROJECTED_AMBIENT_COUNT_THRESHOLD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'final_elbo_fail_fraction': $( if [ ! -z ${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_FINAL_ELBO_FAIL_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'epoch_elbo_fail_fraction': $( if [ ! -z ${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EPOCH_ELBO_FAIL_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'num_training_tries': $( if [ ! -z ${VIASH_PAR_NUM_TRAINING_TRIES+x} ]; then echo "int(r'${VIASH_PAR_NUM_TRAINING_TRIES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'learning_rate_retry_mult': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE_RETRY_MULT+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE_RETRY_MULT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'posterior_batch_size': $( if [ ! -z ${VIASH_PAR_POSTERIOR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_POSTERIOR_BATCH_SIZE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'posterior_regulation': $( if [ ! -z ${VIASH_PAR_POSTERIOR_REGULATION+x} ]; then echo "r'${VIASH_PAR_POSTERIOR_REGULATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'q': $( if [ ! -z ${VIASH_PAR_Q+x} ]; then echo "float(r'${VIASH_PAR_Q//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'estimator': $( if [ ! -z ${VIASH_PAR_ESTIMATOR+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'estimator_multiple_cpu': $( if [ ! -z ${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU+x} ]; then echo "r'${VIASH_PAR_ESTIMATOR_MULTIPLE_CPU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
  'constant_learning_rate': $( if [ ! -z ${VIASH_PAR_CONSTANT_LEARNING_RATE+x} ]; then echo "r'${VIASH_PAR_CONSTANT_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
  'debug': $( if [ ! -z ${VIASH_PAR_DEBUG+x} ]; then echo "r'${VIASH_PAR_DEBUG//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
  'cuda': $( if [ ! -z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi )
}
meta = {
  'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
  'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
  'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi )
}

## VIASH END

sys.path.append(meta["resources_dir"])
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    """Attach an INFO-level stdout handler to the root logger and return that logger."""
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler(stdout)
    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    console_handler.setFormatter(logFormatter)
    logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()


logger.info("Reading input mudata")
mdata = mu.read_h5mu(par["input"])

mod = par["modality"]
logger.info("Running CellBender on modality %s", mod)
data = mdata.mod[mod]

# CellBender exchanges data through files, so everything up to reading its
# output back has to happen while the temporary directory still exists.
with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir:
    # construct paths within tempdir
    input_file = os.path.join(temp_dir, "input.h5ad")
    output_file = os.path.join(temp_dir, "output.h5")

    logger.info("Creating AnnData input file for CellBender: '%s'", input_file)
    data.write_h5ad(input_file)

    logger.info("Constructing CellBender command")
    cmd_pars = [
        "cellbender", "remove-background",
        "--input", input_file,
        "--output", output_file,
        # don't create checkpoints because they're not used / returned anyways
        "--checkpoint-mins", "99999999"
    ]

    if meta.get("cpus") is not None:
        cmd_pars += ["--cpu-threads", str(meta["cpus"])]

    # (CellBender flag, key in par, whether the flag takes one or more values)
    extra_args = [
        ("--expected-cells", "expected_cells", True),
        ("--total-droplets-included", "total_droplets_included", True),
        ("--force-cell-umi-prior", "force_cell_umi_prior", True),
        ("--force-empty-umi-prior", "force_empty_umi_prior", True),
        ("--model", "model", True),
        ("--epochs", "epochs", True),
        ("--low-count-threshold", "low_count_threshold", True),
        ("--z-dim", "z_dim", True),
        ("--z-layers", "z_layers", True),
        ("--training-fraction", "training_fraction", True),
        ("--empty-drop-training-fraction", "empty_drop_training_fraction", True),
        ("--ignore-features", "ignore_features", True),
        ("--fpr", "fpr", True),
        ("--exclude-feature-types", "exclude_feature_types", True),
        ("--projected-ambient-count-threshold", "projected_ambient_count_threshold", True),
        ("--learning-rate", "learning_rate", True),
        ("--final-elbo-fail-fraction", "final_elbo_fail_fraction", True),
        ("--epoch-elbo-fail-fraction", "epoch_elbo_fail_fraction", True),
        ("--num-training-tries", "num_training_tries", True),
        ("--learning-rate-retry-mult", "learning_rate_retry_mult", True),
        ("--posterior-batch-size", "posterior_batch_size", True),
        ("--posterior-regulation", "posterior_regulation", True),
        ("--alpha", "alpha", True),
        ("--q", "q", True),
        ("--estimator", "estimator", True),
        ("--estimator-multiple-cpu", "estimator_multiple_cpu", False),
        ("--constant-learning-rate", "constant_learning_rate", False),
        ("--debug", "debug", False),
        ("--cuda", "cuda", False),
    ]
    for flag, name, is_kwarg in extra_args:
        value = par[name]
        if is_kwarg:
            # Compare against None instead of relying on truthiness, so that an
            # explicitly requested zero (for example a threshold of 0) is still
            # forwarded to CellBender.
            if value is not None:
                values = value if isinstance(value, list) else [value]
                cmd_pars += [flag] + [str(val) for val in values]
        elif value:
            # boolean switch: only the bare flag is passed, and only when enabled
            cmd_pars.append(flag)

    if par["expected_cells_from_qc"]:
        if "metrics_cellranger" in data.uns:
            # The QC-derived estimate would clash with manually provided values.
            if par["expected_cells"] is not None:
                raise ValueError("If expected_cells_from_qc is set, expected_cells should be undefined")
            if par["total_droplets_included"] is not None:
                raise ValueError("If expected_cells_from_qc is set, total_droplets_included should be undefined")
            met = data.uns["metrics_cellranger"]
            col_name = "Estimated Number of Cells"
            if col_name not in met.columns:
                raise ValueError("'%s' should be a column in .uns[metrics_cellranger]" % col_name)
            # coerce to a plain int so the value logs with %d and is passed as an integer
            est_cells = int(met[col_name].values[0])
            logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5)
            cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5 * est_cells)]
        else:
            logger.warning("expected_cells_from_qc is set, but modality %s has no metrics_cellranger in .uns; "
                           "not passing --expected-cells or --total-droplets-included", mod)

    logger.info("Running CellBender: '%s'", ' '.join(cmd_pars))
    # check_output raises CalledProcessError on a non-zero exit status; the
    # captured standard output is not used afterwards.
    subprocess.check_output(cmd_pars)

    logger.info("Reading CellBender 10xh5 output file: '%s'", output_file)
    adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False)

    logger.info("CellBender output format: %s", adata_out)

    # AnnData object with n_obs x n_vars = 6794880 x 33538
    #   obs: 'cellbender_analyzed'
    #   var: 'ambient_expression', 'feature_type', 'genome', 'gene_id', 'cellbender_analyzed'
    #   uns: 'background_fraction', 'barcode_indices_for_latents', 'cell_probability', 'cell_size', 'droplet_efficiency', 'gene_expression_encoding',
    #        'cell_size_lognormal_std', 'empty_droplet_size_lognormal_loc', 'empty_droplet_size_lognormal_scale', 'swapping_fraction_dist_params',
    #        'barcodes_analyzed', 'barcodes_analyzed_inds', 'estimator', 'features_analyzed_inds', 'fraction_data_used_for_testing', 'learning_curve_learning_rate_epoch',
    #        'learning_curve_learning_rate_value', 'learning_curve_test_elbo', 'learning_curve_test_epoch', 'learning_curve_train_elbo', 'learning_curve_train_epoch',
    #        'target_false_positive_rate'

    logger.info("Copying X output to MuData")
    data.layers[par["layer_output"]] = adata_out.X

    logger.info("Copying .obs output to MuData")
    # key in par holding the destination column name -> field name in the CellBender output
    obs_store = {
        "obs_background_fraction": "background_fraction",
        "obs_cell_probability": "cell_probability",
        "obs_cell_size": "cell_size",
        "obs_droplet_efficiency": "droplet_efficiency",
        "obs_latent_scale": "latent_scale"
    }
    for to_name, from_name in obs_store.items():
        if par[to_name]:
            if from_name in adata_out.obs:
                data.obs[par[to_name]] = adata_out.obs[from_name]
            # when using unfiltered data, the values will be in uns instead of obs
            elif from_name in adata_out.uns and "barcode_indices_for_latents" in adata_out.uns:
                # rows without a latent value keep the zero fill
                vec = np.zeros(data.n_obs)
                vec[adata_out.uns["barcode_indices_for_latents"]] = adata_out.uns[from_name]
                data.obs[par[to_name]] = vec

    logger.info("Copying .var output to MuData")
    var_store = { "var_ambient_expression": "ambient_expression" }
    for to_name, from_name in var_store.items():
        if par[to_name]:
            data.var[par[to_name]] = adata_out.var[from_name]

    logger.info("Copying obsm_gene_expression_encoding output to MuData")
    obsm_store = { "obsm_gene_expression_encoding": "gene_expression_encoding" }
    for to_name, from_name in obsm_store.items():
        if par[to_name]:
            if from_name in adata_out.obsm:
                data.obsm[par[to_name]] = adata_out.obsm[from_name]
            elif from_name in adata_out.uns and "barcode_indices_for_latents" in adata_out.uns:
                # Scatter the encoding into a sparse matrix with one row per
                # observation; rows that are not listed in
                # barcode_indices_for_latents stay empty.
                matrix_to_store = adata_out.uns[from_name]
                number_of_obs = data.X.shape[0]
                latent_space_sparse = csr_matrix((number_of_obs, par["z_dim"]),
                                                 dtype=matrix_to_store.dtype)
                obs_rows_in_space_representation = adata_out.uns["barcode_indices_for_latents"]
                latent_space_sparse[obs_rows_in_space_representation] = matrix_to_store
                data.obsm[par[to_name]] = latent_space_sparse
            else:
                raise RuntimeError("Requested to save latent gene encoding, but the data is either missing "
                                   "from cellbender output or in an incorrect format.")


logger.info("Writing to file %s", par["output"])
mdata.write_h5mu(filename=par["output"], compression=par["output_compression"])
VIASHMAIN
python -B "$tempscript"
'''
+ // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters.
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containing the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + def wfKey = args.key != null ? args.key : "preprocessInputs" + def config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ?
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure.
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr; valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); the instanceof test is parenthesised because '!out instanceof List' parses as '(!out) instanceof List', and 'out' is declared with def so it stays local to this closure instead of leaking into the script binding + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background/nextflow.config b/target/nextflow/correction/cellbender_remove_background/nextflow.config new file 
mode 100644 index 00000000000..a6f4c6ec272 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background/nextflow.config @@ -0,0 +1,107 @@ +manifest { + name = 'cellbender_remove_background' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + 
singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml b/target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml new file mode 100644 index 00000000000..cc465d7b29b --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background/nextflow_params.yaml @@ -0,0 +1,51 @@ +# Inputs +input: # please fill in - example: "input.h5mu" +modality: "rna" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +layer_output: "cellbender_corrected" +obs_background_fraction: "cellbender_background_fraction" +obs_cell_probability: "cellbender_cell_probability" +obs_cell_size: "cellbender_cell_size" +obs_droplet_efficiency: "cellbender_droplet_efficiency" +obs_latent_scale: 
"cellbender_latent_scale" +var_ambient_expression: "cellbender_ambient_expression" +obsm_gene_expression_encoding: "cellbender_gene_expression_encoding" + +# Arguments +expected_cells_from_qc: false +# expected_cells: 1000 +# total_droplets_included: 25000 +# force_cell_umi_prior: 123 +# force_empty_umi_prior: 123 +model: "full" +epochs: 150 +low_count_threshold: 5 +z_dim: 64 +z_layers: [512] +training_fraction: 0.9 +empty_drop_training_fraction: 0.2 +# ignore_features: [123] +fpr: [0.01] +# exclude_feature_types: ["foo"] +projected_ambient_count_threshold: 0.1 +learning_rate: 1.0E-4 +# final_elbo_fail_fraction: 123.0 +# epoch_elbo_fail_fraction: 123.0 +num_training_tries: 1 +learning_rate_retry_mult: 0.2 +posterior_batch_size: 128 +# posterior_regulation: "foo" +# alpha: 123.0 +# q: 123.0 +estimator: "mckp" +estimator_multiple_cpu: false +# constant_learning_rate: true +debug: false +cuda: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/correction/cellbender_remove_background/nextflow_schema.json b/target/nextflow/correction/cellbender_remove_background/nextflow_schema.json new file mode 100644 index 00000000000..10ec3f879d5 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background/nextflow_schema.json @@ -0,0 +1,544 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cellbender_remove_background", +"description": "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022, bioRxiv.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file. Data file on which to run tool. Data must be un-filtered: it should include empty droplets." + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. List of modalities to process", + "help_text": "Type: `string`, default: `rna`. List of modalities to process." + , + "default": "rna" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed. This file contains all the original droplet barcodes." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + + , + "layer_output": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_corrected`. Output layer", + "help_text": "Type: `string`, default: `cellbender_corrected`. Output layer" + , + "default": "cellbender_corrected" + } + + + , + "obs_background_fraction": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_background_fraction`. 
", + "help_text": "Type: `string`, default: `cellbender_background_fraction`. " + , + "default": "cellbender_background_fraction" + } + + + , + "obs_cell_probability": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_cell_probability`. ", + "help_text": "Type: `string`, default: `cellbender_cell_probability`. " + , + "default": "cellbender_cell_probability" + } + + + , + "obs_cell_size": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_cell_size`. ", + "help_text": "Type: `string`, default: `cellbender_cell_size`. " + , + "default": "cellbender_cell_size" + } + + + , + "obs_droplet_efficiency": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_droplet_efficiency`. ", + "help_text": "Type: `string`, default: `cellbender_droplet_efficiency`. " + , + "default": "cellbender_droplet_efficiency" + } + + + , + "obs_latent_scale": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_latent_scale`. ", + "help_text": "Type: `string`, default: `cellbender_latent_scale`. " + , + "default": "cellbender_latent_scale" + } + + + , + "var_ambient_expression": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_ambient_expression`. ", + "help_text": "Type: `string`, default: `cellbender_ambient_expression`. " + , + "default": "cellbender_ambient_expression" + } + + + , + "obsm_gene_expression_encoding": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_gene_expression_encoding`. ", + "help_text": "Type: `string`, default: `cellbender_gene_expression_encoding`. " + , + "default": "cellbender_gene_expression_encoding" + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "expected_cells_from_qc": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. 
Will use the Cell Ranger QC to determine the estimated number of cells", + "help_text": "Type: `boolean`, default: `false`. Will use the Cell Ranger QC to determine the estimated number of cells" + , + "default": "False" + } + + + , + "expected_cells": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)", + "help_text": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)." + + } + + + , + "total_droplets_included": { + "type": + "integer", + "description": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will have their cell probabilities inferred as an\noutput", + "help_text": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will have their cell probabilities inferred as an\noutput. Include the droplets which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should be\n\u0027surely empty\u0027 droplets.\n" + + } + + + , + "force_cell_umi_prior": { + "type": + "integer", + "description": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in cells", + "help_text": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in cells." + + } + + + , + "force_empty_umi_prior": { + "type": + "integer", + "description": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in empty droplets", + "help_text": "Type: `integer`. Ignore CellBender\u0027s heuristic prior estimation, and use this prior for UMI counts in empty droplets." + + } + + + , + "model": { + "type": + "string", + "description": "Type: `string`, default: `full`, choices: ``naive`, `simple`, `ambient`, `swapping`, `full``. 
Which model is being used for count data", + "help_text": "Type: `string`, default: `full`, choices: ``naive`, `simple`, `ambient`, `swapping`, `full``. Which model is being used for count data.\n\n* \u0027naive\u0027 subtracts the estimated ambient profile.\n* \u0027simple\u0027 does not model either ambient RNA or random barcode swapping (for debugging purposes -- not recommended).\n* \u0027ambient\u0027 assumes background RNA is incorporated into droplets.\n* \u0027swapping\u0027 assumes background RNA comes from random barcode swapping (via PCR chimeras).\n* \u0027full\u0027 uses a combined ambient and swapping model.\n", + "enum": ["naive", "simple", "ambient", "swapping", "full"] + + , + "default": "full" + } + + + , + "epochs": { + "type": + "integer", + "description": "Type: `integer`, default: `150`. Number of epochs to train", + "help_text": "Type: `integer`, default: `150`. Number of epochs to train." + , + "default": "150" + } + + + , + "low_count_threshold": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Droplets with UMI counts below this number are completely \nexcluded from the analysis", + "help_text": "Type: `integer`, default: `5`. Droplets with UMI counts below this number are completely \nexcluded from the analysis. This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n" + , + "default": "5" + } + + + , + "z_dim": { + "type": + "integer", + "description": "Type: `integer`, default: `64`. Dimension of latent variable z", + "help_text": "Type: `integer`, default: `64`. Dimension of latent variable z.\n" + , + "default": "64" + } + + + , + "z_layers": { + "type": + "string", + "description": "Type: List of `integer`, default: `512`, multiple_sep: `\":\"`. Dimension of hidden layers in the encoder for z", + "help_text": "Type: List of `integer`, default: `512`, multiple_sep: `\":\"`. 
Dimension of hidden layers in the encoder for z.\n" + , + "default": "512" + } + + + , + "training_fraction": { + "type": + "number", + "description": "Type: `double`, default: `0.9`. Training detail: the fraction of the data used for training", + "help_text": "Type: `double`, default: `0.9`. Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n" + , + "default": "0.9" + } + + + , + "empty_drop_training_fraction": { + "type": + "number", + "description": "Type: `double`, default: `0.2`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets", + "help_text": "Type: `double`, default: `0.2`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n" + , + "default": "0.2" + } + + + , + "ignore_features": { + "type": + "string", + "description": "Type: List of `integer`, multiple_sep: `\":\"`. Integer indices of features to ignore entirely", + "help_text": "Type: List of `integer`, multiple_sep: `\":\"`. Integer indices of features to ignore entirely. In the output\ncount matrix, the counts for these features will be unchanged.\n" + + } + + + , + "fpr": { + "type": + "string", + "description": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target \u0027delta\u0027 false positive rate in [0, 1)", + "help_text": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target \u0027delta\u0027 false positive rate in [0, 1). Use 0 for a cohort\nof samples which will be jointly analyzed for differential expression.\nA false positive is a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal removal at\nhigh values of FPR. 
You can specify multiple values, which will\ncreate multiple output files.\n" + , + "default": "0.01" + } + + + , + "exclude_feature_types": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Feature types to ignore during the analysis", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Feature types to ignore during the analysis. These features will\nbe left unchanged in the output file.\n" + + } + + + , + "projected_ambient_count_threshold": { + "type": + "number", + "description": "Type: `double`, default: `0.1`. Controls how many features are included in the analysis, which\ncan lead to a large speedup", + "help_text": "Type: `double`, default: `0.1`. Controls how many features are included in the analysis, which\ncan lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD counts total in all cells\n(summed), then that gene is excluded, and it will be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD = 0 will include all features\nwhich have even a single count in any empty droplet.\n" + , + "default": "0.1" + } + + + , + "learning_rate": { + "type": + "number", + "description": "Type: `double`, default: `1.0E-4`. Training detail: lower learning rate for inference", + "help_text": "Type: `double`, default: `1.0E-4`. Training detail: lower learning rate for inference.\nA OneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. (For this\nvalue, probably do not exceed 1e-3).\n" + , + "default": "0.0001" + } + + + , + "final_elbo_fail_fraction": { + "type": + "number", + "description": "Type: `double`. Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO) \u003e FINAL_ELBO_FAIL_FRACTION", + "help_text": "Type: `double`. 
Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO - initial_test_ELBO) \u003e FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries \u003e 1.\nBy default, will not fail training based on final_training_ELBO.\n" + + } + + + , + "epoch_elbo_fail_fraction": { + "type": + "number", + "description": "Type: `double`. Training is considered to have failed if \n(previous_epoch_test_ELBO - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO) \u003e EPOCH_ELBO_FAIL_FRACTION", + "help_text": "Type: `double`. Training is considered to have failed if \n(previous_epoch_test_ELBO - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO) \u003e EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries \u003e 1.\nBy default, will not fail training based on epoch_training_ELBO.\n" + + } + + + , + "num_training_tries": { + "type": + "integer", + "description": "Type: `integer`, default: `1`. Number of times to attempt to train the model", + "help_text": "Type: `integer`, default: `1`. Number of times to attempt to train the model. At each subsequent attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n" + , + "default": "1" + } + + + , + "learning_rate_retry_mult": { + "type": + "number", + "description": "Type: `double`, default: `0.2`. Learning rate is multiplied by this amount each time a new training\nattempt is made", + "help_text": "Type: `double`, default: `0.2`. Learning rate is multiplied by this amount each time a new training\nattempt is made. (This parameter is only used if training fails based\non EPOCH_ELBO_FAIL_FRACTION or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES is \u003e 1.) \n" + , + "default": "0.2" + } + + + , + "posterior_batch_size": { + "type": + "integer", + "description": "Type: `integer`, default: `128`. 
Training detail: size of batches when creating the posterior", + "help_text": "Type: `integer`, default: `128`. Training detail: size of batches when creating the posterior.\nReduce this to avoid running out of GPU memory creating the posterior\n(will be slower).\n" + , + "default": "128" + } + + + , + "posterior_regulation": { + "type": + "string", + "description": "Type: `string`, choices: ``PRq`, `PRmu`, `PRmu_gene``. Posterior regularization method", + "help_text": "Type: `string`, choices: ``PRq`, `PRmu`, `PRmu_gene``. Posterior regularization method. (For experts: not required for normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n* PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n* PRmu_gene is approximate mean-targeting per gene.\n", + "enum": ["PRq", "PRmu", "PRmu_gene"] + + + } + + + , + "alpha": { + "type": + "number", + "description": "Type: `double`. Tunable parameter alpha for the PRq posterior regularization method\n(not normally used: see documentation)", + "help_text": "Type: `double`. Tunable parameter alpha for the PRq posterior regularization method\n(not normally used: see documentation).\n" + + } + + + , + "q": { + "type": + "number", + "description": "Type: `double`. Tunable parameter q for the CDF threshold estimation method (not\nnormally used: see documentation)", + "help_text": "Type: `double`. Tunable parameter q for the CDF threshold estimation method (not\nnormally used: see documentation).\n" + + } + + + , + "estimator": { + "type": + "string", + "description": "Type: `string`, default: `mckp`, choices: ``map`, `mean`, `cdf`, `sample`, `mckp``. Output denoised count estimation method", + "help_text": "Type: `string`, default: `mckp`, choices: ``map`, `mean`, `cdf`, `sample`, `mckp``. Output denoised count estimation method. 
(For experts: not required\nfor normal usage, see documentation).\n", + "enum": ["map", "mean", "cdf", "sample", "mckp"] + + , + "default": "mckp" + } + + + , + "estimator_multiple_cpu": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Including the flag --estimator-multiple-cpu will use more than one\nCPU to compute the MCKP output count estimator in parallel (does nothing\nfor other estimators)", + "help_text": "Type: `boolean_true`, default: `false`. Including the flag --estimator-multiple-cpu will use more than one\nCPU to compute the MCKP output count estimator in parallel (does nothing\nfor other estimators).\n" + , + "default": "False" + } + + + , + "constant_learning_rate": { + "type": + "boolean", + "description": "Type: `boolean`. Including the flag --constant-learning-rate will use the ClippedAdam\noptimizer instead of the OneCycleLR learning rate schedule, which is\nthe default", + "help_text": "Type: `boolean`. Including the flag --constant-learning-rate will use the ClippedAdam\noptimizer instead of the OneCycleLR learning rate schedule, which is\nthe default. Learning is faster with the OneCycleLR schedule.\nHowever, training can easily be continued from a checkpoint for more\nepochs than the initial command specified when using ClippedAdam. On\nthe other hand, if using the OneCycleLR schedule with 150 epochs\nspecified, it is not possible to pick up from that final checkpoint\nand continue training until 250 epochs.\n" + + } + + + , + "debug": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Including the flag --debug will log extra messages useful for debugging", + "help_text": "Type: `boolean_true`, default: `false`. Including the flag --debug will log extra messages useful for debugging.\n" + , + "default": "False" + } + + + , + "cuda": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Including the flag --cuda will run the inference on a\nGPU", + "help_text": "Type: `boolean_true`, default: `false`. Including the flag --cuda will run the inference on a\nGPU.\n" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/correction/cellbender_remove_background/setup_logger.py b/target/nextflow/correction/cellbender_remove_background/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml b/target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml new file mode 100644 index 00000000000..74ed0c17f62 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background_v0_2/.config.vsh.yaml @@ -0,0 +1,406 @@ +functionality: + name: 
"cellbender_remove_background_v0_2" + namespace: "correction" + version: "0.12.4" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file." + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "List of modalities to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Full count matrix as an h5mu file, with background RNA removed.\ + \ This file contains all the original droplet barcodes." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer_output" + description: "Output layer" + info: null + default: + - "corrected" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_latent_rt_efficiency" + info: null + default: + - "latent_rt_efficiency" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_latent_cell_probability" + info: null + default: + - "latent_cell_probability" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_latent_scale" + info: null + default: + - "latent_scale" + required: false + direction: "input" + multiple: false + multiple_sep: 
":" + dest: "par" + - type: "string" + name: "--var_ambient_expression" + info: null + default: + - "ambient_expression" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_latent_gene_encoding" + info: null + default: + - "cellbender_latent_gene_encoding" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--expected_cells" + description: "Number of cells expected in the dataset (a rough estimate within\ + \ a factor of 2 is sufficient)." + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--total_droplets_included" + description: "The number of droplets from the rank-ordered UMI plot\nthat will\ + \ be analyzed. The largest 'total_droplets'\ndroplets will have their cell\ + \ probabilities inferred\nas an output.\n" + info: null + example: + - 25000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--expected_cells_from_qc" + description: "Will use the Cell Ranger QC to determine the estimated number\ + \ of cells" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--model" + description: "Which model is being used for count data. 'simple'\ndoes not model\ + \ either ambient RNA or random barcode\nswapping (for debugging purposes --\ + \ not recommended).\n'ambient' assumes background RNA is incorporated into\n\ + droplets. 
'swapping' assumes background RNA comes from\nrandom barcode swapping.\ + \ 'full' uses a combined\nambient and swapping model.\n" + info: null + default: + - "full" + required: false + choices: + - "simple" + - "ambient" + - "swapping" + - "full" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--epochs" + description: "Number of epochs to train." + info: null + default: + - 150 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--low_count_threshold" + description: "Droplets with UMI counts below this number are completely \nexcluded\ + \ from the analysis. This can help identify the correct \nprior for empty\ + \ droplet counts in the rare case where empty \ncounts are extremely high\ + \ (over 200).\n" + info: null + default: + - 15 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_dim" + description: "Dimension of latent variable z.\n" + info: null + default: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--z_layers" + description: "Dimension of hidden layers in the encoder for z.\n" + info: null + default: + - 500 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--training_fraction" + description: "Training detail: the fraction of the data used for training.\n\ + The rest is never seen by the inference algorithm. 
Speeds up learning.\n" + info: null + default: + - 0.9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--empty_drop_training_fraction" + description: "Training detail: the fraction of the training data each epoch\ + \ that \nis drawn (randomly sampled) from surely empty droplets.\n" + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--fpr" + description: "Target false positive rate in (0, 1). A false positive\nis a true\ + \ signal count that is erroneously removed.\nMore background removal is accompanied\ + \ by more signal\nremoval at high values of FPR. You can specify\nmultiple\ + \ values, which will create multiple output\nfiles.\n" + info: null + default: + - 0.01 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--exclude_antibody_capture" + description: "Including the flag --exclude-antibody-capture will\ncause remove-background\ + \ to operate on gene counts\nonly, ignoring other features.\n" + info: null + direction: "input" + dest: "par" + - type: "double" + name: "--learning_rate" + description: "Training detail: lower learning rate for inference. A\nOneCycle\ + \ learning rate schedule is used, where the\nupper learning rate is ten times\ + \ this value. 
(For this\nvalue, probably do not exceed 1e-3).\n" + info: null + example: + - 1.0E-4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--cuda" + description: "Including the flag --cuda will run the inference on a\nGPU.\n" + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "helper.py" + - type: "file" + path: "src/utils/setup_logger.py" + description: "Eliminating technical artifacts from high-throughput single-cell RNA\ + \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules\ + \ and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \n\ + At the moment, only the count matrices produced by the CellRanger count pipeline\ + \ is supported. Support for additional tools and protocols \nwill be added in\ + \ the future. A quick start tutorial can be found here.\n\nFleming et al. 
2022,\ + \ bioRxiv.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/pytorch:22.12-py3" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "muon==0.1.5" + - "tables==3.8.0" + - "cellbender==0.2.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "muon~=0.1.4" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + 
config: "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background_v0_2" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background_v0_2/cellbender_remove_background_v0_2" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/helper.py b/target/nextflow/correction/cellbender_remove_background_v0_2/helper.py new file mode 100644 index 00000000000..479dd56f596 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background_v0_2/helper.py @@ -0,0 +1,143 @@ +# This file is copied from https://github.com/broadinstitute/CellBender/issues/128#issuecomment-1175336065 +# to solve an issue with scanpy not being able to read in the 10x h5 files produced by cellbender. +# +# Note: If something doesn't work in this helper function, it may be interesting to +# take a look at the comments by Dries: https://github.com/openpipelines-bio/openpipeline/pull/115 +# I'm not going to apply them for now -- if it ain't broke, don't fix it. +import tables +import numpy as np +import scipy.sparse as sp +import anndata +from typing import Dict + + +def anndata_from_h5(file: str, + analyzed_barcodes_only: bool = True) -> 'anndata.AnnData': + """Load an output h5 file into an AnnData object for downstream work. + + Args: + file: The h5 file + analyzed_barcodes_only: False to load all barcodes, so that the size of + the AnnData object will match the size of the input raw count matrix. + True to load a limited set of barcodes: only those analyzed by the + algorithm. 
This allows relevant latent variables to be loaded + properly into adata.obs and adata.obsm, rather than adata.uns. + + Returns: + adata: The anndata object, populated with inferred latent variables + and metadata. + + """ + + d = dict_from_h5(file) + X = sp.csc_matrix((d.pop('data'), d.pop('indices'), d.pop('indptr')), + shape=d.pop('shape')).transpose().tocsr() + + # check and see if we have barcode index annotations, and if the file is filtered + barcode_key = [k for k in d.keys() if (('barcode' in k) and ('ind' in k))] + if len(barcode_key) > 0: + max_barcode_ind = d[barcode_key[0]].max() + filtered_file = (max_barcode_ind >= X.shape[0]) + else: + filtered_file = True + + if analyzed_barcodes_only: + if filtered_file: + # filtered file being read, so we don't need to subset + print('Assuming we are loading a "filtered" file that contains only cells.') + pass + elif 'barcode_indices_for_latents' in d.keys(): + X = X[d['barcode_indices_for_latents'], :] + d['barcodes'] = d['barcodes'][d['barcode_indices_for_latents']] + elif 'barcodes_analyzed_inds' in d.keys(): + X = X[d['barcodes_analyzed_inds'], :] + d['barcodes'] = d['barcodes'][d['barcodes_analyzed_inds']] + else: + print('Warning: analyzed_barcodes_only=True, but the key ' + '"barcodes_analyzed_inds" or "barcode_indices_for_latents" ' + 'is missing from the h5 file. ' + 'Will output all barcodes, and proceed as if ' + 'analyzed_barcodes_only=False') + + # Construct the anndata object. + adata = anndata.AnnData(X=X, + obs={'barcode': d.pop('barcodes').astype(str)}, + var={'gene_name': (d.pop('gene_names') if 'gene_names' in d.keys() + else d.pop('name')).astype(str)}, + dtype=X.dtype) + adata.obs.set_index('barcode', inplace=True) + adata.var.set_index('gene_name', inplace=True) + + # For CellRanger v2 legacy format, "gene_ids" was called "genes"... 
rename this + if 'genes' in d.keys(): + d['id'] = d.pop('genes') + + # For purely aesthetic purposes, rename "id" to "gene_id" + if 'id' in d.keys(): + d['gene_id'] = d.pop('id') + + # If genomes are empty, try to guess them based on gene_id + if 'genome' in d.keys(): + if np.array([s.decode() == '' for s in d['genome']]).all(): + if '_' in d['gene_id'][0].decode(): + print('Genome field blank, so attempting to guess genomes based on gene_id prefixes') + d['genome'] = np.array([s.decode().split('_')[0] for s in d['gene_id']], dtype=str) + + # Add other information to the anndata object in the appropriate slot. + _fill_adata_slots_automatically(adata, d) + + # Add a special additional field to .var if it exists. + if 'features_analyzed_inds' in adata.uns.keys(): + adata.var['cellbender_analyzed'] = [True if (i in adata.uns['features_analyzed_inds']) + else False for i in range(adata.shape[1])] + + if analyzed_barcodes_only: + for col in adata.obs.columns[adata.obs.columns.str.startswith('barcodes_analyzed') + | adata.obs.columns.str.startswith('barcode_indices')]: + try: + del adata.obs[col] + except Exception: + pass + else: + # Add a special additional field to .obs if all barcodes are included. 
+ if 'barcodes_analyzed_inds' in adata.uns.keys(): + adata.obs['cellbender_analyzed'] = [True if (i in adata.uns['barcodes_analyzed_inds']) + else False for i in range(adata.shape[0])] + + return adata + + +def dict_from_h5(file: str) -> Dict[str, np.ndarray]: + """Read in everything from an h5 file and put into a dictionary.""" + d = {} + with tables.open_file(file) as f: + # read in everything + for array in f.walk_nodes("/", "Array"): + d[array.name] = array.read() + return d + + +def _fill_adata_slots_automatically(adata, d): + """Add other information to the adata object in the appropriate slot.""" + + for key, value in d.items(): + try: + if value is None: + continue + value = np.asarray(value) + if len(value.shape) == 0: + adata.uns[key] = value + elif value.shape[0] == adata.shape[0]: + if (len(value.shape) < 2) or (value.shape[1] < 2): + adata.obs[key] = value + else: + adata.obsm[key] = value + elif value.shape[0] == adata.shape[1]: + if value.dtype.name.startswith('bytes'): + adata.var[key] = value.astype(str) + else: + adata.var[key] = value + else: + adata.uns[key] = value + except Exception: + print('Unable to load data into AnnData: ', key, value, type(value)) \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/main.nf b/target/nextflow/correction/cellbender_remove_background_v0_2/main.nf new file mode 100644 index 00000000000..c0d54b13f45 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background_v0_2/main.nf @@ -0,0 +1,2946 @@ +// cellbender_remove_background_v0_2 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "cellbender_remove_background_v0_2", + "namespace" : "correction", + "version" : "0.12.4", + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file.", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "List of modalities to process.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Full count matrix as an h5mu file, with background RNA removed. 
This file contains all the original droplet barcodes.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer_output", + "description" : "Output layer", + "default" : [ + "corrected" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_latent_rt_efficiency", + "default" : [ + "latent_rt_efficiency" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_latent_cell_probability", + "default" : [ + "latent_cell_probability" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_latent_scale", + "default" : [ + "latent_scale" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_ambient_expression", + "default" : [ + "ambient_expression" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_latent_gene_encoding", + "default" : [ + "cellbender_latent_gene_encoding" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "integer", + "name" : "--expected_cells", + "description" 
: "Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient).", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--total_droplets_included", + "description" : "The number of droplets from the rank-ordered UMI plot\nthat will be analyzed. The largest 'total_droplets'\ndroplets will have their cell probabilities inferred\nas an output.\n", + "example" : [ + 25000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--expected_cells_from_qc", + "description" : "Will use the Cell Ranger QC to determine the estimated number of cells", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--model", + "description" : "Which model is being used for count data. 'simple'\ndoes not model either ambient RNA or random barcode\nswapping (for debugging purposes -- not recommended).\n'ambient' assumes background RNA is incorporated into\ndroplets. 'swapping' assumes background RNA comes from\nrandom barcode swapping. 'full' uses a combined\nambient and swapping model.\n", + "default" : [ + "full" + ], + "required" : false, + "choices" : [ + "simple", + "ambient", + "swapping", + "full" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--epochs", + "description" : "Number of epochs to train.", + "default" : [ + 150 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--low_count_threshold", + "description" : "Droplets with UMI counts below this number are completely \nexcluded from the analysis. 
This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n", + "default" : [ + 15 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--z_dim", + "description" : "Dimension of latent variable z.\n", + "default" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--z_layers", + "description" : "Dimension of hidden layers in the encoder for z.\n", + "default" : [ + 500 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--training_fraction", + "description" : "Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n", + "default" : [ + 0.9 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--empty_drop_training_fraction", + "description" : "Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n", + "default" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--fpr", + "description" : "Target false positive rate in (0, 1). A false positive\nis a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal\nremoval at high values of FPR. 
You can specify\nmultiple values, which will create multiple output\nfiles.\n", + "default" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--exclude_antibody_capture", + "description" : "Including the flag --exclude-antibody-capture will\ncause remove-background to operate on gene counts\nonly, ignoring other features.\n", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--learning_rate", + "description" : "Training detail: lower learning rate for inference. A\nOneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. (For this\nvalue, probably do not exceed 1e-3).\n", + "example" : [ + 1.0E-4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--cuda", + "description" : "Including the flag --cuda will run the inference on a\nGPU.\n", + "direction" : "input", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/" + }, + { + "type" : "file", + "path" : "helper.py", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. 
Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "nvcr.io/nvidia/pytorch:22.12-py3", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "muon==0.1.5", + "tables==3.8.0", + "cellbender==0.2.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "muon~=0.1.4" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "gpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + 
"mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/correction/cellbender_remove_background_v0_2/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/correction/cellbender_remove_background_v0_2", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +import tempfile +import subprocess +import os +import sys +import numpy as np +from scipy.sparse import csr_matrix + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer_output': $( if [ ! -z ${VIASH_PAR_LAYER_OUTPUT+x} ]; then echo "r'${VIASH_PAR_LAYER_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_latent_rt_efficiency': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_RT_EFFICIENCY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_latent_cell_probability': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_CELL_PROBABILITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_latent_scale': $( if [ ! -z ${VIASH_PAR_OBS_LATENT_SCALE+x} ]; then echo "r'${VIASH_PAR_OBS_LATENT_SCALE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_ambient_expression': $( if [ ! -z ${VIASH_PAR_VAR_AMBIENT_EXPRESSION+x} ]; then echo "r'${VIASH_PAR_VAR_AMBIENT_EXPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_latent_gene_encoding': $( if [ ! -z ${VIASH_PAR_OBSM_LATENT_GENE_ENCODING+x} ]; then echo "r'${VIASH_PAR_OBSM_LATENT_GENE_ENCODING//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'expected_cells': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_EXPECTED_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'total_droplets_included': $( if [ ! -z ${VIASH_PAR_TOTAL_DROPLETS_INCLUDED+x} ]; then echo "int(r'${VIASH_PAR_TOTAL_DROPLETS_INCLUDED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'expected_cells_from_qc': $( if [ ! -z ${VIASH_PAR_EXPECTED_CELLS_FROM_QC+x} ]; then echo "r'${VIASH_PAR_EXPECTED_CELLS_FROM_QC//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'model': $( if [ ! -z ${VIASH_PAR_MODEL+x} ]; then echo "r'${VIASH_PAR_MODEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'epochs': $( if [ ! 
-z ${VIASH_PAR_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'low_count_threshold': $( if [ ! -z ${VIASH_PAR_LOW_COUNT_THRESHOLD+x} ]; then echo "int(r'${VIASH_PAR_LOW_COUNT_THRESHOLD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'z_dim': $( if [ ! -z ${VIASH_PAR_Z_DIM+x} ]; then echo "int(r'${VIASH_PAR_Z_DIM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'z_layers': $( if [ ! -z ${VIASH_PAR_Z_LAYERS+x} ]; then echo "list(map(int, r'${VIASH_PAR_Z_LAYERS//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), + 'training_fraction': $( if [ ! -z ${VIASH_PAR_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'empty_drop_training_fraction': $( if [ ! -z ${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_EMPTY_DROP_TRAINING_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'fpr': $( if [ ! -z ${VIASH_PAR_FPR+x} ]; then echo "list(map(float, r'${VIASH_PAR_FPR//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), + 'exclude_antibody_capture': $( if [ ! -z ${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_ANTIBODY_CAPTURE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'learning_rate': $( if [ ! -z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'cuda': $( if [ ! -z ${VIASH_PAR_CUDA+x} ]; then echo "r'${VIASH_PAR_CUDA//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +from helper import anndata_from_h5 + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Performing log transformation on modality %s", mod) +data = mdata.mod[mod] + +# with pathlib.Path(meta["temp_dir"]) / "cellbender" as temp_dir: +# os.mkdir(temp_dir) +with tempfile.TemporaryDirectory(prefix="cellbender-", dir=meta["temp_dir"]) as temp_dir: + # construct paths within tempdir + input_file = os.path.join(temp_dir, "input.h5ad") + output_file = os.path.join(temp_dir, "output.h5") + + logger.info("Creating AnnData input file for CellBender: '%s'", input_file) + data.write_h5ad(input_file) + + logger.info("Constructing CellBender command") + cmd_pars = [ + "cellbender", "remove-background", + "--input", input_file, + "--output", output_file + ] + + extra_args = [ + ("--expected-cells", "expected_cells", True), + ("--total-droplets-included", "total_droplets_included", True), + ("--model", "model", True), + ("--epochs", "epochs", True), + ("--cuda", "cuda", False), + ("--low-count-threshold", "low_count_threshold", True), + ("--z-dim", "z_dim", True), + ("--z-layers", "z_layers", True), + ("--training-fraction", "training_fraction", True), + 
("--exclude-antibody-capture", "exclude_antibody_capture", False), + ("--learning-rate", "learning_rate", True), + ("--empty-drop-training-fraction", "empty_drop_training_fraction", True), + ] + for (flag, name, is_kwarg) in extra_args: + if par[name]: + values = par[name] if isinstance(par[name], list) else [par[name]] + cmd_pars += [flag] + [str(val) for val in values] if is_kwarg else [flag] + + if par["expected_cells_from_qc"] and "metrics_cellranger" in data.uns: + assert par["expected_cells"] is None, "If min_counts is defined, expected_cells should be undefined" + assert par["total_droplets_included"] is None, "If min_counts is defined, expected_cells should be undefined" + met = data.uns["metrics_cellranger"] + col_name = "Estimated Number of Cells" + assert col_name in met.columns, "%s should be a column in .obs[metrics_cellranger]" + est_cells = met[col_name].values[0] + logger.info("Selecting --expected-cells %d and --total-droplets-included %d", est_cells, est_cells * 5) + cmd_pars += ["--expected-cells", str(est_cells), "--total-droplets-included", str(5*est_cells)] + + logger.info("Running CellBender: '%s'", ' '.join(cmd_pars)) + out = subprocess.check_output(cmd_pars).decode("utf-8") + + logger.info("Reading CellBender 10xh5 output file: '%s'", output_file) + # have to use custom read_10x_h5 function for now + # will be fixed when https://github.com/scverse/scanpy/pull/2344 is merged + # adata_out = sc.read_10x_h5(output_file, gex_only=False) + adata_out = anndata_from_h5(output_file, analyzed_barcodes_only=False) + + logger.info("Copying X output to MuData") + data.layers[par["layer_output"]] = adata_out.X + + logger.info("Copying .obs output to MuData") + obs_store = { + "obs_latent_rt_efficiency": "latent_RT_efficiency", + "obs_latent_cell_probability": "latent_cell_probability", + "obs_latent_scale": "latent_scale" + } + for to_name, from_name in obs_store.items(): + if par[to_name]: + if from_name in adata_out.obs: + data.obs[par[to_name]] = 
adata_out.obs[from_name] + # when using unfiltered data, the values will be in uns instead of obs + elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: + vec = np.zeros(data.n_obs) + vec[adata_out.uns['barcode_indices_for_latents']] = adata_out.uns[from_name] + data.obs[par[to_name]] = vec + + logger.info("Copying .var output to MuData") + var_store = { "var_ambient_expression": "ambient_expression" } + for to_name, from_name in var_store.items(): + if par[to_name]: + data.var[par[to_name]] = adata_out.var[from_name] + + logger.info("Copying obsm_latent_gene_encoding output to MuData") + obsm_store = { "obsm_latent_gene_encoding": "latent_gene_encoding" } + for to_name, from_name in obsm_store.items(): + if par[to_name]: + if from_name in adata_out.obsm: + data.obsm[par[to_name]] = adata_out.obsm[from_name] + elif from_name in adata_out.uns and 'barcode_indices_for_latents' in adata_out.uns: + matrix_to_store = adata_out.uns[from_name] + number_of_obs = data.X.shape[0] + latent_space_sparse = csr_matrix((number_of_obs, par['z_dim']), + dtype=adata_out.uns[from_name].dtype) + obs_rows_in_space_representation = adata_out.uns['barcode_indices_for_latents'] + latent_space_sparse[obs_rows_in_space_representation] = adata_out.uns[from_name] + data.obsm[par[to_name]] = latent_space_sparse + else: + raise RuntimeError("Requested to save latent gene encoding, but the data is either missing " + "from cellbender output or in an incorrect format.") + + +logger.info("Writing to file %s", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : 
"openpipelines-bio/correction_cellbender_remove_background_v0_2", + "tag" : "0.12.0" + }, + "label" : [ + "gpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. 
+ // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + // unique(false) returns a de-duplicated copy, so ppIds still lists the duplicated ids in the message + assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. Plain unique() removes the duplicates
+  // from ppIds itself, so the message below would no longer show the offending ids.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares as input files are touched, and only
+ * values that are still Strings; everything else is returned as is.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of storing it in the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    // an unset direction counts as "input", matching the check in _castParamTypes
+    def isInputFile = argSettings &&
+      argSettings.type == "file" &&
+      (argSettings.direction != null ? argSettings.direction : "input") == "input"
+    if (isInputFile) {
+      // only Strings are unresolved paths; other values are passed through untouched
+      def resolve = { path -> path instanceof String ? file(getChild(relativeTo, path)) : path }
+      parValue = parValue instanceof Collection ? parValue.collect(resolve) : resolve(parValue)
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map<String, Object>>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns [ location of the param_list file or null, parsed parameter sets ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared with 'def' so the values stay local to this closure call
+    // instead of being stored in the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list from here on, so single and multiple values share the same casts
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): parameters that are not in the config (paramSettings == null) skip this
+    // step and therefore stay wrapped in a list -- confirm that callers expect this.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config; only 'publishDir'
+ *                    and 'publish_dir' are accepted in addition.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. Element 0 of each set is its id,
+ *                      element 1 the parameter Map; further elements are appended
+ *                      to the result unchanged.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // Single-argument get() on purpose: the two-argument GDK variant would insert
+      // 'default: null' into the config entry when the key is missing.
+      // The safe navigation covers parameters that are not in the config (e.g. publish_dir).
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Defaults and incoming parameter sets go through the same split/cast steps
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+  def processedSets = applyConfig(params, config)
+
+  // Explicit values win over defaults; elements after the first two are passed through
+  def merged = processedSets.collect { event ->
+    def withDefaults = defaults + event[1]
+    [event[0], withDefaults] + event.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map whose keys are all allowed and that contains
+ * every required key.
+ *
+ * @param map The value to check.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // Build the warning as ONE string. Passing the hint as a second argument
+    // makes the logger treat it as a format parameter; because the message
+    // contains no '{}' placeholder, the hint would be dropped silently.
+    log.warn(
+      "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+    )
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  //
+  // valueToStr renders a directive value as Nextflow source text:
+  //   - strings shaped like '{ ... }' are emitted verbatim, all other strings
+  //     are wrapped in double quotes
+  //   - lists and maps are rendered recursively as '[a, b]' / '[k: v]'
+  //   - anything else falls back to Groovy's inspect()
+  // The variable is declared before it is assigned so that the closure body
+  // can refer to itself for the recursive List/Map branches.
+  def valueToStr
+  valueToStr = { val ->
+    if (val instanceof CharSequence) {
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  // Every directive becomes one "\n<name> <value>" line; 'label' and
+  // 'publishDir' hold a collection and produce one line per non-null entry.
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  // one 'path(viash_par_<name>)' input declaration per input file argument
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  // one 'path{...}' output declaration per output file argument
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" }
+          .collect { par ->
+            // normalise the argument value to a list of files:
+            // a List is used as-is, a single Path is wrapped,
+            // anything else (missing, null, other types) yields an empty list
+            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
+            def inputFiles = []
+            if (val == null) {
+              inputFiles = []
+            } else if (val instanceof List) {
+              inputFiles = val
+            } else if (val instanceof Path) {
+              inputFiles = [ val ]
+            } else {
+              inputFiles = []
+            }
+            if (!workflow.stubRun) {
+              // throw error when an input file doesn't exist
+              inputFiles.each{ file ->
+                assert file.exists() :
+                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
+                  " Required input file does not exist.\n" +
+                  " Path: '$file'.\n" +
+                  " Expected input file to exist"
+              }
+            }
+            inputFiles
+          }
+
+        // remove input files
+        // (input files are handed to the process as separate tuple elements)
+        def argsExclInputFiles = thisConfig.functionality.allArguments
+          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
+          .collectEntries { par ->
+            def parName = par.plainName
+            def val = combinedArgs[parName]
+            // collections given for a 'multiple' argument are joined into one string
+            if (par.multiple && val instanceof Collection) {
+              val = val.join(par.multiple_sep)
+            }
+            // fill in the '$id' and '$key' placeholders of output file names
+            if (par.direction == "output" && par.type == "file") {
+              val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
+            }
+            [parName, val]
+          }
+
+        // resulting tuple: [id, <one file list per input file argument>..., args, resourcesDir]
+        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
+      }
+      | processObj
+      | map { output ->
+        // output[0] is the id; output[index + 1] holds the files matched for
+        // the index-th output file argument
+        def outputFiles = thisConfig.functionality.allArguments
+          .findAll { it.type == "file" && it.direction == "output" }
+          .indexed()
+          .collectEntries{ index, par ->
+            // 'def' keeps the variable local to this closure call; without it
+            // the name lands in the script binding and is shared between
+            // operator closures that run concurrently
+            def out = output[index + 1]
+            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
+            // note: the parentheses are required, '!out instanceof List' is
+            // parsed as '(!out) instanceof List', which is always false
+            if (!(out instanceof List) || out.size() <= 1) {
+              // nothing but the dummy file was matched
+              if (par.multiple) {
+                out = []
+              } else {
+                assert !par.required :
+                  "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
+                  " Required output file is missing"
+                out = null
+              }
+            } else if (out.size() == 2 && !par.multiple) {
+              // dummy + exactly one real file: unwrap the single file
+              out = out[1]
+            } else {
+              // dummy + several files (or a 'multiple' argument): keep a list
+              out = out.drop(1)
+            }
+            [ par.plainName, out ]
+          }
+
+        // drop null outputs
+        
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config 
new file mode 100644 index 00000000000..360afb03868 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow.config @@ -0,0 +1,107 @@ +manifest { + name = 'cellbender_remove_background_v0_2' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = 
false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml new file mode 100644 index 00000000000..aebea733c6c --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_params.yaml @@ -0,0 +1,33 @@ +# Inputs +input: # please fill in - example: "input.h5mu" +modality: "rna" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +layer_output: "corrected" +obs_latent_rt_efficiency: "latent_rt_efficiency" +obs_latent_cell_probability: "latent_cell_probability" +obs_latent_scale: "latent_scale" +var_ambient_expression: "ambient_expression" +obsm_latent_gene_encoding: 
"cellbender_latent_gene_encoding" + +# Arguments +# expected_cells: 1000 +# total_droplets_included: 25000 +expected_cells_from_qc: true +model: "full" +epochs: 150 +low_count_threshold: 15 +z_dim: 100 +z_layers: [500] +training_fraction: 0.9 +empty_drop_training_fraction: 0.5 +fpr: [0.01] +exclude_antibody_capture: false +# learning_rate: 1.0E-4 +cuda: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json new file mode 100644 index 00000000000..0a7b6baa0eb --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background_v0_2/nextflow_schema.json @@ -0,0 +1,351 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cellbender_remove_background_v0_2", +"description": "Eliminating technical artifacts from high-throughput single-cell RNA sequencing data.\n\nThis module removes counts due to ambient RNA molecules and random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the moment, only the count matrices produced by the CellRanger count pipeline is supported. Support for additional tools and protocols \nwill be added in the future. A quick start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file." + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. List of modalities to process", + "help_text": "Type: `string`, default: `rna`. List of modalities to process." 
+ , + "default": "rna" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Full count matrix as an h5mu file, with background RNA removed. This file contains all the original droplet barcodes." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + + , + "layer_output": { + "type": + "string", + "description": "Type: `string`, default: `corrected`. Output layer", + "help_text": "Type: `string`, default: `corrected`. Output layer" + , + "default": "corrected" + } + + + , + "obs_latent_rt_efficiency": { + "type": + "string", + "description": "Type: `string`, default: `latent_rt_efficiency`. ", + "help_text": "Type: `string`, default: `latent_rt_efficiency`. " + , + "default": "latent_rt_efficiency" + } + + + , + "obs_latent_cell_probability": { + "type": + "string", + "description": "Type: `string`, default: `latent_cell_probability`. ", + "help_text": "Type: `string`, default: `latent_cell_probability`. " + , + "default": "latent_cell_probability" + } + + + , + "obs_latent_scale": { + "type": + "string", + "description": "Type: `string`, default: `latent_scale`. ", + "help_text": "Type: `string`, default: `latent_scale`. " + , + "default": "latent_scale" + } + + + , + "var_ambient_expression": { + "type": + "string", + "description": "Type: `string`, default: `ambient_expression`. ", + "help_text": "Type: `string`, default: `ambient_expression`. 
" + , + "default": "ambient_expression" + } + + + , + "obsm_latent_gene_encoding": { + "type": + "string", + "description": "Type: `string`, default: `cellbender_latent_gene_encoding`. ", + "help_text": "Type: `string`, default: `cellbender_latent_gene_encoding`. " + , + "default": "cellbender_latent_gene_encoding" + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "expected_cells": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)", + "help_text": "Type: `integer`, example: `1000`. Number of cells expected in the dataset (a rough estimate within a factor of 2 is sufficient)." + + } + + + , + "total_droplets_included": { + "type": + "integer", + "description": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will be analyzed", + "help_text": "Type: `integer`, example: `25000`. The number of droplets from the rank-ordered UMI plot\nthat will be analyzed. The largest \u0027total_droplets\u0027\ndroplets will have their cell probabilities inferred\nas an output.\n" + + } + + + , + "expected_cells_from_qc": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Will use the Cell Ranger QC to determine the estimated number of cells", + "help_text": "Type: `boolean`, default: `true`. Will use the Cell Ranger QC to determine the estimated number of cells" + , + "default": "True" + } + + + , + "model": { + "type": + "string", + "description": "Type: `string`, default: `full`, choices: ``simple`, `ambient`, `swapping`, `full``. Which model is being used for count data", + "help_text": "Type: `string`, default: `full`, choices: ``simple`, `ambient`, `swapping`, `full``. Which model is being used for count data. 
\u0027simple\u0027\ndoes not model either ambient RNA or random barcode\nswapping (for debugging purposes -- not recommended).\n\u0027ambient\u0027 assumes background RNA is incorporated into\ndroplets. \u0027swapping\u0027 assumes background RNA comes from\nrandom barcode swapping. \u0027full\u0027 uses a combined\nambient and swapping model.\n", + "enum": ["simple", "ambient", "swapping", "full"] + + , + "default": "full" + } + + + , + "epochs": { + "type": + "integer", + "description": "Type: `integer`, default: `150`. Number of epochs to train", + "help_text": "Type: `integer`, default: `150`. Number of epochs to train." + , + "default": "150" + } + + + , + "low_count_threshold": { + "type": + "integer", + "description": "Type: `integer`, default: `15`. Droplets with UMI counts below this number are completely \nexcluded from the analysis", + "help_text": "Type: `integer`, default: `15`. Droplets with UMI counts below this number are completely \nexcluded from the analysis. This can help identify the correct \nprior for empty droplet counts in the rare case where empty \ncounts are extremely high (over 200).\n" + , + "default": "15" + } + + + , + "z_dim": { + "type": + "integer", + "description": "Type: `integer`, default: `100`. Dimension of latent variable z", + "help_text": "Type: `integer`, default: `100`. Dimension of latent variable z.\n" + , + "default": "100" + } + + + , + "z_layers": { + "type": + "string", + "description": "Type: List of `integer`, default: `500`, multiple_sep: `\":\"`. Dimension of hidden layers in the encoder for z", + "help_text": "Type: List of `integer`, default: `500`, multiple_sep: `\":\"`. Dimension of hidden layers in the encoder for z.\n" + , + "default": "500" + } + + + , + "training_fraction": { + "type": + "number", + "description": "Type: `double`, default: `0.9`. Training detail: the fraction of the data used for training", + "help_text": "Type: `double`, default: `0.9`. 
Training detail: the fraction of the data used for training.\nThe rest is never seen by the inference algorithm. Speeds up learning.\n" + , + "default": "0.9" + } + + + , + "empty_drop_training_fraction": { + "type": + "number", + "description": "Type: `double`, default: `0.5`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets", + "help_text": "Type: `double`, default: `0.5`. Training detail: the fraction of the training data each epoch that \nis drawn (randomly sampled) from surely empty droplets.\n" + , + "default": "0.5" + } + + + , + "fpr": { + "type": + "string", + "description": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target false positive rate in (0, 1)", + "help_text": "Type: List of `double`, default: `0.01`, multiple_sep: `\":\"`. Target false positive rate in (0, 1). A false positive\nis a true signal count that is erroneously removed.\nMore background removal is accompanied by more signal\nremoval at high values of FPR. You can specify\nmultiple values, which will create multiple output\nfiles.\n" + , + "default": "0.01" + } + + + , + "exclude_antibody_capture": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Including the flag --exclude-antibody-capture will\ncause remove-background to operate on gene counts\nonly, ignoring other features", + "help_text": "Type: `boolean_true`, default: `false`. Including the flag --exclude-antibody-capture will\ncause remove-background to operate on gene counts\nonly, ignoring other features.\n" + , + "default": "False" + } + + + , + "learning_rate": { + "type": + "number", + "description": "Type: `double`, example: `1.0E-4`. Training detail: lower learning rate for inference", + "help_text": "Type: `double`, example: `1.0E-4`. Training detail: lower learning rate for inference. A\nOneCycle learning rate schedule is used, where the\nupper learning rate is ten times this value. 
(For this\nvalue, probably do not exceed 1e-3).\n" + + } + + + , + "cuda": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Including the flag --cuda will run the inference on a\nGPU", + "help_text": "Type: `boolean_true`, default: `false`. Including the flag --cuda will run the inference on a\nGPU.\n" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py b/target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/correction/cellbender_remove_background_v0_2/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/dataflow/concat/.config.vsh.yaml b/target/nextflow/dataflow/concat/.config.vsh.yaml new file mode 100644 index 00000000000..e2c35f34947 --- /dev/null +++ b/target/nextflow/dataflow/concat/.config.vsh.yaml @@ -0,0 +1,222 @@ +functionality: + name: "concat" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + 
roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Paths to the different samples to be concatenated." + info: null + example: + - "sample_paths" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "string" + name: "--input_id" + description: "Names of the different samples that have to be concatenated. Must\ + \ be specified when using '--mode move'.\nIn this case, the ids will be used\ + \ for the columns names of the dataframes registring the conflicts.\nIf specified,\ + \ must be of same length as `--input`.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_sample_name" + description: "Name of the .obs key under which to add the sample names." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--other_axis_mode" + description: "How to handle the merging of other axis (var, obs, ...).\n\n - None:\ + \ keep no data\n - same: only keep elements of the matrices which are the same\ + \ in each of the samples\n - unique: only keep elements for which there is only\ + \ 1 possible value (1 value that can occur in multiple samples)\n - first: keep\ + \ the annotation from the first sample\n - only: keep elements that show up\ + \ in only one of the objects (1 unique element in only 1 sample)\n - move: identical\ + \ to 'same', but moving the conflicting values to .varm or .obsm\n" + info: null + default: + - "move" + required: false + choices: + - "same" + - "unique" + - "first" + - "only" + - "concat" + - "move" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Concatenates several uni-modal samples in .h5mu files into a single\ + \ file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + - type: "file" + path: "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + 
setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "pandas~=2.1.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + - "muon" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/concat" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/concat/concat" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/dataflow/concat/main.nf b/target/nextflow/dataflow/concat/main.nf new file mode 100644 index 
00000000000..4ca228e8537 --- /dev/null +++ b/target/nextflow/dataflow/concat/main.nf @@ -0,0 +1,2911 @@ +// concat 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "concat", + "namespace" : "dataflow", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Paths to the different samples to be concatenated.", + "example" : [ + "sample_paths" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ",", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_id", + "description" : "Names of the different samples that have to be concatenated. 
Must be specified when using '--mode move'.\nIn this case, the ids will be used for the columns names of the dataframes registring the conflicts.\nIf specified, must be of same length as `--input`.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ",", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_sample_name", + "description" : "Name of the .obs key under which to add the sample names.", + "default" : [ + "sample_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--other_axis_mode", + "description" : "How to handle the merging of other axis (var, obs, ...).\n\n - None: keep no data\n - same: only keep elements of the matrices which are the same in each of the samples\n - unique: only keep elements for which there is only 1 possible value (1 value that can occur in multiple samples)\n - first: keep the annotation from the first sample\n - only: keep elements that show up in only one of the objects (1 unique element in only 1 sample)\n - move: identical to 'same', but moving the conflicting values to .varm or .obsm\n", + "default" : [ + "move" + ], + "required" : false, + "choices" : [ + "same", + "unique", + "first", + "only", + "concat", + "move" + ], + "direction" : "input", + "multiple" : false, + 
"multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Concatenates several uni-modal samples in .h5mu files into a single file.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/" + }, + { + "type" : "file", + "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "pandas~=2.1.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0", + "muon" + ], + "upgrade" 
: true + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midcpu", + "highmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/dataflow/concat/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/concat", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from __future__ import annotations +import sys +import anndata +import mudata as mu +import pandas as pd +import numpy as np +from collections.abc import Iterable +from 
multiprocessing import Pool +from pathlib import Path +from h5py import File as H5File +from typing import Literal +import shutil + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_sample_name': $( if [ ! -z ${VIASH_PAR_OBS_SAMPLE_NAME+x} ]; then echo "r'${VIASH_PAR_OBS_SAMPLE_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'other_axis_mode': $( if [ ! -z ${VIASH_PAR_OTHER_AXIS_MODE+x} ]; then echo "r'${VIASH_PAR_OTHER_AXIS_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) + +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion + +# from compress_h5mu import compress_h5mu +from h5py import Group, Dataset +from typing import Union +from functools import partial + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape 
defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = input_bytes.read(100) + # The metadata is padded with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\\\\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\\\\0" * (512 - nbytes)) +# END TEMPORARY WORKAROUND compress_h5mu + +# START TEMPORARY WORKAROUND setup_logger +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def indexes_unique(indices: Iterable[pd.Index]) -> bool: + combined_indices = indices[0].append(indices[1:]) + return combined_indices.is_unique + +def check_observations_unique(samples: Iterable[anndata.AnnData]) -> None: + observation_ids = [sample.obs.index for sample in samples] + if not 
indexes_unique(observation_ids): + raise ValueError("Observations are not unique across samples.") + + +def nunique(row): + unique = pd.unique(row) + unique_without_na = pd.core.dtypes.missing.remove_na_arraylike(unique) + return len(unique_without_na) > 1 + +def any_row_contains_duplicate_values(n_processes: int, frame: pd.DataFrame) -> bool: + """ + Check if any row contains duplicate values, that are not NA. + """ + numpy_array = frame.to_numpy() + with Pool(n_processes) as pool: + is_duplicated = pool.map(nunique, iter(numpy_array)) + return any(is_duplicated) + +def concatenate_matrices(n_processes: int, matrices: dict[str, pd.DataFrame], align_to: pd.Index | None) \\\\ + -> tuple[dict[str, pd.DataFrame], pd.DataFrame | None, dict[str, pd.core.dtypes.dtypes.Dtype]]: + """ + Merge matrices by combining columns that have the same name. + Columns that contain conflicting values (e.i. the columns have different values), + are not merged, but instead moved to a new dataframe. + """ + column_names = set(column_name for var in matrices.values() for column_name in var) + logger.debug('Trying to concatenate columns: %s.', ",".join(column_names)) + if not column_names: + return {}, pd.DataFrame(index=align_to) + conflicts, concatenated_matrix = \\\\ + split_conflicts_and_concatenated_columns(n_processes, + matrices, + column_names, + align_to) + concatenated_matrix = cast_to_writeable_dtype(concatenated_matrix) + conflicts = {conflict_name: cast_to_writeable_dtype(conflict_df) + for conflict_name, conflict_df in conflicts.items()} + return conflicts, concatenated_matrix + +def get_first_non_na_value_vector(df): + numpy_arr = df.to_numpy() + n_rows, n_cols = numpy_arr.shape + col_index = pd.isna(numpy_arr).argmin(axis=1) + flat_index = n_cols * np.arange(n_rows) + col_index + return pd.Series(numpy_arr.ravel()[flat_index], index=df.index, name=df.columns[0]) + +def split_conflicts_and_concatenated_columns(n_processes: int, + matrices: dict[str, pd.DataFrame], + 
column_names: Iterable[str], + align_to: pd.Index | None = None) -> \\\\ + tuple[dict[str, pd.DataFrame], pd.DataFrame]: + """ + Retrieve columns with the same name from a list of dataframes which are + identical across all the frames (ignoring NA values). + Columns which are not the same are regarded as 'conflicts', + which are stored in seperate dataframes, one per columns + with the same name that store conflicting values. + """ + conflicts = {} + concatenated_matrix = [] + for column_name in column_names: + columns = {input_id: var[column_name] + for input_id, var in matrices.items() + if column_name in var} + assert columns, "Some columns should have been found." + concatenated_columns = pd.concat(columns.values(), axis=1, + join="outer", sort=False) + if any_row_contains_duplicate_values(n_processes, concatenated_columns): + concatenated_columns.columns = columns.keys() # Use the sample id as column name + if align_to is not None: + concatenated_columns = concatenated_columns.reindex(align_to, copy=False) + conflicts[f'conflict_{column_name}'] = concatenated_columns + else: + unique_values = get_first_non_na_value_vector(concatenated_columns) + concatenated_matrix.append(unique_values) + if not concatenated_matrix: + return conflicts, pd.DataFrame(index=align_to) + concatenated_matrix = pd.concat(concatenated_matrix, join="outer", + axis=1, sort=False) + if align_to is not None: + concatenated_matrix = concatenated_matrix.reindex(align_to, copy=False) + return conflicts, concatenated_matrix + +def cast_to_writeable_dtype(result: pd.DataFrame) -> pd.DataFrame: + """ + Cast the dataframe to dtypes that can be written by mudata. + """ + # dtype inferral workfs better with np.nan + result = result.replace({pd.NA: np.nan}) + + # MuData supports nullable booleans and ints + # ie. 
\\`IntegerArray\\` and \\`BooleanArray\\` + result = result.convert_dtypes(infer_objects=True, + convert_integer=True, + convert_string=False, + convert_boolean=True, + convert_floating=False) + + # Convert leftover 'object' columns to string + # However, na values are supported, so convert all values except NA's to string + object_cols = result.select_dtypes(include='object').columns.values + for obj_col in object_cols: + result[obj_col] = result[obj_col].where(result[obj_col].isna(), result[obj_col].astype(str)).astype('category') + return result + +def split_conflicts_modalities(n_processes: int, samples: dict[str, anndata.AnnData], output: anndata.AnnData) \\\\ + -> anndata.AnnData: + """ + Merge .var and .obs matrices of the anndata objects. Columns are merged + when the values (excl NA) are the same in each of the matrices. + Conflicting columns are moved to a separate dataframe (one dataframe for each column, + containing all the corresponding column from each sample). + """ + matrices_to_parse = ("var", "obs") + for matrix_name in matrices_to_parse: + matrices = {sample_id: getattr(sample, matrix_name) for sample_id, sample in samples.items()} + output_index = getattr(output, matrix_name).index + align_to = output_index if matrix_name == "var" else None + conflicts, concatenated_matrix = concatenate_matrices(n_processes, matrices, align_to) + if concatenated_matrix.empty: + concatenated_matrix.index = output_index + # Write the conflicts to the output + for conflict_name, conflict_data in conflicts.items(): + getattr(output, f"{matrix_name}m")[conflict_name] = conflict_data + + # Set other annotation matrices in the output + setattr(output, matrix_name, concatenated_matrix) + + return output + + +def concatenate_modality(n_processes: int, mod: str, input_files: Iterable[str | Path], + other_axis_mode: str, input_ids: tuple[str]) -> anndata.AnnData: + + concat_modes = { + "move": None, + } + other_axis_mode_to_apply = concat_modes.get(other_axis_mode, 
other_axis_mode) + + mod_data = {} + for input_id, input_file in zip(input_ids, input_files): + try: + mod_data[input_id] = mu.read_h5ad(input_file, mod=mod) + except KeyError as e: # Modality does not exist for this sample, skip it + if f"Unable to open object '{mod}' doesn't exist" not in str(e): + raise e + pass + check_observations_unique(mod_data.values()) + + concatenated_data = anndata.concat(mod_data.values(), join='outer', merge=other_axis_mode_to_apply) + + if other_axis_mode == "move": + concatenated_data = split_conflicts_modalities(n_processes, mod_data, concatenated_data) + + return concatenated_data + +def concatenate_modalities(n_processes: int, modalities: list[str], input_files: Path | str, + other_axis_mode: str, output_file: Path | str, + compression: Literal['gzip'] | Literal['lzf'], + input_ids: tuple[str] | None = None) -> None: + """ + Join the modalities together into a single multimodal sample. + """ + logger.info('Concatenating samples.') + output_file, input_files = Path(output_file), [Path(input_file) for input_file in input_files] + output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") + output_file_uncompressed.touch() + # Create empty mudata file + mdata = mu.MuData({modality: anndata.AnnData() for modality in modalities}) + mdata.write(output_file_uncompressed, compression=compression) + + for mod_name in modalities: + new_mod = concatenate_modality(n_processes, mod_name, + input_files, other_axis_mode, + input_ids) + logger.info("Writing out modality '%s' to '%s' with compression '%s'.", + mod_name, output_file_uncompressed, compression) + mu.write_h5ad(output_file_uncompressed, data=new_mod, mod=mod_name) + + if compression: + compress_h5mu(output_file_uncompressed, output_file, compression=compression) + output_file_uncompressed.unlink() + else: + shutil.move(output_file_uncompressed, output_file) + + logger.info("Concatenation successful.") + +def main() -> None: + # Get a list of all possible 
modalities + mods = set() + for path in par["input"]: + try: + with H5File(path, 'r') as f_root: + mods = mods | set(f_root["mod"].keys()) + except OSError: + raise OSError(f"Failed to load {path}. Is it a valid h5 file?") + + input_ids = None + if par["input_id"]: + input_ids: tuple[str] = tuple(i.strip() for i in par["input_id"]) + if len(input_ids) != len(par["input"]): + raise ValueError("The number of sample names must match the number of sample files.") + + if len(set(input_ids)) != len(input_ids): + raise ValueError("The sample names should be unique.") + + logger.info("\\\\nConcatenating data from paths:\\\\n\\\\t%s", + "\\\\n\\\\t".join(par["input"])) + + if par["other_axis_mode"] == "move" and not input_ids: + raise ValueError("--mode 'move' requires --input_ids.") + + n_processes = meta["cpus"] if meta["cpus"] else 1 + concatenate_modalities(n_processes, + list(mods), + par["input"], + par["other_axis_mode"], + par["output"], + par["output_compression"], + input_ids=input_ids) + + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/dataflow_concat", + "tag" : "0.12.0" + }, + "label" : [ + "midcpu", + "highmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. 
the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *     ~ Check if any unknown parameters are found
+ *     ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. Element 0 of each set is the ID,
+ *                      element 1 the parameter Map; any further elements are
+ *                      appended to the result unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (values from the param_list win over the global ones)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      if (paramValue != null) {
+        return true
+      }
+      // 'def' keeps this lookup local instead of writing it to the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // publish_dir / publishDir have no entry in the config, hence the null-safe call
+      parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // declared with 'def' so the list does not end up in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters. 
+ * - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the defaults declared in the config, keyed by plain argument name
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { arg ->
+    if (arg.containsKey("default")) {
+      declaredDefaults[arg.plainName] = arg.default
+    }
+  }
+
+  // Run the defaults and the incoming parameter sets through the same config handling
+  def castDefaults = applyConfigToOneParameterSet(declaredDefaults, config)
+  def castInputs = applyConfig(params, config)
+
+  // Explicit values override the defaults; elements past index 1 are passed through untouched
+  def merged = castInputs.collect { tuple ->
+    [tuple[0], castDefaults + tuple[1]] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique. 
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and holds all required keys.
+ *
+ * @param map The value to check; the first assertion fails if it is not a Map.
+ * @param expectedKeys Collection of keys that are allowed to occur in the map.
+ * @param requiredKeys Collection of keys that must occur in the map.
+ * @param mapName Name used in the assertion messages; may be null or empty.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message has to name the closure parameter; 'key' is not defined in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Entries with a null value are dropped first. Each of the keys simplifyInput,
+ * simplifyOutput, transcript and publish must then be present and hold a Boolean.
+ *
+ * @param auto Map with the auto settings.
+ *
+ * @return A Map restricted to the four expected keys.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local; without it the name is written to the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected auto.$key to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); '!' binds tighter than 'instanceof' in Groovy, hence the parentheses + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/dataflow/concat/nextflow.config b/target/nextflow/dataflow/concat/nextflow.config new file mode 100644 index 00000000000..798f70e2b32 --- 
/dev/null +++ b/target/nextflow/dataflow/concat/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'concat' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Concatenates several uni-modal samples in .h5mu files into a single file.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: 
mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/dataflow/concat/nextflow_params.yaml b/target/nextflow/dataflow/concat/nextflow_params.yaml new file mode 100644 index 00000000000..f59626839bb --- /dev/null +++ b/target/nextflow/dataflow/concat/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: ["sample_paths"] +# input_id: ["foo"] +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +obs_sample_name: "sample_id" +other_axis_mode: "move" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/dataflow/concat/nextflow_schema.json b/target/nextflow/dataflow/concat/nextflow_schema.json new file mode 100644 index 00000000000..101e567e1ce --- /dev/null +++ b/target/nextflow/dataflow/concat/nextflow_schema.json @@ -0,0 +1,127 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "concat", +"description": "Concatenates several uni-modal samples in .h5mu files into a single file.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + 
"description": "Type: List of `file`, required, example: `sample_paths`, multiple_sep: `\",\"`. Paths to the different samples to be concatenated", + "help_text": "Type: List of `file`, required, example: `sample_paths`, multiple_sep: `\",\"`. Paths to the different samples to be concatenated." + + } + + + , + "input_id": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\",\"`. Names of the different samples that have to be concatenated", + "help_text": "Type: List of `string`, multiple_sep: `\",\"`. Names of the different samples that have to be concatenated. Must be specified when using \u0027--mode move\u0027.\nIn this case, the ids will be used for the columns names of the dataframes registring the conflicts.\nIf specified, must be of same length as `--input`.\n" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. ", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. " + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "obs_sample_name": { + "type": + "string", + "description": "Type: `string`, default: `sample_id`. Name of the ", + "help_text": "Type: `string`, default: `sample_id`. Name of the .obs key under which to add the sample names." + , + "default": "sample_id" + } + + + , + "other_axis_mode": { + "type": + "string", + "description": "Type: `string`, default: `move`, choices: ``same`, `unique`, `first`, `only`, `concat`, `move``. 
How to handle the merging of other axis (var, obs, ", + "help_text": "Type: `string`, default: `move`, choices: ``same`, `unique`, `first`, `only`, `concat`, `move``. How to handle the merging of other axis (var, obs, ...).\n\n - None: keep no data\n - same: only keep elements of the matrices which are the same in each of the samples\n - unique: only keep elements for which there is only 1 possible value (1 value that can occur in multiple samples)\n - first: keep the annotation from the first sample\n - only: keep elements that show up in only one of the objects (1 unique element in only 1 sample)\n - move: identical to \u0027same\u0027, but moving the conflicting values to .varm or .obsm\n", + "enum": ["same", "unique", "first", "only", "concat", "move"] + + , + "default": "move" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/dataflow/concat/setup_logger.py b/target/nextflow/dataflow/concat/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/dataflow/concat/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/dataflow/merge/.config.vsh.yaml b/target/nextflow/dataflow/merge/.config.vsh.yaml new file mode 100644 index 00000000000..9a87c8835ba --- /dev/null +++ b/target/nextflow/dataflow/merge/.config.vsh.yaml @@ -0,0 +1,175 @@ +functionality: + name: "merge" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Paths to the single-modality .h5mu files that need to be combined" + info: null + default: + - "sample_paths" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Path to the output file." 
+ info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Combine one or more single-modality .h5mu files together into one\ + \ .h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu" + - type: "file" + path: "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "pandas~=2.0.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false 
+ transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/merge" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/merge/merge" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/dataflow/merge/main.nf b/target/nextflow/dataflow/merge/main.nf new file mode 100644 index 00000000000..14e6a94765b --- /dev/null +++ b/target/nextflow/dataflow/merge/main.nf @@ -0,0 +1,2614 @@ +// merge 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "merge", + "namespace" : "dataflow", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Paths to the single-modality .h5mu files that need to be combined", + "default" : [ + "sample_paths" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ",", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Path to the output file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + 
"multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Combine one or more single-modality .h5mu files together into one .h5mu file.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" + }, + { + "type" : "file", + "path" : "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" + }, + { + "type" : "file", + "path" : "../../../resources_test/merge_test_data/pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "pandas~=2.0.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ 
+ "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "highmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/dataflow/merge/config.vsh.yml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/merge", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from __future__ import annotations +import sys +import mudata as md +import pandas as pd +import numpy as np + + +### VIASH START +# The 
following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(): + logger.info('Reading input files %s', ",".join(par["input"])) + input_samples = [md.read_h5mu(path) for path in par["input"]] + + logger.info('Merging into single object.') + sample_modalities = {} + for input_sample in input_samples: + for mod_name, mod_data in input_sample.mod.items(): + if mod_name in sample_modalities: + raise ValueError(f"Modality '{mod_name}' was found in more than 1 sample.") + sample_modalities[mod_name] = mod_data + + merged = md.MuData(sample_modalities) + merged.update() + for df_attr in ("var", "obs"): + df = getattr(merged, df_attr) + df = df.replace({pd.NA: np.nan}, inplace=False) + + # MuData supports nullable booleans and ints + # ie. 
\\`IntegerArray\\` and \\`BooleanArray\\` + df = df.convert_dtypes(infer_objects=True, + convert_integer=True, + convert_string=False, + convert_boolean=True, + convert_floating=False) + + # Convert leftover 'object' columns to string + object_cols = df.select_dtypes(include='object').columns.values + for obj_col in object_cols: + df[obj_col].astype(str).astype('category') + setattr(merged, df_attr, df) + + merged.write_h5mu(par["output"], compression=par["output_compression"]) + logger.info('Finished') + + +if __name__ == '__main__': + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/dataflow_merge", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "highmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the locations of the input file parameters have been + * resolved relative to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + def argSettings = config.functionality.allArguments.find{it.plainName == parName} // 'def' keeps this local instead of leaking into the script binding + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue)) // non-String values (e.g. Path objects) pass through unchanged + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected param_list format;
+  // every parser returns a pair: [source file (or null), list of parameter maps]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2.
+  // The single-argument get() is used on purpose: Groovy's get(key, default) would
+  // insert 'id: null' into the map that was handed to us (e.g. with format 'asis').
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list (unless it already is a Collection),
+ * cast element by element, and unwrapped again when the argument is known
+ * to the config and is not marked 'multiple'.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; 'config.functionality.allArguments'
+ *               is searched for an argument whose 'plainName' matches each key.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these local to the closure; without it they end up in the
+    // script binding and are shared between all invocations
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null);
+    // single-argument get() so that the config itself is never modified
+    def parType = paramSettings ? paramSettings.get("type") : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // only Strings are turned into file objects, anything else is passed on as is
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+      parType == "boolean_true" ||
+      parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed;
+  // the two publish-dir spellings are accepted although they are not in the config
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of tuples [id, parameter map, ...]; elements after
+ *                      the second one are appended to the result unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the param_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the value from 'param_list' wins (right-hand side of '+')
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local to this closure instead of leaking it
+      // into the script binding, where all invocations would share it
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // - '?.' because publish_dir / publishDir are accepted but are not in the config
+      // - single-argument get(): Groovy's get(key, default) would store
+      //   'default: null' in the shared config, which containsKey("default")
+      //   checks elsewhere would then mistake for a declared default
+      if ( paramValue != null || parameterSettings?.get("default") != null ) {
+        [paramName, paramValue]
+      } else {
+        // contribute nothing for this entry (explicit rather than an implicit null result)
+        [:]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as tuples whose
+ *         first element is the ID of the event and whose second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // declared with 'def' so the list stays local instead of becoming a script-wide variable
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the Viash config, keyed by the plain argument name
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def defaultArgs = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // The defaults go through the same processing as user-supplied values
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Process the incoming tuples and lay each parameter map on top of the defaults;
+  // anything after [id, values] is handed through untouched
+  def parsedArgs = applyConfig(params, config).collect { tuple ->
+    [tuple[0], parsedDefaultValues + tuple[1]] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(parsedArgs)
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def' and therefore live in
+  // the script binding; confirm whether the workflow block below relies on that
+  // before making them local.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events into one list so they can be checked together, then emit them one by one again
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map with only expected keys and with all required keys.
+ *
+ * @param map The value to check; must be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that have to occur in the map.
+ * @param mapName Name used in the error messages; may be empty or null.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message has to use this closure's own parameter: 'key' is not in scope here
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // declared with 'def' so it stays local to this closure + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!' binds tighter than 'instanceof' + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/dataflow/merge/nextflow.config b/target/nextflow/dataflow/merge/nextflow.config new file mode 100644 index 00000000000..5067d6a3790 --- 
/dev/null +++ b/target/nextflow/dataflow/merge/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'merge' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Combine one or more single-modality .h5mu files together into one .h5mu file.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + 
withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/dataflow/merge/nextflow_params.yaml b/target/nextflow/dataflow/merge/nextflow_params.yaml new file mode 100644 index 00000000000..90f26b252dd --- /dev/null +++ b/target/nextflow/dataflow/merge/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Arguments +input: # please fill in - example: ["sample_paths"] +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/dataflow/merge/nextflow_schema.json b/target/nextflow/dataflow/merge/nextflow_schema.json new file mode 100644 index 00000000000..c4ef46d5fee --- /dev/null +++ b/target/nextflow/dataflow/merge/nextflow_schema.json @@ -0,0 +1,94 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "merge", +"description": "Combine one or more single-modality .h5mu files together into one .h5mu file.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, default: `sample_paths`, 
multiple_sep: `\",\"`. Paths to the single-modality ", + "help_text": "Type: List of `file`, required, default: `sample_paths`, multiple_sep: `\",\"`. Paths to the single-modality .h5mu files that need to be combined" + , + "default": "sample_paths" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`. Path to the output file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`. Path to the output file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/dataflow/merge/setup_logger.py b/target/nextflow/dataflow/merge/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/dataflow/merge/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/dataflow/split_modalities/.config.vsh.yaml b/target/nextflow/dataflow/split_modalities/.config.vsh.yaml new file mode 100644 index 00000000000..292452a61f4 --- /dev/null +++ b/target/nextflow/dataflow/split_modalities/.config.vsh.yaml @@ -0,0 +1,214 @@ +functionality: + name: "split_modalities" + namespace: "dataflow" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: 
"https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to a single .h5mu file." + info: null + default: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory containing multiple h5mu files." + info: null + example: + - "/path/to/output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_types" + description: "A csv containing the base filename and modality type per output\ + \ file." + info: null + example: + - "types.csv" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--compression" + description: "The compression format to be used on the final h5mu object." + info: null + default: + - "gzip" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Split the modalities from a single .h5mu multimodal sample into seperate\ + \ .h5mu files. 
\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + 
container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/split_modalities" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/split_modalities/split_modalities" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/dataflow/split_modalities/main.nf b/target/nextflow/dataflow/split_modalities/main.nf new file mode 100644 index 00000000000..2c43925cb3a --- /dev/null +++ b/target/nextflow/dataflow/split_modalities/main.nf @@ -0,0 +1,2655 @@ +// split_modalities 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (maintainer) +// * Robrecht Cannoodt (contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "split_modalities", + "namespace" : "dataflow", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to a single .h5mu file.", + "default" : [ + "sample_path" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output directory containing multiple h5mu files.", + "example" : [ + "/path/to/output" + ], + "must_exist" : true, + "create_parent" : true, 
+ "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output_types", + "description" : "A csv containing the base filename and modality type per output file.", + "example" : [ + "types.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--compression", + "description" : "The compression format to be used on the final h5mu object.", + "default" : [ + "gzip" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. 
\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory 
= 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/dataflow/split_modalities/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dataflow/split_modalities", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from __future__ import annotations +import sys +import mudata as md +from sys import stdout +from pathlib import Path +import pandas as pd + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_types': $( if [ ! -z ${VIASH_PAR_OUTPUT_TYPES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_TYPES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'compression': $( if [ ! 
-z ${VIASH_PAR_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main() -> None: + output_dir = Path(par["output"]) + if not output_dir.is_dir(): + output_dir.mkdir(parents=True) + + logger.info('Reading input file %s', par['input']) + sample = md.read_h5mu(par["input"].strip()) + input_file = Path(par["input"]) + + logger.info('Creating output types csv') + + names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" + for mod_name in sample.mod.keys() } + df = pd.DataFrame({"name": list(names.keys()), "filename": list(names.values())}) + df.to_csv(par["output_types"], index=False) + + logger.info('Splitting up modalities %s', ", ".join(sample.mod.keys())) + for mod_name, mod in sample.mod.items(): + new_sample = md.MuData({mod_name: mod}) + logger.info('Writing to %s', names[mod_name]) + new_sample.write_h5mu(output_dir / names[mod_name], compression=par["output_compression"]) + + logger.info("Finished") + + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + 
"image" : "openpipelines-bio/dataflow_split_modalities", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. 
+ // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * relative to the provided path. Values that are not Strings are passed through unchanged. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + def argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their separator if + * the parameter accepts multiple values + * - Cast the parameters to their correct types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the param_list argument which allows a user to initialise + * a Vdsl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/dataflow/split_modalities/nextflow.config b/target/nextflow/dataflow/split_modalities/nextflow.config new file mode 100644 index 
00000000000..5e522b18a88 --- /dev/null +++ b/target/nextflow/dataflow/split_modalities/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'split_modalities' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. \n' + author = 'Dries Schaumont, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: 
mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/dataflow/split_modalities/nextflow_params.yaml b/target/nextflow/dataflow/split_modalities/nextflow_params.yaml new file mode 100644 index 00000000000..8da3e33dc1c --- /dev/null +++ b/target/nextflow/dataflow/split_modalities/nextflow_params.yaml @@ -0,0 +1,10 @@ +# Arguments +input: # please fill in - example: "sample_path" +# output: "$id.$key.output.output" +# output_compression: "gzip" +# output_types: "$id.$key.output_types.csv" +compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/dataflow/split_modalities/nextflow_schema.json b/target/nextflow/dataflow/split_modalities/nextflow_schema.json new file mode 100644 index 00000000000..c0d287f31e0 --- /dev/null +++ b/target/nextflow/dataflow/split_modalities/nextflow_schema.json @@ -0,0 +1,116 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "split_modalities", +"description": "Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. 
\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, default: `sample_path`. Path to a single ", + "help_text": "Type: `file`, required, default: `sample_path`. Path to a single .h5mu file." + , + "default": "sample_path" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. Output directory containing multiple h5mu files", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. Output directory containing multiple h5mu files." + , + "default": "$id.$key.output.output" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "output_types": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_types.csv`, example: `types.csv`. A csv containing the base filename and modality type per output file", + "help_text": "Type: `file`, required, default: `$id.$key.output_types.csv`, example: `types.csv`. A csv containing the base filename and modality type per output file." + , + "default": "$id.$key.output_types.csv" + } + + + , + "compression": { + "type": + "string", + "description": "Type: `string`, default: `gzip`. The compression format to be used on the final h5mu object", + "help_text": "Type: `string`, default: `gzip`. The compression format to be used on the final h5mu object." 
+ , + "default": "gzip" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/dataflow/split_modalities/setup_logger.py b/target/nextflow/dataflow/split_modalities/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/dataflow/split_modalities/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/demux/bcl2fastq/.config.vsh.yaml b/target/nextflow/demux/bcl2fastq/.config.vsh.yaml new file mode 100644 index 00000000000..4e07a975581 --- /dev/null +++ b/target/nextflow/demux/bcl2fastq/.config.vsh.yaml @@ -0,0 +1,169 @@ +functionality: + name: "bcl2fastq" + namespace: "demux" + version: "0.12.4" + authors: + - name: "Toni Verbeiren" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + - "--runfolder_dir" + description: "Input run directory" + info: null + example: + - 
"bcl_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--sample_sheet" + alternatives: + - "-s" + description: "Pointer to the sample sheet" + info: null + example: + - "SampleSheet.csv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory containig fastq files" + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reports" + description: "Reports directory" + info: null + example: + - "reports_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--ignore_missing" + info: null + direction: "input" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Convert bcl files to fastq files using bcl2fastq.\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_bcl/bcl" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/bcl2fastq:2.20" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "midcpu" + tag: "$id" + auto: + 
simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl2fastq" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl2fastq/bcl2fastq" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/demux/bcl2fastq/main.nf b/target/nextflow/demux/bcl2fastq/main.nf new file mode 100644 index 00000000000..7463c4f79fc --- /dev/null +++ b/target/nextflow/demux/bcl2fastq/main.nf @@ -0,0 +1,2548 @@ +// bcl2fastq 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Toni Verbeiren (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "bcl2fastq", + "namespace" : "demux", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Toni Verbeiren", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "github" : "tverbeiren", + "linkedin" : "verbeiren" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist and CEO" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i", + "--runfolder_dir" + ], + "description" : "Input run directory", + "example" : [ + "bcl_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--sample_sheet", + "alternatives" : [ + "-s" + ], + "description" : "Pointer to the sample sheet", + "example" : [ + "SampleSheet.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output directory containig fastq files", + "example" : [ + "fastq_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + 
"type" : "file", + "name" : "--reports", + "description" : "Reports directory", + "example" : [ + "reports_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--ignore_missing", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/" + } + ], + "description" : "Convert bcl files to fastq files using bcl2fastq.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_bcl/bcl", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/bcl2fastq:2.20", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midmem", + "midcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + 
"mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/demux/bcl2fastq/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl2fastq", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! 
-z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_MISSING+x} ]; then echo "${VIASH_PAR_IGNORE_MISSING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_missing='&'#" ; else echo "# par_ignore_missing="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -exo pipefail + +extra_params=() + +# Handle reports stored separate +if [ ! -z "\\$par_reports" ]; then + extra_params+=("--reports-dir" "\\$par_reports") +fi + +# Handle the boolean flag +if [ "\\$par_ignore_missing" == "true" ]; then + extra_params+=("--ignore-missing-control" "--ignore-missing-bcl" "--ignore-missing-filter") +fi + +# Run the actual command +bcl2fastq \\\\ + --runfolder-dir "\\$par_input" \\\\ + --sample-sheet "\\$par_sample_sheet" \\\\ + --output-dir "\\$par_output" \\\\ + "\\${extra_params[@]}" +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/demux_bcl2fastq", + "tag" : "0.12.0" + }, + "label" : [ + "midmem", + "midcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. 
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + +"Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator; a single-valued argument given as a one-element list is unwrapped. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"]
      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
      // registry precedence: environment override, then params override, then the map itself
      def part1 =
        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
        m.registry ? m.registry + "/" :
        ""
      def part2 = m.image
      def part3 = m.tag ? ":" + m.tag : ":latest"
      drctv["container"] = part1 + part2 + part3
    }
  }

  /* DIRECTIVE containerOptions
    accepted examples:
    - "--foo bar"
    - ["--foo bar", "-f b"]
  */
  if (drctv.containsKey("containerOptions")) {
    if (drctv["containerOptions"] instanceof List) {
      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
    }
    assert drctv["containerOptions"] instanceof CharSequence
  }

  /* DIRECTIVE cpus
    accepted examples:
    - 1
    - 10
  */
  if (drctv.containsKey("cpus")) {
    assert drctv["cpus"] instanceof Integer
  }

  /* DIRECTIVE disk
    accepted examples:
    - "1 GB"
    - "2TB"
    - "3.2KB"
    - "10.B"
  */
  if (drctv.containsKey("disk")) {
    assert drctv["disk"] instanceof CharSequence
    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
    // ^ does not allow closures
  }

  /* DIRECTIVE echo
    accepted examples:
    - true
    - false
  */
  if (drctv.containsKey("echo")) {
    assert drctv["echo"] instanceof Boolean
  }

  /* DIRECTIVE errorStrategy
    accepted examples:
    - "terminate"
    - "finish"
  */
  if (drctv.containsKey("errorStrategy")) {
    assert drctv["errorStrategy"] instanceof CharSequence
    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
  }

  /* DIRECTIVE executor
    accepted examples:
    - "local"
    - "sge"
  */
  if (drctv.containsKey("executor")) {
    assert drctv["executor"] instanceof CharSequence
    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
  }

  /* DIRECTIVE machineType
    accepted examples:
    - "n1-highmem-8"
  */
  if (drctv.containsKey("machineType")) {
    assert drctv["machineType"] instanceof CharSequence
  }

  /* DIRECTIVE maxErrors
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxErrors")) {
    assert drctv["maxErrors"] instanceof Integer
  }

  /* DIRECTIVE maxForks
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxForks")) {
    assert drctv["maxForks"] instanceof Integer
  }

  /* DIRECTIVE maxRetries
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxRetries")) {
    assert drctv["maxRetries"] instanceof Integer
  }

  /* DIRECTIVE memory
    accepted examples:
    - "1 GB"
    - "2TB"
    - "3.2KB"
    - "10.B"
  */
  if (drctv.containsKey("memory")) {
    assert drctv["memory"] instanceof CharSequence
    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
    // ^ does not allow closures
  }

  /* DIRECTIVE module
    accepted examples:
    - "ncbi-blast/2.2.27"
    - "ncbi-blast/2.2.27:t_coffee/10.0"
    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
  */
  if (drctv.containsKey("module")) {
    if (drctv["module"] instanceof List) {
      drctv["module"] = drctv["module"].join(":")
    }
    assert drctv["module"] instanceof CharSequence
  }

  /* DIRECTIVE penv
    accepted examples:
    - "smp"
  */
  if (drctv.containsKey("penv")) {
    assert drctv["penv"] instanceof CharSequence
  }

  /* DIRECTIVE pod
    accepted examples:
    - [ label: "key", value: "val" ]
    - [ annotation: "key", value: "val" ]
    - [ env: "key", value: "val" ]
    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
  */
  if (drctv.containsKey("pod")) {
    if (drctv["pod"] instanceof Map) {
      drctv["pod"] = [ drctv["pod"] ]
    }
    assert drctv["pod"] instanceof List
    drctv["pod"].forEach { pod ->
      assert pod instanceof Map
      // TODO: should more checks be added?
      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
    }
  }

  /* DIRECTIVE publishDir
    accepted examples:
    - []
    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
    - "/path/to/dir"
      is transformed to [[ path: "/path/to/dir" ]]
    - [ path: "/path/to/dir", mode: "cache" ]
      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
  */
  // TODO: should we also look at params["publishDir"]?
  if (drctv.containsKey("publishDir")) {
    def pblsh = drctv["publishDir"]

    // check different options
    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence

    // turn into list if not already so
    // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work.
    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]

    // check elements of publishDir
    pblsh = pblsh.collect{ elem ->
      // turn into map if not already so
      elem = elem instanceof CharSequence ? [ path: elem ] : elem

      // check types and keys
      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")

      // check elements in map
      assert elem.containsKey("path")
      assert elem["path"] instanceof CharSequence
      if (elem.containsKey("mode")) {
        assert elem["mode"] instanceof CharSequence
        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
      }
      if (elem.containsKey("overwrite")) {
        assert elem["overwrite"] instanceof Boolean
      }
      if (elem.containsKey("pattern")) {
        assert elem["pattern"] instanceof CharSequence
      }
      if (elem.containsKey("saveAs")) {
        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
      }
      if (elem.containsKey("enabled")) {
        assert elem["enabled"] instanceof Boolean
      }

      // return final result
      elem
    }
    // store final directive
    drctv["publishDir"] = pblsh
  }

  /* DIRECTIVE queue
    accepted examples:
    - "long"
    - "short,long"
    - ["short", "long"]
  */
  if (drctv.containsKey("queue")) {
    if (drctv["queue"] instanceof List) {
      drctv["queue"] = drctv["queue"].join(",")
    }
    assert drctv["queue"] instanceof CharSequence
  }

  /* DIRECTIVE label
    accepted examples:
    - "big_mem"
    - "big_cpu"
    - ["big_mem", "big_cpu"]
  */
  if (drctv.containsKey("label")) {
    if (drctv["label"] instanceof CharSequence) {
      drctv["label"] = [ drctv["label"] ]
    }
    assert drctv["label"] instanceof List
    drctv["label"].forEach { label ->
      assert label instanceof CharSequence
      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
      // ^ does not allow closures
    }
  }

  /* DIRECTIVE scratch
    accepted examples:
    - true
    - "/path/to/scratch"
    - '$MY_PATH_TO_SCRATCH'
    - "ram-disk"
  */
  if (drctv.containsKey("scratch")) {
    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
  }

  /* DIRECTIVE storeDir
    accepted examples:
    - "/path/to/storeDir"
  */
  if (drctv.containsKey("storeDir")) {
    assert drctv["storeDir"] instanceof CharSequence
  }

  /* DIRECTIVE stageInMode
    accepted examples:
    - "copy"
    - "link"
  */
  if (drctv.containsKey("stageInMode")) {
    assert drctv["stageInMode"] instanceof CharSequence
    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
  }

  /* DIRECTIVE stageOutMode
    accepted examples:
    - "copy"
    - "move"
  */
  if (drctv.containsKey("stageOutMode")) {
    assert drctv["stageOutMode"] instanceof CharSequence
    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
  }

  /* DIRECTIVE tag
    accepted examples:
    - "foo"
    - '$id'
  */
  if (drctv.containsKey("tag")) {
    assert drctv["tag"] instanceof CharSequence
  }

  /* DIRECTIVE time
    accepted examples:
    - "1h"
    - "2days"
    - "1day 6hours 3minutes 30seconds"
  */
  if (drctv.containsKey("time")) {
    assert drctv["time"] instanceof CharSequence
    // todo: validation regex?
  }

  return drctv
}

// TODO: unit test processAuto
/**
 * Validate the 'auto' settings of a module.
 *
 * @param auto A Map of auto settings; entries with a null value are ignored.
 *
 * @return A Map restricted to the four known keys, each of which must be
 *         present and hold a Boolean.
 */
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // declared with 'def' so the list is not written to the script binding
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key)
    assert auto[key] instanceof Boolean
  }

  return auto.subMap(expectedKeys)
}

def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}"
  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])

  // check whether auto exists and apply defaults
  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])

  // auto define publish, if so desired
  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
    //   " Example: params.publish_dir = \"./output/\""
    def publishDir =
      params.containsKey("publish_dir") ? params.publish_dir :
      params.containsKey("publishDir") ? params.publishDir :
      null

    if (publishDir != null) {
      processArgs.directives.publishDir = [[
        path: publishDir,
        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
        mode: "copy"
      ]]
    }
  }

  // auto define transcript, if so desired
  if (processArgs.auto.transcript == true) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
    //   " Example: params.transcripts_dir = \"./transcripts/\""
    def transcriptsDir =
      params.containsKey("transcripts_dir") ? params.transcripts_dir :
      params.containsKey("transcriptsDir") ? params.transcriptsDir :
      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
      null
    if (transcriptsDir != null) {
      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
      def transcriptsPublishDir = [
        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
        mode: "copy"
      ]
      // start from the publishDir entries already present, or an empty list when unset
      // (the original nested ternary had a dead 'null ? ... : []' branch with this same result)
      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
    }
  }

  // if this is a stubrun, remove certain directives?
  if (workflow.stubRun) {
    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
  }

  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
    if (processArgs.containsKey(nam) && processArgs[nam]) {
      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
    }
  }

  // check fromState
  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
  def fromState = processArgs["fromState"]
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  if (fromState) {
    // if fromState is a List, convert to map
    if (fromState instanceof List) {
      // check whether fromstate is a list[string]
      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
      fromState = fromState.collectEntries{[it, it]}
    }

    // if fromState is a map, convert to closure
    if (fromState instanceof Map) {
      // check whether fromstate is a map[string, string]
      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
      def fromStateMap = fromState.clone()
      // turn the map into a closure to be used later on
      fromState = { it ->
        def state = it[1]
        assert state instanceof Map : "Error in module '$key': the state is not a Map"
        def data = fromStateMap.collectEntries{newkey, origkey ->
          // check whether all values of fromState are in state
          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
          [newkey, state[origkey]]
        }
        data
      }
    }

    processArgs["fromState"] = fromState
  }

  // check toState
  def toState = processArgs["toState"]

  // default: the new state is the second element of the tuple
  if (toState == null) {
    toState = { tup -> tup[1] }
  }

  // toState should be a closure, map[string, string], or list[string]
  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"

  // if toState is a List, convert to map
  if (toState instanceof List) {
    // check whether toState is a list[string]
    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
    toState = toState.collectEntries{[it, it]}
  }

  // if toState is a map, convert to closure
  if (toState instanceof Map) {
    // check whether toState is a map[string, string]
    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
    def toStateMap = toState.clone()
    // turn the map into a closure to be used later on
    toState = { it ->
      def output = it[1]
      def state = it[2]
      assert output instanceof Map : "Error in module '$key': the output is not a Map"
      assert state instanceof Map : "Error in module '$key': the state is not a Map"
      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
        // check whether all values of toState are in output
        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
        [newkey, output[origkey]]
      }
      state + extraEntries
    }
  }

  processArgs["toState"] = toState

  // return output
  return processArgs
}

def processFactory(Map processArgs) {
  // autodetect process key
  def wfKey = processArgs["key"]
  def procKeyPrefix = "${wfKey}_process"
  def meta = ScriptMeta.current()
  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
  def newNumber = (numbers + [-1]).max() + 1

  def procKey =
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/demux/bcl2fastq/nextflow.config b/target/nextflow/demux/bcl2fastq/nextflow.config new file mode 100644 index 00000000000..2a062a1ab06 --- 
/dev/null +++ b/target/nextflow/demux/bcl2fastq/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'bcl2fastq' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Convert bcl files to fastq files using bcl2fastq.\n' + author = 'Toni Verbeiren' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 
512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/demux/bcl2fastq/nextflow_params.yaml b/target/nextflow/demux/bcl2fastq/nextflow_params.yaml new file mode 100644 index 00000000000..64952782d47 --- /dev/null +++ b/target/nextflow/demux/bcl2fastq/nextflow_params.yaml @@ -0,0 +1,10 @@ +# Arguments +input: # please fill in - example: "bcl_dir" +sample_sheet: # please fill in - example: "SampleSheet.csv" +# output: "$id.$key.output.output" +# reports: "$id.$key.reports.reports" +ignore_missing: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/demux/bcl2fastq/nextflow_schema.json b/target/nextflow/demux/bcl2fastq/nextflow_schema.json new file mode 100644 index 00000000000..efa834ff8b3 --- /dev/null +++ b/target/nextflow/demux/bcl2fastq/nextflow_schema.json @@ -0,0 +1,113 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bcl2fastq", +"description": "Convert bcl files to fastq files using bcl2fastq.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, 
required, example: `bcl_dir`. Input run directory", + "help_text": "Type: `file`, required, example: `bcl_dir`. Input run directory" + + } + + + , + "sample_sheet": { + "type": + "string", + "description": "Type: `file`, required, example: `SampleSheet.csv`. Pointer to the sample sheet", + "help_text": "Type: `file`, required, example: `SampleSheet.csv`. Pointer to the sample sheet" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containig fastq files", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containig fastq files" + , + "default": "$id.$key.output.output" + } + + + , + "reports": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", + "help_text": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory" + , + "default": "$id.$key.reports.reports" + } + + + , + "ignore_missing": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. ", + "help_text": "Type: `boolean_true`, default: `false`. " + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/demux/bcl_convert/.config.vsh.yaml b/target/nextflow/demux/bcl_convert/.config.vsh.yaml new file mode 100644 index 00000000000..0277c759099 --- /dev/null +++ b/target/nextflow/demux/bcl_convert/.config.vsh.yaml @@ -0,0 +1,189 @@ +functionality: + name: "bcl_convert" + namespace: "demux" + version: "0.12.4" + authors: + - name: "Toni Verbeiren" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" + - name: "Marijke Van Moerbeke" + roles: + - "author" + info: + role: "Contributor" + links: + github: "mvanmoerbeke" + orcid: "0000-0002-3097-5621" + linkedin: "marijke-van-moerbeke-84303a34" + organizations: + - name: "OpenAnalytics" + href: "https://www.openanalytics.eu" + role: "Statistical Consultant" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input run directory" + info: null + example: + - "bcl_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--sample_sheet" + alternatives: + - "-s" + description: "Pointer to the sample sheet" + info: null + example: + - "bcl_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory containig fastq files" + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + 
multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reports" + description: "Reports directory" + info: null + example: + - "reports_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--test_mode" + description: "Should bcl-convert be run in test mode (using --first-tile-only)?" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Convert bcl files to fastq files using bcl-convert.\nInformation about\ + \ upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\n\ + and https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_bcl/bcl2" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/bclconvert:3.10" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: 
"memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl_convert" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl_convert/bcl_convert" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/demux/bcl_convert/main.nf b/target/nextflow/demux/bcl_convert/main.nf new file mode 100644 index 00000000000..9b96db5bb02 --- /dev/null +++ b/target/nextflow/demux/bcl_convert/main.nf @@ -0,0 +1,2574 @@ +// bcl_convert 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Toni Verbeiren (author, maintainer) +// * Marijke Van Moerbeke (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "bcl_convert", + "namespace" : "demux", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Toni Verbeiren", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "github" : "tverbeiren", + "linkedin" : "verbeiren" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist and CEO" + } + ] + } + }, + { + "name" : "Marijke Van Moerbeke", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "mvanmoerbeke", + "orcid" : "0000-0002-3097-5621", + "linkedin" : "marijke-van-moerbeke-84303a34" + }, + "organizations" : [ + { + "name" : "OpenAnalytics", + "href" : "https://www.openanalytics.eu", + "role" : "Statistical Consultant" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input run directory", + "example" : [ + "bcl_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--sample_sheet", + "alternatives" : [ + "-s" + ], + "description" : "Pointer to the sample sheet", + "example" : [ + "bcl_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output directory containig 
fastq files", + "example" : [ + "fastq_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reports", + "description" : "Reports directory", + "example" : [ + "reports_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--test_mode", + "description" : "Should bcl-convert be run in test mode (using --first-tile-only)?", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/" + } + ], + "description" : "Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\nand https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_bcl/bcl2", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/bclconvert:3.10", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + 
"namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midmem", + "midcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/demux/bcl_convert/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/bcl_convert", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo 
pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) +$( if [ ! -z ${VIASH_PAR_TEST_MODE+x} ]; then echo "${VIASH_PAR_TEST_MODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_test_mode='&'#" ; else echo "# par_test_mode="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +[ -d "\\$par_output" ] || mkdir -p "\\$par_output" + +bcl-convert \\\\ + --force \\\\ + --bcl-input-directory "\\$par_input" \\\\ + --output-directory "\\$par_output" \\\\ + --sample-sheet "\\$par_sample_sheet" \\\\ + --first-tile-only \\$par_test_mode + +if [ ! 
-z "\\$par_reports" ]; then + echo "Moving reports to its own location" + mv "\\$par_output"/Reports "\\$par_reports" +else + echo "Leaving reports alone" +fi +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/demux_bcl_convert", + "tag" : "0.12.0" + }, + "label" : [ + "midmem", + "midcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
/**
 * Deprecated: build a channel from the parameter sets returned by paramsToList().
 *
 * A deprecation warning is written to stderr the first time this function or
 * viashChannel() is called; the script-level flag
 * viashChannelDeprecationWarningPrinted suppresses it afterwards.
 *
 * @param params Passed on unchanged to paramsToList().
 * @param config Passed on unchanged to paramsToList().
 *
 * @return The result of Channel.fromList() on the list of parameter sets.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    def warning =
      "Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs."
    System.err.println(warning)
  }
  def parameterSets = paramsToList(params, config)
  return Channel.fromList(parameterSets)
}

/**
 * Deprecated: same as paramsToChannel(), but every event is emitted as
 * [id, parameterSet], where the id is read from the parameter set itself.
 *
 * The warning flag is set before delegating to paramsToChannel(), so only
 * one of the two deprecation warnings is printed.
 *
 * @param params Passed on unchanged to paramsToChannel().
 * @param config Passed on unchanged to paramsToChannel().
 *
 * @return A channel of [id, parameterSet] events.
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    def warning =
      "Warning: viashChannel has deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs."
    System.err.println(warning)
  }
  return paramsToChannel(params, config) | map{ event -> [event.id, event] }
}

/**
 * Split parameters for arguments that accept multiple values using their separator.
 *
 * @param parValues A Map of parameter names to values.
 * @param config A Map of the Viash configuration. The arguments are looked up by
 *               'plainName' in config.functionality.allArguments.
 *
 * @return A Map with the same keys. Values of 'multiple' arguments are always
 *         returned as a flat list (strings are split on 'multiple_sep', null becomes
 *         an empty list). For other known arguments a collection of exactly one
 *         element is replaced by that element; a collection of any other size fails
 *         an assertion. Parameters that are not in the config are returned as is.
 */
Map _splitParams(Map parValues, Map config){
  return parValues.collectEntries { parName, parValue ->
    def settings = config.functionality.allArguments.find { arg -> arg.plainName == parName }

    // parameters without a matching argument in the config are not altered
    if (!settings) {
      return [parName, parValue]
    }

    if (settings.multiple) {
      // normalise every accepted input shape to a list, splitting strings on the separator
      if (parValue == null) {
        parValue = []
      } else if (parValue instanceof String) {
        parValue = parValue.split(settings.multiple_sep)
      } else if (parValue instanceof Collection) {
        parValue = parValue.collect { elem ->
          elem instanceof String ? elem.split(settings.multiple_sep) : elem
        }
      } else {
        parValue = [ parValue ]
      }
      // splitting produces nested arrays; flatten() turns the result into one flat list
      parValue = parValue.flatten()
    } else if (parValue instanceof Collection) {
      // single-valued argument: quietly unwrap a list of length 1, reject anything else
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
}
/**
 * Check if the ids are unique across parameter sets.
 *
 * @param parameterSets a list of parameter sets; the first element of each
 *                      parameter set is used as its id.
 *
 * Fails an assertion that lists all detected ids when an id occurs more than once.
 */
private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
  def ppIds = parameterSets.collect{it[0]}
  // unique() without arguments removes the duplicates from ppIds itself, which made
  // the error message below print the de-duplicated ids. unique(false) works on a copy.
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

/**
 * Resolve the file paths in the parameters relative to a given path.
 *
 * @param paramList A Map containing parameters to process.
 *                  Only values that are still of type String are resolved.
 * @param config A Map of the Viash configuration. The arguments are looked up by
 *               'plainName' in config.functionality.allArguments.
 * @param relativeTo path of a file to resolve the parameter values against
 *                   (passed as first argument to getChild()).
 *
 * @return A Map with the same keys in which String values of input file arguments
 *         (type "file"; a missing direction counts as "input") are replaced by
 *         file(getChild(relativeTo, value)). All other values are returned unchanged.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  paramList.collectEntries { parName, parValue ->
    // 'def' keeps this a local variable instead of a script-binding global
    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
    // same input-file test as the type-casting code: no direction means "input"
    def isInputFile = argSettings &&
      argSettings.type == "file" &&
      (argSettings.direction != null ? argSettings.direction : "input") == "input"
    if (isInputFile) {
      if (parValue instanceof Collection) {
        parValue = parValue.collect({ path ->
          path !instanceof String ? path : file(getChild(relativeTo, path))
        })
      } else if (parValue instanceof String) {
        parValue = file(getChild(relativeTo, parValue))
      }
      // any other single value is kept as is; this branch used to reference the
      // undefined name 'path' and failed instead of passing the value through
    }
    [parName, parValue]
  }
}
/**
 * Read the parameter sets passed with the 'param_list' parameter.
 *
 * @param params Input parameters from nextflow. 'param_list' is optional; the format
 *               is determined by _guessParamListFormat(params).
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of [id, parameters] pairs parsed from the 'param_list' value. The id
 *         is null when a parameter set does not define one. Values of 'multiple'
 *         arguments have been split, and when the list was read from a file, input
 *         file paths have been resolved relative to that file.
 */
private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
  // first try to guess the format (if not set in params)
  def paramListFormat = _guessParamListFormat(params)

  // get the correct parser function for the detected params_list format;
  // every parser returns [file the list was read from (or null), parsed content]
  def paramListParsers = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert paramListParsers.containsKey(paramListFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
    "'yaml', 'yaml_blob', 'asis' or 'none'"
  def paramListParser = paramListParsers.get(paramListFormat)

  // fetch multi param inputs
  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
  // If the value was indeed a file, paramListFile contains the location of that file (used later).
  def paramListFile = paramListOut[0]
  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file

  // data checks
  assert paramSets instanceof List: "--param_list should contain a list of maps"
  for (value in paramSets) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // Reformat from a list of maps to a list of [id, map] pairs.
  // Plain key access is used: Map.get(key, default) would insert the default into the map.
  paramSets = paramSets.collect({ paramValues ->
    [paramValues["id"], paramValues.findAll{it.key != 'id'}]
  })
  // Split parameters with 'multiple: true'
  paramSets = paramSets.collect({ id, paramValues ->
    def splitParamValues = _splitParams(paramValues, config)
    [id, splitParamValues]
  })

  // The paths of input files inside a param_list file may have been specified relatively to the
  // location of the param_list file. These paths must be made absolute.
  if (paramListFile){
    paramSets = paramSets.collect({ id, paramValues ->
      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
      [id, relativeParamValues]
    })
  }

  return paramSets
}

/**
 * Cast parameters to the correct type as defined in the Viash config.
 *
 * @param parValues A Map of input arguments.
 * @param config A Map of the Viash configuration; arguments are looked up by
 *               'plainName' in config.functionality.allArguments.
 *
 * @return The input arguments cast to the type from the viash config. Input file
 *         arguments given as String are converted with file(); "integer", "double" and
 *         boolean types are cast element-wise. Arguments that are not 'multiple' must
 *         hold exactly one value and are returned as that value; 'multiple' arguments
 *         are returned as a list. Parameters without a matching argument in the
 *         config (e.g. publish_dir) are returned untouched.
 */
private Map _castParamTypes(Map parValues, Map config) {
  // "true"/"false" strings are parsed explicitly: 'as Boolean' applies Groovy truth,
  // under which every non-empty string (including "false") is true.
  def toBoolean = { value ->
    if (value instanceof CharSequence) {
      def text = value.toString().trim().toLowerCase()
      if (text == "true") return true
      if (text == "false") return false
    }
    return value as Boolean
  }

  // Cast the input to the correct type according to viash config
  def castParValues = parValues.collectEntries({ parName, parValue ->
    // 'def' keeps these local; without it they were written to the script binding
    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
    // don't parse parameters like publish_dir (in which case paramSettings = null);
    // previously such values were wrapped in a list and never unwrapped again
    if (!paramSettings) {
      return [parName, parValue]
    }
    def parType = paramSettings.type

    if (parValue !instanceof Collection) {
      parValue = [parValue]
    }
    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
      parValue = parValue.collect{ path ->
        path !instanceof String ? path : file(path)
      }
    } else if (parType == "integer") {
      parValue = parValue.collect{it as Integer}
    } else if (parType == "double") {
      parValue = parValue.collect{it as Double}
    } else if (parType == "boolean" ||
               parType == "boolean_true" ||
               parType == "boolean_false") {
      parValue = parValue.collect(toBoolean)
    }

    // simplify list to value if need be
    if (!paramSettings.multiple) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  })
  return castParValues
}

/**
 * Apply the argument settings specified in a Viash config to a single parameter set.
 *    - Split the parameter values according to their separator if
 *       the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *
 * @param paramValues A Map of parameters to be processed. Apart from "publishDir"
 *                    and "publish_dir", all parameters must be specified in the
 *                    Viash config.
 * @param config A Map of the Viash configuration. This Map can be generated from
 *                the config file using the readConfig() function.
 *
 * @return The input parameters that have been processed.
 */
Map applyConfigToOneParameterSet(Map paramValues, Map config){
  def splitParamValues = _splitParams(paramValues, config)
  def castParamValues = _castParamTypes(splitParamValues, config)

  // Check if any unexpected arguments were passed
  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
  castParamValues.each({parName, parValue ->
    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
  })
  return castParamValues
}

/**
 * Apply the argument settings specified in a Viash config to a list of parameter sets.
 *    - Split the parameter values according to their separator if
 *       the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *        ~ Check if the ID of the parameter set is unique across all sets.
 *
 * @param parameterSets A list of parameter sets. The first element of a set is its id,
 *                      the second its parameter Map; any further elements are
 *                      appended to the output set unchanged.
 * @param config A Map of the Viash configuration.
 *
 * @return The input parameter sets that have been processed.
 */
List applyConfig(List parameterSets, Map config){
  def processedparameterSets = parameterSets.collect({ parameterSet ->
    def id = parameterSet[0]
    def paramValues = parameterSet[1]
    def passthrough = parameterSet.drop(2)
    def processedSet = applyConfigToOneParameterSet(paramValues, config)
    [id, processedSet] + passthrough
  })

  _checkUniqueIds(processedparameterSets)
  return processedparameterSets
}
/**
 * Parse nextflow parameters based on settings defined in a viash config.
 * Return a list of parameter sets, each parameter set corresponding to
 * an event in a nextflow channel. The output from this function can be used
 * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
 * events.
 *
 * This function performs:
 *   - A filtering of the params which can be found in the config file.
 *   - Process the param_list argument, which allows a user to initialise
 *     a Vdsl3 channel with multiple parameter sets (see _parseParamListArguments).
 *   - Combine the regular parameters with every parameter set; values from
 *     the parameter set take precedence.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events.
 *               Each parameter set (event) must have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of parameter sets, with the first element of each set being
 *         the event ID and the second element containing a map of the parsed parameters.
 */
private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
  /* parse regular parameters (not in param_list)  */
  /*************************************************/
  def globalParams = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  // containsKey + property access only reads; Map.get(key, default) would add
  // an 'id' entry to params when it is missing
  def globalID = params.containsKey("id") ? params.id : null
  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)

  /* process params_list arguments */
  /*********************************/
  def paramSets = _parseParamListArguments(params, config)
  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)

  /* combine arguments into channel */
  /**********************************/
  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
    def id = paramSet[0]
    def parValues = paramSet[1]
    id = [id, globalID].find({it != null}) // first non-null element

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      id = id ? id : "stub" + index
    }
    assert id != null: "Each parameter set should have at least an ID."

    // Add regular parameters together with parameters passed with 'param_list'
    def combinedArgsValues = globalParamsValues + parValues

    // Remove parameters which are null, if the default is also null
    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
      // '?.' covers parameters that are allowed but not part of the config
      // (e.g. publish_dir); '.default' reads the value without adding a
      // 'default' key to the config, unlike get("default", null)
      paramValue != null || parameterSettings?.default != null
    }
    [id, combinedArgsValues]
  }

  // Check if ids (first element of each list) are unique
  _checkUniqueIds(processedParams)
  return processedParams
}

/**
 * Parse nextflow parameters based on settings defined in a viash config
 * and return a nextflow channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Each parameter set (event) must have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A nextflow Channel with events. Events are formatted as a tuple that
 *         contains the ID of the event as first element and a parameter map as second element.
 */
def channelFromParams(Map params, Map config) {
  // declared with 'def' so the list does not end up in the script binding
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}
/**
 * Process a list of Vdsl3 formatted parameter sets and apply a Viash config to them:
 *    - Collect the arguments of the config that have a 'default' key and run
 *      them through applyConfigToOneParameterSet.
 *    - Run the input parameter sets through applyConfig.
 *    - Put the processed defaults underneath every input parameter set;
 *      values from the input take precedence.
 *    - Check that the ids of the resulting sets are unique.
 *
 * @param params A list of parameter sets. The first element of each set is its id
 *               and the second element the parameter Map. Any further elements
 *               are appended to the output set as is.
 * @param config A Map of the Viash configuration. This Map can be generated from
 *               the config file using the readConfig() function.
 *
 * @return A list of processed parameter sets.
 */
private List _preprocessInputsList(List params, Map config) {
  // gather the raw default value of every argument that declares one
  def rawDefaults = [:]
  config.functionality.allArguments.each { arg ->
    if (arg.containsKey("default")) {
      rawDefaults[arg.plainName] = arg.default
    }
  }

  // defaults go through the same splitting/casting as user-provided values
  def defaults = applyConfigToOneParameterSet(rawDefaults, config)

  // process the inputs and overlay them on the defaults
  def withDefaults = applyConfig(params, config).collect { event ->
    [event[0], defaults + event[1]] + event.drop(2)
  }

  _checkUniqueIds(withDefaults)
  return withDefaults
}
/**
 * Generate a nextflow Workflow that processes a channel of Vdsl3 formatted
 * events with _preprocessInputsList.
 *
 * The whole channel is first collected with toSortedList, handed to
 * _preprocessInputsList in one call, and the result is emitted again
 * event by event with flatMap.
 *
 * @param args A map that must contain a 'config' key that points
 *              to a parsed config (see readConfig()). Optionally, a
 *              'key' key can be provided which is used as the name of the
 *              returned workflow (default: "preprocessInputs").
 *
 * @return A clone of the workflow, named after the key.
 */
def preprocessInputs(Map args) {
  // NOTE(review): wfKey and config are assigned without 'def', presumably so that the
  // workflow body below can resolve them -- confirm before turning them into locals.
  wfKey = args.key != null ? args.key : "preprocessInputs"
  config = args.config
  workflow preprocessInputsInstance {
    take:
    input_ch

    main:
    assert config instanceof Map :
      "Error in preprocessInputs: config must be a map. " +
      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"

    output_ch = input_ch
      | toSortedList
      | map { paramList -> _preprocessInputsList(paramList, config) }
      | flatMap
    emit:
    output_ch
  }

  return preprocessInputsInstance.cloneWithName(wfKey)
}

/**
 * Assert that a Map only contains expected keys and contains all required keys.
 *
 * @param map The value to check; must be a Map.
 * @param expectedKeys The keys that are allowed to occur in the map.
 * @param requiredKeys The keys that must occur in the map.
 * @param mapName Name of the map, only used in the error messages.
 *
 * Fails an assertion when map is not a Map, holds a key that is not in
 * expectedKeys, or lacks one of the requiredKeys.
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // the message used to interpolate '$key', which does not exist in this closure,
    // so a missing key raised a property error instead of this assertion
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
// TODO: unit test processAuto
/**
 * Validate the 'auto' settings of a module.
 *
 * @param auto A Map of settings. Entries with a null value are ignored.
 *
 * @return A Map with only the keys "simplifyInput", "simplifyOutput",
 *         "transcript" and "publish". Fails an assertion when one of these keys
 *         is missing or its value is not a Boolean.
 */
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // 'def' keeps the list local; without it the name was written to the script binding
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
    assert auto[key] instanceof Boolean : "Expected auto.$key to be a Boolean. Found: class ${auto[key].getClass()}"
  }

  return auto.subMap(expectedKeys)
}
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // join the message with '+': a comma would pass the advice as a separate logger argument that is never printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + // declared before it is assigned so the closure can refer to itself when recursing into nested Lists and Maps + def valueToStr + valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // declared with 'def' so the value stays local to this closure instead of being written to the shared script binding + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // parenthesised on purpose: '!out instanceof List' negates 'out' first and is therefore always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/demux/bcl_convert/nextflow.config b/target/nextflow/demux/bcl_convert/nextflow.config new file mode 100644 index 00000000000..9d528464a72 
--- /dev/null +++ b/target/nextflow/demux/bcl_convert/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'bcl_convert' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\nand https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n' + author = 'Toni Verbeiren, Marijke Van Moerbeke' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + 
withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/demux/bcl_convert/nextflow_params.yaml b/target/nextflow/demux/bcl_convert/nextflow_params.yaml new file mode 100644 index 00000000000..a6a214dc347 --- /dev/null +++ b/target/nextflow/demux/bcl_convert/nextflow_params.yaml @@ -0,0 +1,10 @@ +# Arguments +input: # please fill in - example: "bcl_dir" +sample_sheet: # please fill in - example: "bcl_dir" +# output: "$id.$key.output.output" +# reports: "$id.$key.reports.reports" +test_mode: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/demux/bcl_convert/nextflow_schema.json b/target/nextflow/demux/bcl_convert/nextflow_schema.json new file mode 100644 index 00000000000..7129d37a8fe --- /dev/null +++ b/target/nextflow/demux/bcl_convert/nextflow_schema.json @@ -0,0 +1,113 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bcl_convert", +"description": "Convert bcl 
files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\nhttps://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html\nand https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `bcl_dir`. Input run directory", + "help_text": "Type: `file`, required, example: `bcl_dir`. Input run directory" + + } + + + , + "sample_sheet": { + "type": + "string", + "description": "Type: `file`, required, example: `bcl_dir`. Pointer to the sample sheet", + "help_text": "Type: `file`, required, example: `bcl_dir`. Pointer to the sample sheet" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containing fastq files", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `fastq_dir`. Output directory containing fastq files" + , + "default": "$id.$key.output.output" + } + + + , + "reports": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", + "help_text": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory" + , + "default": "$id.$key.reports.reports" + } + + + , + "test_mode": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Should bcl-convert be run in test mode (using --first-tile-only)?", + "help_text": "Type: `boolean`, default: `false`. Should bcl-convert be run in test mode (using --first-tile-only)?" 
+ , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml b/target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml new file mode 100644 index 00000000000..411766de74d --- /dev/null +++ b/target/nextflow/demux/cellranger_mkfastq/.config.vsh.yaml @@ -0,0 +1,207 @@ +functionality: + name: "cellranger_mkfastq" + namespace: "demux" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Samuel D'Souza" + roles: + - "author" + info: + role: "Contributor" + links: + github: "srdsam" + linkedin: "samuel-d-souza-887023150/" + organizations: + - name: "Chan Zuckerberg Biohub" + href: "https://www.czbiohub.org" + role: "Data Engineer" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + 
arguments: + - type: "file" + name: "--input" + description: "Path to the (untarred) BCL files. Expects 'RunParameters.xml' at\ + \ './'." + info: null + example: + - "/path/to/bcl" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--sample_sheet" + description: "The path to the sample sheet." + info: null + example: + - "SampleSheet.csv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "The folder to store the demux results" + info: null + example: + - "/path/to/output" + default: + - "fastqs" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reports" + description: "Reports directory" + info: null + example: + - "reports_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Demultiplex raw sequencing data" + usage: "cellranger_mkfastq \\\n --input /path/to/bcl \\\n --sample_sheet SampleSheet.csv\ + \ \\\n --output /path/to/output\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_bcl" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/cellranger:6.1" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + 
target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt-get update && apt-get upgrade -y" + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/cellranger_mkfastq" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/cellranger_mkfastq/cellranger_mkfastq" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/demux/cellranger_mkfastq/main.nf b/target/nextflow/demux/cellranger_mkfastq/main.nf new file mode 100644 index 00000000000..d82b2d2642a --- 
/dev/null +++ b/target/nextflow/demux/cellranger_mkfastq/main.nf @@ -0,0 +1,2646 @@ +// cellranger_mkfastq 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Samuel D'Souza (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "cellranger_mkfastq", + "namespace" : "demux", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Samuel D'Souza", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "srdsam", + "linkedin" : "samuel-d-souza-887023150/" + }, + "organizations" : [ + { + "name" : "Chan Zuckerberg Biohub", + "href" : "https://www.czbiohub.org", + "role" : "Data Engineer" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" 
: "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Path to the (untarred) BCL files. Expects 'RunParameters.xml' at './'.", + "example" : [ + "/path/to/bcl" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--sample_sheet", + "description" : "The path to the sample sheet.", + "example" : [ + "SampleSheet.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "The folder to store the demux results", + "example" : [ + "/path/to/output" + ], + "default" : [ + "fastqs" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reports", + "description" : "Reports directory", + "example" : [ + "reports_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Demultiplex raw sequencing data", + "usage" : "cellranger_mkfastq \\\\\n --input /path/to/bcl \\\\\n --sample_sheet SampleSheet.csv \\\\\n --output /path/to/output\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_bcl", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/cellranger:6.1", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt-get update && apt-get upgrade -y" + ] + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + 
"mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/demux/cellranger_mkfastq/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/demux/cellranger_mkfastq", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) +$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +# if par_input not is a folder, untar first +if [ ! -d "\\$par_input" ]; then + echo "Assuming input is a tar.gz, untarring" + input_dir="\\$tmpdir/bcl" + mkdir -p "\\$input_dir" + tar -xzf "\\$par_input" -C "\\$input_dir" --strip-components=1 +else + input_dir="\\$par_input" +fi + + +# add additional params +extra_params=( ) + +if [ ! -z "\\$meta_cpus" ]; then + extra_params+=( "--localcores=\\$meta_cpus" ) +fi +if [ ! -z "\\$meta_memory_gb" ]; then + # always keep 2gb for the OS itself + memory_gb=\\`python -c "print(int('\\$meta_memory_gb') - 2)"\\` + extra_params+=( "--localmem=\\$memory_gb" ) +fi + + +echo "Running cellranger demux" + +id=myoutput + +cellranger mkfastq \\\\ + --id "\\$id" \\\\ + --csv "\\$par_sample_sheet" \\\\ + --run "\\$par_input" \\\\ + "\\${extra_params[@]}" \\\\ + --disable-ui \\\\ + --output-dir "\\$par_output" + +# Move reports to their own output location +if [ ! 
-z "\\$par_reports" ]; then + echo "Moving reports its own location" + mv "\\$par_output"/Reports "\\$par_reports" +else + echo "Leaving reports alone" +fi +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/demux_cellranger_mkfastq", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the help text generated from the config and exit when a 'help'
 * parameter was passed.
 *
 * @param config A Map of the Viash configuration.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Guess the format of the 'param_list' parameter.
 *
 * @param params Input parameters, optionally containing a 'param_list' key.
 *
 * @return "none" when param_list is absent or null, "asis" when it is not a String,
 *         "csv" / "json" / "yaml" based on the file extension, and "yaml_blob" otherwise.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// shared by paramsToList, paramsToChannel and viashChannel so that the
// deprecation warning is printed only once per run
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: turn the input parameters into a list of processed argument sets.
 *
 * Defaults from the config, parameters passed directly and the entries of
 * 'param_list' are merged (in that order of precedence), split on the
 * argument's 'multiple_sep', cast to the argument type and checked for an 'id'.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key.
 * @param config A Map of the Viash configuration.
 *
 * @return A list of maps, one per argument set. Ids must be unique across sets.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // each parser returns [ file the list was read from (or null), list of maps ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally: an undeclared assignment would end up in the
        // script binding and be shared between invocations
        def parData

        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              // paths inside a param_list file are relative to that file
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): the closure receives a Map.Entry, which is never null, so this
      // filter keeps every entry; left unchanged because this function is deprecated.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) returns a de-duplicated copy; the mutating unique() would strip
  // the duplicates from ppIds before the failure message gets to print them
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: same as paramsToList, but returns the argument sets as a channel.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: same as paramsToChannel, but emits [id, argument set] tuples.
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not described in the config are returned as is.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
      "Error: argument ${parName} has too many values.\n" +
      " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"]
      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
      // registry precedence: environment override, then params override, then the map itself
      def part1 =
        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
        m.registry ? m.registry + "/" :
        ""
      def part2 = m.image
      def part3 = m.tag ? ":" + m.tag : ":latest"
      drctv["container"] = part1 + part2 + part3
    }
  }

  /* DIRECTIVE containerOptions
    accepted examples:
    - "--foo bar"
    - ["--foo bar", "-f b"]
  */
  if (drctv.containsKey("containerOptions")) {
    if (drctv["containerOptions"] instanceof List) {
      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
    }
    assert drctv["containerOptions"] instanceof CharSequence
  }

  /* DIRECTIVE cpus
    accepted examples:
    - 1
    - 10
  */
  if (drctv.containsKey("cpus")) {
    assert drctv["cpus"] instanceof Integer
  }

  /* DIRECTIVE disk
    accepted examples:
    - "1 GB"
    - "2TB"
    - "3.2KB"
    - "10.B"
  */
  if (drctv.containsKey("disk")) {
    assert drctv["disk"] instanceof CharSequence
    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
    // ^ does not allow closures
  }

  /* DIRECTIVE echo
    accepted examples:
    - true
    - false
  */
  if (drctv.containsKey("echo")) {
    assert drctv["echo"] instanceof Boolean
  }

  /* DIRECTIVE errorStrategy
    accepted examples:
    - "terminate"
    - "finish"
  */
  if (drctv.containsKey("errorStrategy")) {
    assert drctv["errorStrategy"] instanceof CharSequence
    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
  }

  /* DIRECTIVE executor
    accepted examples:
    - "local"
    - "sge"
  */
  if (drctv.containsKey("executor")) {
    assert drctv["executor"] instanceof CharSequence
    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
  }

  /* DIRECTIVE machineType
    accepted examples:
    - "n1-highmem-8"
  */
  if (drctv.containsKey("machineType")) {
    assert drctv["machineType"] instanceof CharSequence
  }

  /* DIRECTIVE maxErrors
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxErrors")) {
    assert drctv["maxErrors"] instanceof Integer
  }

  /* DIRECTIVE maxForks
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxForks")) {
    assert drctv["maxForks"] instanceof Integer
  }

  /* DIRECTIVE maxRetries
    accepted examples:
    - 1
    - 3
  */
  if (drctv.containsKey("maxRetries")) {
    assert drctv["maxRetries"] instanceof Integer
  }

  /* DIRECTIVE memory
    accepted examples:
    - "1 GB"
    - "2TB"
    - "3.2KB"
    - "10.B"
  */
  if (drctv.containsKey("memory")) {
    assert drctv["memory"] instanceof CharSequence
    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
    // ^ does not allow closures
  }

  /* DIRECTIVE module
    accepted examples:
    - "ncbi-blast/2.2.27"
    - "ncbi-blast/2.2.27:t_coffee/10.0"
    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
  */
  if (drctv.containsKey("module")) {
    if (drctv["module"] instanceof List) {
      drctv["module"] = drctv["module"].join(":")
    }
    assert drctv["module"] instanceof CharSequence
  }

  /* DIRECTIVE penv
    accepted examples:
    - "smp"
  */
  if (drctv.containsKey("penv")) {
    assert drctv["penv"] instanceof CharSequence
  }

  /* DIRECTIVE pod
    accepted examples:
    - [ label: "key", value: "val" ]
    - [ annotation: "key", value: "val" ]
    - [ env: "key", value: "val" ]
    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
  */
  if (drctv.containsKey("pod")) {
    if (drctv["pod"] instanceof Map) {
      drctv["pod"] = [ drctv["pod"] ]
    }
    assert drctv["pod"] instanceof List
    drctv["pod"].forEach { pod ->
      assert pod instanceof Map
      // TODO: should more checks be added?
      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
    }
  }

  /* DIRECTIVE publishDir
    accepted examples:
    - []
    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
    - "/path/to/dir"
      is transformed to [[ path: "/path/to/dir" ]]
    - [ path: "/path/to/dir", mode: "cache" ]
      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
  */
  // TODO: should we also look at params["publishDir"]?
  if (drctv.containsKey("publishDir")) {
    def pblsh = drctv["publishDir"]

    // check different options
    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence

    // turn into list if not already so
    // ('if (!pblsh instanceof List) pblsh = [ pblsh ]' does not work because '!' binds
    // tighter than instanceof, i.e. it is parsed as '(!pblsh) instanceof List')
    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]

    // check elements of publishDir
    pblsh = pblsh.collect{ elem ->
      // turn into map if not already so
      elem = elem instanceof CharSequence ? [ path: elem ] : elem

      // check types and keys
      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")

      // check elements in map
      assert elem.containsKey("path")
      assert elem["path"] instanceof CharSequence
      if (elem.containsKey("mode")) {
        assert elem["mode"] instanceof CharSequence
        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
      }
      if (elem.containsKey("overwrite")) {
        assert elem["overwrite"] instanceof Boolean
      }
      if (elem.containsKey("pattern")) {
        assert elem["pattern"] instanceof CharSequence
      }
      if (elem.containsKey("saveAs")) {
        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
      }
      if (elem.containsKey("enabled")) {
        assert elem["enabled"] instanceof Boolean
      }

      // return final result
      elem
    }
    // store final directive
    drctv["publishDir"] = pblsh
  }

  /* DIRECTIVE queue
    accepted examples:
    - "long"
    - "short,long"
    - ["short", "long"]
  */
  if (drctv.containsKey("queue")) {
    if (drctv["queue"] instanceof List) {
      drctv["queue"] = drctv["queue"].join(",")
    }
    assert drctv["queue"] instanceof CharSequence
  }

  /* DIRECTIVE label
    accepted examples:
    - "big_mem"
    - "big_cpu"
    - ["big_mem", "big_cpu"]
  */
  if (drctv.containsKey("label")) {
    if (drctv["label"] instanceof CharSequence) {
      drctv["label"] = [ drctv["label"] ]
    }
    assert drctv["label"] instanceof List
    drctv["label"].forEach { label ->
      assert label instanceof CharSequence
      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
      // ^ does not allow closures
    }
  }

  /* DIRECTIVE scratch
    accepted examples:
    - true
    - false
    - "/path/to/scratch"
    - '$MY_PATH_TO_SCRATCH'
    - "ram-disk"
  */
  if (drctv.containsKey("scratch")) {
    // 'false' (do not use scratch) is a valid Nextflow value as well
    assert drctv["scratch"] instanceof Boolean || drctv["scratch"] instanceof CharSequence
  }

  /* DIRECTIVE storeDir
    accepted examples:
    - "/path/to/storeDir"
  */
  if (drctv.containsKey("storeDir")) {
    assert drctv["storeDir"] instanceof CharSequence
  }

  /* DIRECTIVE stageInMode
    accepted examples:
    - "copy"
    - "link"
  */
  if (drctv.containsKey("stageInMode")) {
    assert drctv["stageInMode"] instanceof CharSequence
    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
  }

  /* DIRECTIVE stageOutMode
    accepted examples:
    - "copy"
    - "link"
  */
  if (drctv.containsKey("stageOutMode")) {
    assert drctv["stageOutMode"] instanceof CharSequence
    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
  }

  /* DIRECTIVE tag
    accepted examples:
    - "foo"
    - '$id'
  */
  if (drctv.containsKey("tag")) {
    assert drctv["tag"] instanceof CharSequence
  }

  /* DIRECTIVE time
    accepted examples:
    - "1h"
    - "2days"
    - "1day 6hours 3minutes 30seconds"
  */
  if (drctv.containsKey("time")) {
    assert drctv["time"] instanceof CharSequence
    // todo: validation regex?
  }

  return drctv
}

// TODO: unit test processAuto
// Validates the 'auto' settings: every expected key must be present and hold a
// Boolean. Returns a map restricted to the expected keys.
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // declared locally so it does not leak into the script binding
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
    assert auto[key] instanceof Boolean : "Expected auto setting '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
  }

  return auto.subMap(expectedKeys)
}

def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // the hint is concatenated into one message: a second log.warn argument is only a format parameter and is dropped when the message has no placeholder + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // declared locally so the value does not leak into the shared script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: a bare '!out instanceof List' parses as '(!out) instanceof List', which is always false + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/demux/cellranger_mkfastq/nextflow.config b/target/nextflow/demux/cellranger_mkfastq/nextflow.config new file mode 100644 index 
00000000000..1d752ebdeeb --- /dev/null +++ b/target/nextflow/demux/cellranger_mkfastq/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'cellranger_mkfastq' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Demultiplex raw sequencing data' + author = 'Angela Oliveira Pisco, Samuel D\'Souza, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + 
withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml b/target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml new file mode 100644 index 00000000000..7da67817572 --- /dev/null +++ b/target/nextflow/demux/cellranger_mkfastq/nextflow_params.yaml @@ -0,0 +1,9 @@ +# Arguments +input: # please fill in - example: "/path/to/bcl" +sample_sheet: # please fill in - example: "SampleSheet.csv" +# output: "$id.$key.output.output" +# reports: "$id.$key.reports.reports" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json b/target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json new file mode 100644 index 00000000000..9da67b57341 --- /dev/null +++ b/target/nextflow/demux/cellranger_mkfastq/nextflow_schema.json @@ -0,0 +1,102 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cellranger_mkfastq", +"description": "Demultiplex raw sequencing data", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No 
description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `/path/to/bcl`. Path to the (untarred) BCL files", + "help_text": "Type: `file`, required, example: `/path/to/bcl`. Path to the (untarred) BCL files. Expects \u0027RunParameters.xml\u0027 at \u0027./\u0027." + + } + + + , + "sample_sheet": { + "type": + "string", + "description": "Type: `file`, required, example: `SampleSheet.csv`. The path to the sample sheet", + "help_text": "Type: `file`, required, example: `SampleSheet.csv`. The path to the sample sheet." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the demux results", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the demux results" + , + "default": "$id.$key.output.output" + } + + + , + "reports": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory", + "help_text": "Type: `file`, default: `$id.$key.reports.reports`, example: `reports_dir`. Reports directory" + , + "default": "$id.$key.reports.reports" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/demux/cellranger_mkfastq/setup_logger.py b/target/nextflow/demux/cellranger_mkfastq/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/demux/cellranger_mkfastq/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/dimred/pca/.config.vsh.yaml b/target/nextflow/dimred/pca/.config.vsh.yaml new file mode 100644 index 00000000000..f37a549b4db --- /dev/null +++ b/target/nextflow/dimred/pca/.config.vsh.yaml @@ -0,0 +1,253 @@ +functionality: + name: "pca" + namespace: "dimred" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: 
"string" + name: "--layer" + description: "Use specified layer for expression values instead of the .X object\ + \ from the modality." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_input" + description: "Column name in .var matrix that will be used to select which genes\ + \ to run the PCA on." + info: null + example: + - "filter_with_hvg" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting embedding." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--varm_output" + description: "In which .varm slot to store the resulting loadings matrix." + info: null + default: + - "pca_loadings" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_output" + description: "In which .uns slot to store the resulting variance objects." + info: null + default: + - "pca_variance" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_components" + description: "Number of principal components to compute. 
Defaults to 50, or 1\ + \ - minimum dimension size of selected representation." + info: null + example: + - 25 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--overwrite" + description: "Allow overwriting .obsm, .varm and .uns slots." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Computes PCA coordinates, loadings and variance decomposition. Uses\ + \ the implementation of scikit-learn [Pedregosa11].\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + 
mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dimred/pca/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/pca" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/pca/pca" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/dimred/pca/main.nf b/target/nextflow/dimred/pca/main.nf new file mode 100644 index 00000000000..843c510c5c8 --- /dev/null +++ b/target/nextflow/dimred/pca/main.nf @@ -0,0 +1,2718 @@ +// pca 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "pca", + "namespace" : "dimred", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer", + "description" : "Use specified layer for expression values instead of the .X object from the modality.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_input", + "description" : "Column name in .var matrix that will be used to select which genes to run the PCA on.", + "example" : [ + "filter_with_hvg" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + 
"description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_output", + "description" : "In which .obsm slot to store the resulting embedding.", + "default" : [ + "X_pca" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--varm_output", + "description" : "In which .varm slot to store the resulting loadings matrix.", + "default" : [ + "pca_loadings" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--uns_output", + "description" : "In which .uns slot to store the resulting variance objects.", + "default" : [ + "pca_variance" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--num_components", + "description" : "Number of principal components to compute. 
Defaults to 50, or 1 - minimum dimension size of selected representation.", + "example" : [ + 25 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--overwrite", + "description" : "Allow overwriting .obsm, .varm and .uns slots.", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/pca/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/pca/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + 
"type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highcpu", + "highmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/dimred/pca/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/pca", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import scanpy as sc +import mudata as mu +import sys +from anndata import AnnData + +## VIASH START +# The following code has been 
auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par["input"]) +mdata = mu.read_h5mu(par["input"]) + +logger.info("Computing PCA components for modality '%s'", par['modality']) +data = mdata.mod[par['modality']] +if par['layer'] and par['layer'] not in data.layers: + raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.") +layer = data.X if not par['layer'] else data.layers[par['layer']] +adata_input_layer = AnnData(layer) +adata_input_layer.var.index = data.var.index + +use_highly_variable = False +if par["var_input"]: + if not par["var_input"] in data.var.columns: + raise ValueError(f"Requested to use .var column {par['var_input']} " + "as a selection of genes to run the PCA on, " + f"but the column is not available for modality {par['modality']}") + use_highly_variable = True + adata_input_layer.var['highly_variable'] = data.var[par["var_input"]] + +# run pca +output_adata = sc.tl.pca( + adata_input_layer, + n_comps=par["num_components"], + copy=True, + use_highly_variable=use_highly_variable +) + +# store output in specific objects + +check_exist_dict = { + "obsm_output": ("obsm"), # the embedding is written to .obsm, so guard that slot (not .obs) + "varm_output": ("varm"), + "uns_output": ("uns") +} +for parameter_name, field in check_exist_dict.items(): + if par[parameter_name] in 
getattr(data, field): + if not par["overwrite"]: + raise ValueError(f"Requested to create field {par[parameter_name]} in .{field} " + f"for modality {par['modality']}, but field already exists.") + del getattr(data, field)[par[parameter_name]] + +data.obsm[par["obsm_output"]] = output_adata.obsm['X_pca'] +data.varm[par["varm_output"]] = output_adata.varm['PCs'] +data.uns[par["uns_output"]] = { "variance": output_adata.uns['pca']['variance'], + "variance_ratio": output_adata.uns['pca']['variance_ratio'] } + + +logger.info("Writing to %s.", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/dimred_pca", + "tag" : "0.12.0" + }, + "label" : [ + "highcpu", + "highmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+// NOTE(review): the generic type arguments of this parameter were unreadable in the
+// reviewed text (`List>>`); a raw List is declared here. Restore the original
+// parameterization if it is known.
+private void _checkUniqueIds(List parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) works on a copy. A plain unique() de-duplicates ppIds in place,
+  // which would remove the duplicates from the list before it is printed below.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only arguments whose settings have type "file" and direction "input" are touched,
+ * and only values that are still Strings are resolved; everything else is returned as is.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameter values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  // resolve a single value: Strings become files relative to 'relativeTo', anything else is kept
+  def resolveOne = { value ->
+    value instanceof String ? file(getChild(relativeTo, value)) : value
+  }
+
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the settings local to this closure instead of writing a script-level variable
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    def isInputFile = argSettings && argSettings.type == "file" && argSettings.direction == "input"
+    if (!isInputFile) {
+      return [parName, parValue]
+    }
+    // the scalar branch used to return the undefined name 'path'; it now returns the value itself
+    def resolved = parValue instanceof Collection ? parValue.collect(resolveOne) : resolveOne(parValue)
+    [parName, resolved]
+  }
+}
+
+/**
+ * Parse the 'param_list' entry of the nextflow parameters based on settings
+ * defined in a viash config and return the resulting parameter sets.
+ * 
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+// NOTE(review): the generic type arguments of the return type were unreadable in the
+// reviewed text (`List>`); a raw List is declared here. Restore the original
+// parameterization if it is known.
+private List _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // every parser returns [location of the file or null, parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat every map into an [id, values] pair, with the 'id' entry removed from the values
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; the settings of each argument are looked up
+ *               in config.functionality.allArguments by plain name.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // both variables are declared with 'def' so they stay local to this closure
+    // instead of being written to script-level variables
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list from here on, so single and multiple values are cast the same way
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // input files: Strings become files, values that are not Strings are left untouched
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" || 
+               parType == "boolean_true" || 
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters without settings (paramSettings == null) are not
+    // unwrapped here and stay wrapped in a list; confirm that this is intended.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 : 
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if 
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ * 
+ * @param paramValues A Map of parameters to be processed. Every parameter must either be
+ *                    specified in the Viash config or be 'publishDir' / 'publish_dir'.
+ * @param config A Map of the Viash configuration. This Map can be generated from 
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+      assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if 
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of each set is its id, element 1
+ *                      the Map of parameter values, and any further elements are passed
+ *                      through unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    // everything after the id and the values is carried over untouched
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to 
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted 
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise 
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are 
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names 
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file, 
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+// NOTE(review): the generic type arguments of the return type were unreadable in the
+// reviewed text (`List>>`); a raw List is declared here. Restore the original
+// parameterization if it is known.
+private List _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def parValues = paramSet[1]
+    // the id of the parameter set takes precedence over the global id (first non-null element)
+    def id = [paramSet[0], globalID].find({it != null})
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the value from the parameter set wins
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll keeps an entry as is or drops it, so the closure never has to hand a
+    // null entry back for the parameters that are removed.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      if (paramValue != null) {
+        return true
+      }
+      // Local variable ('def'), and null-safe: parameters such as publish_dir are accepted
+      // by applyConfigToOneParameterSet but have no settings in the config.
+      // The one-argument get() is used because the GDK get(key, defaultValue) would
+      // insert the key into the argument settings when it is missing.
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) are unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config 
+ * and return a nextflow channel.
+ * 
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file, 
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose first
+ *         element is the ID of the event and whose second element holds a parameter map.
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of writing a script-level variable
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make 
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ * 
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element 
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from 
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Every argument that declares a 'default', keyed by its plain name ...
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+  // ... run through the same processing as user-supplied values
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+
+  // Process the incoming tuples, then layer the supplied values over the defaults
+  // (on a key clash the supplied value wins). Extra tuple elements are kept as is.
+  def withDefaults = applyConfig(params, config).collect { tup ->
+    def eventId = tup[0]
+    def values = tup[1]
+    def extras = tup.drop(2)
+    [eventId, defaults + values] + extras
+  }
+
+  _checkUniqueIds(withDefaults)
+  return withDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of 
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make 
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ * 
+ * The events in the channel are formatted as tuples, with the 
+ * first element of the tuples being a unique id of the parameter set, 
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take: 
+    input_ch
+
+    main:
+    assert config instanceof Map : 
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map holds no unexpected keys and holds every required key.
+ *
+ * @param map          The value to check; it must be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName      Name of the map, used in the assertion messages; may be empty.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val -> 
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey -> 
+    // the message must name the closure parameter 'requiredKey'; 'key' only exists
+    // inside the closure of the loop above
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ] 
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// Validate the 'auto' settings: drop null entries, require every expected key to be present and a Boolean, and return only the expected keys. +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // declared with 'def' so the list stays local to this call instead of leaking into the script binding + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // publishDir may be absent (null); start from an empty list in that case and append the transcripts entry + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build ONE message string: a second argument to log.warn is treated as a format argument and, without a '{}' placeholder, is never printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + // valueToStr renders a directive value as Groovy source text; it is declared before being assigned so the closure body can call itself for nested lists and maps + def valueToStr + valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + // 'def' keeps this per-argument helper local to the closure instead of writing it to the script binding + def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + // literal substitution of the '$id' and '$key' placeholders; a regex replaceAll would misinterpret '$' or '\' inside the id or key + val = val.replace('$id', id).replace('$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // 'def' keeps 'out' local to this closure call; an undeclared assignment lands in the shared script binding + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // note the parentheses: '!out instanceof List' parses as '(!out) instanceof List', which is always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/dimred/pca/nextflow.config b/target/nextflow/dimred/pca/nextflow.config new file mode 100644 index 00000000000..4b6d55c057f --- /dev/null 
+++ b/target/nextflow/dimred/pca/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'pca' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n' + author = 'Dries De Maeyer' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { 
memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/dimred/pca/nextflow_params.yaml b/target/nextflow/dimred/pca/nextflow_params.yaml new file mode 100644 index 00000000000..52d5f4d55bc --- /dev/null +++ b/target/nextflow/dimred/pca/nextflow_params.yaml @@ -0,0 +1,16 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# layer: "foo" +# var_input: "filter_with_hvg" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +obsm_output: "X_pca" +varm_output: "pca_loadings" +uns_output: "pca_variance" +# num_components: 25 +overwrite: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/dimred/pca/nextflow_schema.json b/target/nextflow/dimred/pca/nextflow_schema.json new file mode 100644 index 00000000000..b21447962ed --- /dev/null +++ b/target/nextflow/dimred/pca/nextflow_schema.json @@ -0,0 +1,178 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "pca", +"description": "Computes PCA coordinates, loadings and variance decomposition. 
Uses the implementation of scikit-learn [Pedregosa11].\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "layer": { + "type": + "string", + "description": "Type: `string`. Use specified layer for expression values instead of the ", + "help_text": "Type: `string`. Use specified layer for expression values instead of the .X object from the modality." + + } + + + , + "var_input": { + "type": + "string", + "description": "Type: `string`, example: `filter_with_hvg`. Column name in ", + "help_text": "Type: `string`, example: `filter_with_hvg`. Column name in .var matrix that will be used to select which genes to run the PCA on." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "obsm_output": { + "type": + "string", + "description": "Type: `string`, default: `X_pca`. In which ", + "help_text": "Type: `string`, default: `X_pca`. 
In which .obsm slot to store the resulting embedding." + , + "default": "X_pca" + } + + + , + "varm_output": { + "type": + "string", + "description": "Type: `string`, default: `pca_loadings`. In which ", + "help_text": "Type: `string`, default: `pca_loadings`. In which .varm slot to store the resulting loadings matrix." + , + "default": "pca_loadings" + } + + + , + "uns_output": { + "type": + "string", + "description": "Type: `string`, default: `pca_variance`. In which ", + "help_text": "Type: `string`, default: `pca_variance`. In which .uns slot to store the resulting variance objects." + , + "default": "pca_variance" + } + + + , + "num_components": { + "type": + "integer", + "description": "Type: `integer`, example: `25`. Number of principal components to compute", + "help_text": "Type: `integer`, example: `25`. Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation." + + } + + + , + "overwrite": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Allow overwriting ", + "help_text": "Type: `boolean_true`, default: `false`. Allow overwriting .obsm, .varm and .uns slots." + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/dimred/pca/setup_logger.py b/target/nextflow/dimred/pca/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/dimred/pca/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/dimred/umap/.config.vsh.yaml b/target/nextflow/dimred/umap/.config.vsh.yaml new file mode 100644 index 00000000000..f31d298d501 --- /dev/null +++ b/target/nextflow/dimred/umap/.config.vsh.yaml @@ -0,0 +1,312 @@ +functionality: + name: "umap" + namespace: "dimred" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: 
"--uns_neighbors" + description: "The `.uns` neighbors slot as output by the `find_neighbors` component." + info: null + default: + - "neighbors" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "The pre/postfix under which to store the UMAP results." + info: null + default: + - "umap" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "double" + name: "--min_dist" + description: "The effective minimum distance between embedded points. Smaller\ + \ values will result in a more clustered/clumped embedding where nearby points\ + \ on the manifold are drawn closer together, while larger values will result\ + \ on a more even dispersal of points. The value should be set relative to\ + \ the spread value, which determines the scale at which embedded points will\ + \ be spread out." + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--spread" + description: "The effective scale of embedded points. In combination with `min_dist`\ + \ this determines how clustered/clumped the embedded points are." 
+ info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_components" + description: "The number of dimensions of the embedding." + info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_iter" + description: "The number of iterations (epochs) of the optimization. Called\ + \ `n_epochs` in the original UMAP. Default is set to 500 if neighbors['connectivities'].shape[0]\ + \ <= 10000, else 200." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alpha" + description: "The initial learning rate for the embedding optimization." + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--gamma" + description: "Weighting applied to negative samples in low dimensional embedding\ + \ optimization. Values higher than one will result in greater weight being\ + \ given to negative samples." + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--negative_sample_rate" + description: "The number of negative edge/1-simplex samples to use per positive\ + \ edge/1-simplex sample in optimizing the low dimensional embedding." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--init_pos" + description: "How to initialize the low dimensional embedding. Called `init`\ + \ in the original UMAP. 
Options are:\n\n* Any key from `.obsm`\n* `'paga'`:\ + \ positions from `paga()`\n* `'spectral'`: use a spectral embedding of the\ + \ graph\n* `'random'`: assign initial embedding positions at random.\n" + info: null + default: + - "spectral" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "UMAP (Uniform Manifold Approximation and Projection) is a manifold\ + \ learning technique suitable for visualizing high-dimensional data. Besides tending\ + \ to be faster than tSNE, it optimizes the embedding such that it best reflects\ + \ the topology of the data, which we represent throughout Scanpy using a neighborhood\ + \ graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances\ + \ in the embedding such that these best match the distribution of distances in\ + \ the high-dimensional space. 
We use the implementation of umap-learn [McInnes18].\ + \ For a few comparisons of UMAP with tSNE, see this preprint.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 
500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/dimred/umap/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/umap" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/umap/umap" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/dimred/umap/main.nf b/target/nextflow/dimred/umap/main.nf new file mode 100644 index 00000000000..5d814fd2281 --- /dev/null +++ b/target/nextflow/dimred/umap/main.nf @@ -0,0 +1,2778 @@ +// umap 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "umap", + "namespace" : "dimred", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--uns_neighbors", + "description" : "The `.uns` neighbors slot as output by the `find_neighbors` component.", + "default" : [ + "neighbors" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + 
"dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_output", + "description" : "The pre/postfix under which to store the UMAP results.", + "default" : [ + "umap" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "double", + "name" : "--min_dist", + "description" : "The effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out.", + "default" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--spread", + "description" : "The effective scale of embedded points. In combination with `min_dist` this determines how clustered/clumped the embedded points are.", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--num_components", + "description" : "The number of dimensions of the embedding.", + "default" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--max_iter", + "description" : "The number of iterations (epochs) of the optimization. 
Called `n_epochs` in the original UMAP. Default is set to 500 if neighbors['connectivities'].shape[0] <= 10000, else 200.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--alpha", + "description" : "The initial learning rate for the embedding optimization.", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--gamma", + "description" : "Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples.", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--negative_sample_rate", + "description" : "The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding.", + "default" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--init_pos", + "description" : "How to initialize the low dimensional embedding. Called `init` in the original UMAP. 
Options are:\n\n* Any key from `.obsm`\n* `'paga'`: positions from `paga()`\n* `'spectral'`: use a spectral embedding of the graph\n* `'random'`: assign initial embedding positions at random.\n", + "default" : [ + "spectral" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/umap/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. 
For a few comparisons of UMAP with tSNE, see this preprint.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/dimred/umap/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highcpu", + "midmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 
4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/dimred/umap/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/dimred/umap", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import scanpy as sc +import mudata as mu +import sys +import anndata as ad + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'uns_neighbors': $( if [ ! -z ${VIASH_PAR_UNS_NEIGHBORS+x} ]; then echo "r'${VIASH_PAR_UNS_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! 
-z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'min_dist': $( if [ ! -z ${VIASH_PAR_MIN_DIST+x} ]; then echo "float(r'${VIASH_PAR_MIN_DIST//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'spread': $( if [ ! -z ${VIASH_PAR_SPREAD+x} ]; then echo "float(r'${VIASH_PAR_SPREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_iter': $( if [ ! -z ${VIASH_PAR_MAX_ITER+x} ]; then echo "int(r'${VIASH_PAR_MAX_ITER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'gamma': $( if [ ! -z ${VIASH_PAR_GAMMA+x} ]; then echo "float(r'${VIASH_PAR_GAMMA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'negative_sample_rate': $( if [ ! -z ${VIASH_PAR_NEGATIVE_SAMPLE_RATE+x} ]; then echo "int(r'${VIASH_PAR_NEGATIVE_SAMPLE_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'init_pos': $( if [ ! -z ${VIASH_PAR_INIT_POS+x} ]; then echo "r'${VIASH_PAR_INIT_POS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s", par["input"]) +mdata = mu.read_h5mu(par["input"]) + +logger.info("Computing UMAP for modality '%s'", par['modality']) +data = mdata.mod[par['modality']] + +if par['uns_neighbors'] not in data.uns: + raise 
ValueError(f"'{par['uns_neighbors']}' was not found in .mod['{par['modality']}'].uns.")
+
+# create temporary AnnData
+# ... because sc.tl.umap doesn't allow to choose
+# the obsm output slot
+# ... also we can see scanpy is a data format dependency hell
+neigh_key = par["uns_neighbors"]
+temp_uns = { neigh_key: data.uns[neigh_key] }
+conn_key = temp_uns[neigh_key]['connectivities_key']
+dist_key = temp_uns[neigh_key]['distances_key']
+temp_obsp = {
+  conn_key: data.obsp[conn_key],
+  dist_key: data.obsp[dist_key],
+}
+pca_key = temp_uns[neigh_key]['params']['use_rep']
+temp_obsm = {
+  pca_key: data.obsm[pca_key]
+}
+
+temp_adata = ad.AnnData(
+  obsm=temp_obsm,
+  obsp=temp_obsp,
+  uns=temp_uns,
+  shape=data.shape
+)
+
+sc.tl.umap(
+  temp_adata,
+  min_dist=par["min_dist"],
+  spread=par["spread"],
+  n_components=par["num_components"],
+  maxiter=par["max_iter"],
+  alpha=par["alpha"],
+  gamma=par["gamma"],
+  negative_sample_rate=par["negative_sample_rate"],
+  init_pos=par["init_pos"],
+  neighbors_key=neigh_key
+)
+
+data.obsm[par['obsm_output']] = temp_adata.obsm['X_umap']
+
+logger.info("Writing to %s.", par["output"])
+mdata.write_h5mu(filename=par["output"], compression=par["output_compression"])
+
+logger.info("Finished")
+VIASHMAIN
+python -B "$tempscript"
+'''
+
+thisDefaultProcessArgs = [
+  // key to be used to trace the process and determine output names
+  key: thisConfig.functionality.name,
+  // fixed arguments to be passed to script
+  args: [:],
+  // default directives
+  directives: jsonSlurper.parseText('''{
+  "container" : {
+    "registry" : "ghcr.io",
+    "image" : "openpipelines-bio/dimred_umap",
+    "tag" : "0.12.0"
+  },
+  "label" : [
+    "highcpu",
+    "midmem"
+  ],
+  "tag" : "$id"
+}'''),
+  // auto settings
+  auto: jsonSlurper.parseText('''{
+  "simplifyInput" : true,
+  "simplifyOutput" : false,
+  "transcript" : false,
+  "publish" : false
+}'''),
+
+  // Apply a map over the incoming tuple
+  // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }`
+  map: null,
+
+  // Apply a map over the ID element of a tuple (i.e. the first element)
+  // Example: `{ id -> id + "_foo" }`
+  mapId: null,
+
+  // Apply a map over the data element of a tuple (i.e. the second element)
+  // Example: `{ data -> [ input: data.output ] }`
+  mapData: null,
+
+  // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements)
+  // Example: `{ pt -> pt.drop(1) }`
+  mapPassthrough: null,
+
+  // Filter the channel
+  // Example: `{ tup -> tup[0] == "foo" }`
+  filter: null,
+
+  // Rename keys in the data field of the tuple (i.e. the second element)
+  // Will likely be deprecated in favour of `fromState`.
+  // Example: `[ "new_key": "old_key" ]`
+  renameKeys: null,
+
+  // Fetch data from the state and pass it to the module without altering the current state.
+  //
+  // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function.
+  //
+  // - If it is `null`, the state will be passed to the module as is.
+  // - If it is a `List[String]`, the data will be the values of the state at the given keys.
+  // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map.
+  // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data.
+  //
+  // Example: `{ id, state -> [input: state.fastq_file] }`
+  // Default: `null`
+  fromState: null,
+
+  // Determine how the state should be updated after the module has been run.
+  //
+  // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function.
+  //
+  // - If it is `null`, the state will be replaced with the output of the module.
+  // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys.
+  // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map.
+  // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state.
+  //
+  // Example: `{ id, output, state -> state + [counts: state.output] }`
+  // Default: `{ id, output, state -> output }`
+  toState: null,
+
+  // Whether or not to print debug messages
+  // Default: `false`
+  debug: false
+]
+
+// END CUSTOM CODE
+
+/////////////////////////////////////
+// Viash Workflow helper functions //
+/////////////////////////////////////
+
+import java.util.regex.Pattern
+import java.io.BufferedReader
+import java.io.FileReader
+import java.nio.file.Paths
+import java.nio.file.Files
+import groovy.json.JsonSlurper
+import groovy.text.SimpleTemplateEngine
+import org.yaml.snakeyaml.Yaml
+
+// param helpers //
+
+// Return true when `params` contains `name` and its value is not the empty string.
+def paramExists(name) {
+  return params.containsKey(name) && params[name] != ""
+}
+
+// Exit with status 1 and an error message when the parameter `name` is missing or empty.
+def assertParamExists(name, description) {
+  if (!paramExists(name)) {
+    exit 1, "ERROR: Please provide a --${name} parameter ${description}"
+  }
+}
+
+// helper functions for reading params from file //
+
+// Resolve `child` against the directory part of `parent`.
+// A `child` that contains "://" or is an absolute path is returned unchanged.
+def getChild(parent, child) {
+  if (child.contains("://") || Paths.get(child).isAbsolute()) {
+    child
+  } else {
+    def parentAbsolute = Paths.get(parent).toAbsolutePath().toString()
+    // strip the last path component of the parent, keep the trailing slash
+    parentAbsolute.replaceAll('/[^/]*$', "/") + child
+  }
+}
+
+// Read a CSV file into a list of maps (one map per data row, keyed by the header fields).
+//
+// - Lines starting with "#" are skipped; the first remaining line is the header.
+// - Fields are split on commas that are not inside double quotes; one pair of
+//   surrounding double quotes is removed from a field.
+// - Empty fields are dropped from the row's map.
+// - Fails an assertion when there is no header or when a row's field count
+//   differs from the header's.
+def readCsv(file_path) {
+  def output = []
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+
+  // todo: allow escaped quotes in string
+  // todo: allow single quotes?
+  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
+  def removeQuote = Pattern.compile('''"(.*)"''')
+
+  def br = Files.newBufferedReader(inputFile)
+
+  // try/finally so the reader is closed on normal completion as well as
+  // when one of the assertions below fails
+  try {
+    def row = -1
+    def header = null
+    while (br.ready() && header == null) {
+      def line = br.readLine()
+      row++
+      if (!line.startsWith("#")) {
+        header = splitRegex.split(line, -1).collect{field ->
+          // `def` keeps the matcher local instead of leaking it into the script binding
+          def m = removeQuote.matcher(field)
+          m.find() ? m.replaceFirst('$1') : field
+        }
+      }
+    }
+    assert header != null: "CSV file should contain a header"
+
+    while (br.ready()) {
+      def line = br.readLine()
+      row++
+      if (line == null) {
+        break
+      }
+
+      if (!line.startsWith("#")) {
+        def predata = splitRegex.split(line, -1)
+        def data = predata.collect{field ->
+          if (field == "") {
+            return null
+          }
+          def m = removeQuote.matcher(field)
+          if (m.find()) {
+            return m.replaceFirst('$1')
+          } else {
+            return field
+          }
+        }
+        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
+
+        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
+        output.add(dataMap)
+      }
+    }
+  } finally {
+    br.close()
+  }
+
+  output
+}
+
+// Parse a JSON string.
+def readJsonBlob(str) {
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parseText(str)
+}
+
+// Parse a JSON file given as a Path, or as anything `file()` accepts.
+def readJson(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parse(inputFile)
+}
+
+// Parse a YAML string.
+def readYamlBlob(str) {
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(str)
+}
+
+// Parse a YAML file given as a Path, or as anything `file()` accepts.
+def readYaml(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(inputFile)
+}
+
+// helper functions for reading a viash config in groovy //
+
+// based on how Functionality.scala is implemented
+def processArgument(arg) {
+  arg.multiple = arg.multiple != null ? arg.multiple : false
+  arg.required = arg.required != null ? arg.required : false
+  arg.direction = arg.direction != null ?
arg.direction : "input"
+  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":"
+  arg.plainName = arg.name.replaceAll("^-*", "")
+
+  if (arg.type == "file") {
+    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
+    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
+  }
+
+  if (arg.type == "file" && arg.direction == "output") {
+    def mult = arg.multiple ? "_*" : ""
+    def extSearch = ""
+    if (arg.default != null) {
+      extSearch = arg.default
+    } else if (arg.example != null) {
+      extSearch = arg.example
+    }
+    if (extSearch instanceof List) {
+      extSearch = extSearch[0]
+    }
+    def extSearchResult = extSearch.find("\\.[^\\.]+\$")
+    def ext = extSearchResult != null ? extSearchResult : ""
+    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
+  }
+
+  if (!arg.multiple) {
+    if (arg.default != null && arg.default instanceof List) {
+      arg.default = arg.default[0]
+    }
+    if (arg.example != null && arg.example instanceof List) {
+      arg.example = arg.example[0]
+    }
+  }
+
+  if (arg.type == "boolean_true") {
+    arg.default = false
+  }
+  if (arg.type == "boolean_false") {
+    arg.default = true
+  }
+
+  arg
+}
+
+// based on how Functionality.scala is implemented
+//
+// Build the argument group called `name` so that it covers `arguments`.
+//
+// argumentGroups: list of group maps (`name`, `arguments`); group arguments are
+//                 either argument maps or plain argument names (String).
+// name:           name of the group to look up or create.
+// arguments:      argument maps that should end up in that group.
+//
+// Returns a list holding zero or one group map. The group maps passed in are
+// never modified: when arguments have to be added, a copy with a new
+// `arguments` list is returned.
+def processArgumentGroup(argumentGroups, name, arguments) {
+  def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet()
+
+  // Check if 'arguments' is in 'argumentGroups'.
+  def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))}
+
+  // Check whether an argument group of 'name' exists.
+  def existing = argumentGroups.find{gr -> name == gr.name}
+
+  // if there are no arguments missing from the argument group, just return the existing group (if any)
+  if (argumentsNotInGroup.isEmpty()) {
+    return existing == null ? [] : [existing]
+
+  // if there are missing arguments and there is an existing group, add the missing arguments to it
+  } else if (existing != null) {
+    // clone() is shallow: build a fresh list rather than calling addAll on the
+    // list that is still shared with the caller's group
+    def newEx = existing.clone()
+    newEx.arguments = (existing.arguments ?: []) + argumentsNotInGroup.findAll{it !instanceof String}
+    return [newEx]
+
+  // else create a new group
+  } else {
+    def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}]
+    return [newEx]
+  }
+}
+
+// based on how Functionality.scala is implemented
+def processConfig(config) {
+  // TODO: assert .functionality etc.
+  if (config.functionality.inputs) {
+    System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.")
+  }
+  if (config.functionality.outputs) {
+    System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.")
+  }
+
+  // set defaults for inputs
+  config.functionality.inputs =
+    (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg ->
+      arg.type = arg.type != null ? arg.type : "file"
+      arg.direction = "input"
+      processArgument(arg)
+    }
+  // set defaults for outputs
+  config.functionality.outputs =
+    (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg ->
+      arg.type = arg.type != null ? arg.type : "file"
+      arg.direction = "output"
+      processArgument(arg)
+    }
+  // set defaults for arguments
+  config.functionality.arguments =
+    (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg ->
+      processArgument(arg)
+    }
+  // set defaults for argument_group arguments
+  config.functionality.argument_groups =
+    (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp ->
+      grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg ->
+        arg instanceof String ?
arg.replaceAll("^-*", "") : processArgument(arg)
+      }
+      grp
+    }
+
+  // create combined arguments list
+  config.functionality.allArguments =
+    config.functionality.inputs +
+    config.functionality.outputs +
+    config.functionality.arguments +
+    config.functionality.argument_groups.collectMany{ group ->
+      group.arguments.findAll{ it !instanceof String }
+    }
+
+  // add missing argument groups (based on Functionality::allArgumentGroups())
+  def argGroups = config.functionality.argument_groups
+  def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs)
+  def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs)
+  def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments)
+  def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name)))
+  config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered
+
+  config
+}
+
+// Read a Viash config (YAML) and apply processConfig to it.
+// When `file` is null, "$projectDir/config.vsh.yaml" is read instead.
+def readConfig(file) {
+  def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml")
+  processConfig(config)
+}
+
+// recursively merge two maps
+//
+// Starts from a shallow clone of `lhs` and folds in every entry of `rhs`:
+// - Map onto Map: merged recursively
+// - Collection onto Collection: concatenated
+// - anything else: the value from `rhs` wins
+def mergeMap(Map lhs, Map rhs) {
+  return rhs.inject(lhs.clone()) { map, entry ->
+    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
+      map[entry.key] = mergeMap(map[entry.key], entry.value)
+    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
+      map[entry.key] += entry.value
+    } else {
+      map[entry.key] = entry.value
+    }
+    return map
+  }
+}
+
+// Merge the Nextflow-level arguments (`--publish_dir`, `--param_list`) into
+// `config` as an extra argument group and re-run processConfig on the result.
+def addGlobalParams(config) {
+  def localConfig = [
+    "functionality" : [
+      "argument_groups": [
+        [
+          "name": "Nextflow input-output arguments",
+          "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+          "arguments" : [
+            [
+              'name': '--publish_dir',
+              'required': true,
+              'type': 'string',
+              'description': 'Path to an output directory.',
+              'example': 'output/',
+              'multiple': false
+            ],
+            [
+              'name': '--param_list',
+              'required': false,
+              'type': 'string',
+              'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
+              |
+              |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
+              |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
+              |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
+              |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
+              |
+              |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+              'example': 'my_params.yaml',
+              'multiple': false,
+              'hidden': true
+            ],
+          ]
+        ]
+      ]
+    ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+// based on io.viash.helpers.Format.wordWrap
+//
+// Greedily pack the whitespace-separated words of `str` into lines of at most
+// `maxLength` characters and return the lines as a list. A single word longer
+// than `maxLength` gets a line of its own.
+def formatWordWrap(str, maxLength) {
+  def words = str.split("\\s").toList()
+
+  def word = null
+  // seed the first line with the first word (as paragraphWrap does) so that
+  // no line starts with a separator space
+  def line = words.pop()
+  def lines = []
+  while(!words.isEmpty()) {
+    word = words.pop()
+    if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  lines.add(line)
+  return lines
+}
+
+// based on Format.paragraphWrap
+//
+// Same as formatWordWrap, but applied to every "\n"-separated paragraph of
+// `str` in turn; the wrapped lines of all paragraphs are returned as one list.
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    def word = null
+    def line = words.pop()
+    while(!words.isEmpty()) {
+      word = words.pop()
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    // flush the last line of the paragraph
+    outLines.add(line)
+  }
+  return outLines
+}
+
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ?
param.multiple_sep : ", ")
+    } else {
+      example = param.example.toString()
+    }
+  }
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  // quote a choice when it contains a comma or needed escaping
+  def escapeChoice = { choice ->
+    def s1 = choice.replaceAll("\\n", "\\\\n")
+    def s2 = s1.replaceAll("\"", """\\\"""")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  // one "name: value" line per property that has a (truthy) value
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+// Based on Helper.generateHelp() in Helper.scala
+//
+// Build the help text for a processed config: name and version, description,
+// usage, and one section per entry of `functionality.allArgumentGroups`.
+// Returns the text as a single String.
+def generateHelp(config) {
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // group entries given by name are looked up in allArguments;
+    // names without a match are dropped
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+// When the `help` parameter is set, print the help text for `config`
+// (including the global Nextflow arguments) and exit with status 0.
+def helpMessage(config) {
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+// Classify `params.param_list` as one of
+// "none", "asis", "csv", "json", "yaml" or "yaml_blob":
+// - missing or null       -> "none"
+// - not a String          -> "asis"
+// - String, by extension  -> "csv" / "json" / "yaml"
+// - any other String      -> "yaml_blob"
+def _guessParamListFormat(params) {
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+// script-level flag: set once a deprecation warning has been printed, so the
+// deprecated channel helpers below warn only once between them
+viashChannelDeprecationWarningPrinted = false
+
+def paramsToList(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
+  def paramList = multiOptionOut[1]
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        // split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ?
par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      .findAll{ par -> par != null }
+  }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+// Deprecated: wrap the result of paramsToList in a channel.
+def paramsToChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+// Deprecated: like paramsToChannel, but every event is mapped to `[id, parameter map]`.
+def viashChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * The separator (`multiple_sep`) is treated as a literal string, not as a
+ * regular expression, so separators such as "|" or "." split as written.
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ *         their separator. Parameters that are not found in the config are returned
+ *         unaltered. For arguments that do not accept multiple values, a collection of
+ *         length 1 is simplified to its single element; a longer one fails an assertion.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      // String.split() takes a regex: quote the separator so it is matched literally
+      def sepRegex = Pattern.quote(parameterSettings.multiple_sep)
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(sepRegex) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(sepRegex)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // compare against a Set instead of calling unique(), which would strip the
+  // duplicates from ppIds before they are printed in the message below
+  assert ppIds.size() == ppIds.toSet().size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only values of arguments with type "file" and direction "input" are touched,
+ * and only when the value (or list element) is a String; everything else is
+ * returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a non-String value is kept as it is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list (unless it already is a Collection),
+ * each element is cast according to the argument's `type`, and the list is
+ * unwrapped again for arguments that do not accept multiple values.
+ * Parameters that are not in the config (e.g. publish_dir) are not cast.
+ *
+ * For boolean arguments the Strings "true" and "false" (case-insensitive)
+ * are parsed by content; a plain `as Boolean` cast would apply Groovy truth
+ * and turn every non-empty String, including "false", into true.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{ value ->
+        if (value instanceof String) {
+          def lower = value.trim().toLowerCase()
+          if (lower == "true") {
+            return true
+          }
+          if (lower == "false") {
+            return false
+          }
+        }
+        value as Boolean
+      }
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of `[id, parameter map, ...passthrough]` lists.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed; the id and any
+ *         passthrough elements of each set are kept as they are.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to to initialise
+ *     a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings?.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) : "Error in processAuto: missing required key '${key}' in 'auto'" + assert auto[key] instanceof Boolean : "Error in processAuto: expected 'auto.${key}' to be a Boolean. Found: class ${auto[key].getClass()}" + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr; valueToStr = { val -> // declared before assignment so the closure can call itself recursively + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); the instanceof test is parenthesised because '!' binds tighter than 'instanceof', and 'out' is declared with 'def' so it stays local to this closure instead of living in the script binding + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/dimred/umap/nextflow.config b/target/nextflow/dimred/umap/nextflow.config new file mode 100644 index 00000000000..01da52cb467 --- /dev/null 
+++ b/target/nextflow/dimred/umap/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'umap' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n' + author = 'Dries De Maeyer' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { 
+ charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/dimred/umap/nextflow_params.yaml b/target/nextflow/dimred/umap/nextflow_params.yaml new file mode 100644 index 00000000000..fc7cc388975 --- /dev/null +++ b/target/nextflow/dimred/umap/nextflow_params.yaml @@ -0,0 +1,23 @@ +# Inputs +input: # please fill in - example: "input.h5mu" +modality: "rna" +uns_neighbors: "neighbors" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +obsm_output: "umap" + +# Arguments +min_dist: 0.5 +spread: 1.0 +num_components: 2 +# max_iter: 123 +alpha: 1.0 +gamma: 1.0 +negative_sample_rate: 5 +init_pos: "spectral" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: 
"my_params.yaml" diff --git a/target/nextflow/dimred/umap/nextflow_schema.json b/target/nextflow/dimred/umap/nextflow_schema.json new file mode 100644 index 00000000000..b333a0fe17d --- /dev/null +++ b/target/nextflow/dimred/umap/nextflow_schema.json @@ -0,0 +1,241 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "umap", +"description": "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "uns_neighbors": { + "type": + "string", + "description": "Type: `string`, default: `neighbors`. The `", + "help_text": "Type: `string`, default: `neighbors`. The `.uns` neighbors slot as output by the `find_neighbors` component." 
+ , + "default": "neighbors" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "obsm_output": { + "type": + "string", + "description": "Type: `string`, default: `umap`. The pre/postfix under which to store the UMAP results", + "help_text": "Type: `string`, default: `umap`. The pre/postfix under which to store the UMAP results." + , + "default": "umap" + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "min_dist": { + "type": + "number", + "description": "Type: `double`, default: `0.5`. The effective minimum distance between embedded points", + "help_text": "Type: `double`, default: `0.5`. The effective minimum distance between embedded points. Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out." + , + "default": "0.5" + } + + + , + "spread": { + "type": + "number", + "description": "Type: `double`, default: `1.0`. 
The effective scale of embedded points", + "help_text": "Type: `double`, default: `1.0`. The effective scale of embedded points. In combination with `min_dist` this determines how clustered/clumped the embedded points are." + , + "default": "1.0" + } + + + , + "num_components": { + "type": + "integer", + "description": "Type: `integer`, default: `2`. The number of dimensions of the embedding", + "help_text": "Type: `integer`, default: `2`. The number of dimensions of the embedding." + , + "default": "2" + } + + + , + "max_iter": { + "type": + "integer", + "description": "Type: `integer`. The number of iterations (epochs) of the optimization", + "help_text": "Type: `integer`. The number of iterations (epochs) of the optimization. Called `n_epochs` in the original UMAP. Default is set to 500 if neighbors[\u0027connectivities\u0027].shape[0] \u003c= 10000, else 200." + + } + + + , + "alpha": { + "type": + "number", + "description": "Type: `double`, default: `1.0`. The initial learning rate for the embedding optimization", + "help_text": "Type: `double`, default: `1.0`. The initial learning rate for the embedding optimization." + , + "default": "1.0" + } + + + , + "gamma": { + "type": + "number", + "description": "Type: `double`, default: `1.0`. Weighting applied to negative samples in low dimensional embedding optimization", + "help_text": "Type: `double`, default: `1.0`. Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples." + , + "default": "1.0" + } + + + , + "negative_sample_rate": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding", + "help_text": "Type: `integer`, default: `5`. 
The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding." + , + "default": "5" + } + + + , + "init_pos": { + "type": + "string", + "description": "Type: `string`, default: `spectral`. How to initialize the low dimensional embedding", + "help_text": "Type: `string`, default: `spectral`. How to initialize the low dimensional embedding. Called `init` in the original UMAP. Options are:\n\n* Any key from `.obsm`\n* `\u0027paga\u0027`: positions from `paga()`\n* `\u0027spectral\u0027`: use a spectral embedding of the graph\n* `\u0027random\u0027`: assign initial embedding positions at random.\n" + , + "default": "spectral" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/dimred/umap/setup_logger.py b/target/nextflow/dimred/umap/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/dimred/umap/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/download/download_file/.config.vsh.yaml b/target/nextflow/download/download_file/.config.vsh.yaml new file mode 100644 index 00000000000..8516dd1bfda --- /dev/null +++ b/target/nextflow/download/download_file/.config.vsh.yaml @@ -0,0 +1,138 @@ +functionality: + name: "download_file" + namespace: "download" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "string" + name: "--input" + description: "URL to a file to download." 
+ info: null + example: + - "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Path where to store output." + info: null + example: + - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--verbose" + alternatives: + - "-v" + description: "Increase verbosity" + info: null + direction: "input" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Download a file.\n" + usage: "download_file \\\n --input https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5\ + \ \\\n --output output_rna.h5\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "bash:5.1.16" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 
512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/download/download_file/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/download_file" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/download_file/download_file" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/download/download_file/main.nf b/target/nextflow/download/download_file/main.nf new file mode 100644 index 00000000000..eadbf2f921e --- /dev/null +++ b/target/nextflow/download/download_file/main.nf @@ -0,0 +1,2492 @@ +// download_file 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "download_file", + "namespace" : "download", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "string", + "name" : "--input", + "description" : "URL to a file to download.", + "example" : [ + "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Path where to store output.", + "example" : [ + "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--verbose", + "alternatives" : [ + "-v" + ], + "description" : "Increase verbosity", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : 
"file:/home/runner/work/openpipeline/openpipeline/src/download/download_file/" + } + ], + "description" : "Download a file.\n", + "usage" : "download_file \\\\\n --input https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\\\n --output output_rna.h5\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "run_test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/download/download_file/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "bash:5.1.16", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + 
"cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/download/download_file/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/download_file", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=() + +if [ "\\$par_verbose" != "true" ]; then + extra_params+=("--quiet") +fi + +wget "\\$par_input" -O "\\$par_output" "\\${extra_params[@]}" +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/download_download_file", + "tag" : "0.12.0" + }, + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" // unique(false) leaves ppIds intact so the message still shows the duplicated ids +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + def argSettings = config.functionality.allArguments.find{it.plainName == parName} // local variable, not a script-binding global + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue)) // non-String scalars pass through unchanged + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // paramListFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, paramListFile contains the location of that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local to this closure; it is null for parameters
+      // that are not defined in the config (e.g. publish_dir), hence the safe call
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose first
+ *         element is the ID of the event and whose second element is a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // local variable: an undeclared name would be stored in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // interpolate the closure parameter; no variable named 'key' exists in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+// Validate the 'auto' settings: drops null entries, asserts that every expected
+// key is present with a Boolean value, and returns only the expected keys.
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // declared with 'def' so the list stays local instead of entering the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/download/download_file/nextflow.config b/target/nextflow/download/download_file/nextflow.config new file mode 100644 index 
00000000000..440340e4727 --- /dev/null +++ b/target/nextflow/download/download_file/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'download_file' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Download a file.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { 
memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/download/download_file/nextflow_params.yaml b/target/nextflow/download/download_file/nextflow_params.yaml new file mode 100644 index 00000000000..d3120622209 --- /dev/null +++ b/target/nextflow/download/download_file/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Arguments +input: # please fill in - example: "https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" +# output: "$id.$key.output.h5" +verbose: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/download/download_file/nextflow_schema.json b/target/nextflow/download/download_file/nextflow_schema.json new file mode 100644 index 00000000000..b4a3b7c48a3 --- /dev/null +++ b/target/nextflow/download/download_file/nextflow_schema.json @@ -0,0 +1,92 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "download_file", +"description": "Download a file.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: 
`string`, required, example: `https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. URL to a file to download", + "help_text": "Type: `string`, required, example: `https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_protein_v3/pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. URL to a file to download." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. Path where to store output", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. Path where to store output." + , + "default": "$id.$key.output.h5" + } + + + , + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Increase verbosity", + "help_text": "Type: `boolean_true`, default: `false`. Increase verbosity" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/download/sync_test_resources/.config.vsh.yaml b/target/nextflow/download/sync_test_resources/.config.vsh.yaml new file mode 100644 index 00000000000..a022b436753 --- /dev/null +++ b/target/nextflow/download/sync_test_resources/.config.vsh.yaml @@ -0,0 +1,170 @@ +functionality: + name: "sync_test_resources" + namespace: "download" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "string" + name: "--input" + alternatives: + - "-i" + description: "Path to the S3 bucket to sync from." + info: null + default: + - "s3://openpipelines-data" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Path to the test resource directory." + info: null + default: + - "resources_test" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--quiet" + description: "Displays the operations that would be performed using the specified\ + \ command without actually running them." + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--dryrun" + description: "Does not display the operations performed from the specified command." 
+ info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--delete" + description: "Files that exist in the destination but not in the source are deleted\ + \ during sync." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--exclude" + description: "Exclude all files or objects from the command that matches the specified\ + \ pattern." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Synchronise the test resources from s3://openpipelines-data to resources_test" + usage: "sync_test_resources\nsync_test_resources --input s3://openpipelines-data\ + \ --output resources_test\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "amazon/aws-cli:2.11.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "yum" + packages: + - "procps" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 
8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/sync_test_resources" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/sync_test_resources/sync_test_resources" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/download/sync_test_resources/main.nf b/target/nextflow/download/sync_test_resources/main.nf new file mode 100644 index 00000000000..6caa3670d57 --- /dev/null +++ b/target/nextflow/download/sync_test_resources/main.nf @@ -0,0 +1,2554 @@ +// sync_test_resources 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "sync_test_resources", + "namespace" : "download", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "string", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to the S3 bucket to sync from.", + "default" : [ + "s3://openpipelines-data" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Path to the test resource directory.", + "default" : [ + "resources_test" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "description" : "Displays the operations that would be performed using the specified command without actually running them.", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--dryrun", + "description" : "Does not display the operations performed from the specified command.", 
+ "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--delete", + "description" : "Files that exist in the destination but not in the source are deleted during sync.", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--exclude", + "description" : "Exclude all files or objects from the command that matches the specified pattern.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/" + } + ], + "description" : "Synchronise the test resources from s3://openpipelines-data to resources_test", + "usage" : "sync_test_resources\nsync_test_resources --input s3://openpipelines-data --output resources_test\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "run_test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "amazon/aws-cli:2.11.0", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "yum", + "packages" : [ + "procps" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, 
+ "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/download/sync_test_resources/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/download/sync_test_resources", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! 
-z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "${VIASH_PAR_DRYRUN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dryrun='&'#" ; else echo "# par_dryrun="; fi ) +$( if [ ! -z ${VIASH_PAR_DELETE+x} ]; then echo "${VIASH_PAR_DELETE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_delete='&'#" ; else echo "# par_delete="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=( ) + +if [ "\\$par_quiet" == "true" ]; then + extra_params+=( "--quiet" ) +fi +if [ "\\$par_dryrun" == "true" ]; then + extra_params+=( "--dryrun" ) +fi +if [ "\\$par_delete" == "true" ]; then + extra_params+=( "--delete" ) +fi + +if [ ! -z \\${par_exclude+x} ]; then + IFS=":" + for var in \\$par_exclude; do + unset IFS + extra_params+=( "--exclude" "\\$var" ) + done +fi + + +# Disable the use of the Amazon EC2 instance metadata service (IMDS). 
+# see https://florian.ec/blog/github-actions-awscli-errors/ +# or https://github.com/aws/aws-cli/issues/5234#issuecomment-705831465 +export AWS_EC2_METADATA_DISABLED=true + +aws s3 sync "\\$par_input" "\\$par_output" --no-sign-request "\\${extra_params[@]}" +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/download_sync_test_resources", + "tag" : "0.12.0" + }, + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List parameterSets) {
+  // Every parameter set is a list-like tuple whose first element is its id.
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The mutating unique() would remove the
+  // duplicates from ppIds itself, so the failure message would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the input file paths in a parameter set relative to a given path.
+ *
+ * Only arguments that the config declares with type "file" and direction "input" are
+ * touched, and only values that are still of type String are resolved; all other
+ * values (and all other arguments) are returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is passed through as is (the previous code referenced
+        // an undefined 'path' variable here)
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // every parser returns a pair [location of the param_list file or null, parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat every Map into an [id, paramValues] pair, with the id removed from paramValues
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list, cast element by element, and unwrapped again
+ * when the config declares the argument as not accepting multiple values.
+ * Parameters that are not declared in the config are not cast and stay wrapped in a list.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to the closure instead of writing to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{ value ->
+        // Groovy truth coerces every non-empty string to true, so the textual values
+        // "true"/"false" (e.g. fields read from a csv param_list) are parsed explicitly.
+        def text = value instanceof CharSequence ? value.toString().toLowerCase() : null
+        if (text == "true") {
+          true
+        } else if (text == "false") {
+          false
+        } else {
+          value as Boolean
+        }
+      }
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed;
+  // the two publish dir spellings are accepted on top of the arguments from the config
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of every set is its id,
+ *                      element 1 the Map of parameter values and any further
+ *                      elements are passed through untouched.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to to initialise
+ *     a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
    accepted examples:
    - "1h"
    - "2days"
    - "1day 6hours 3minutes 30seconds"
  */
  if (drctv.containsKey("time")) {
    assert drctv["time"] instanceof CharSequence
    // todo: validation regex?
  }

  return drctv
}

// TODO: unit test processAuto
/**
 * Validate the 'auto' settings of a module.
 *
 * @param auto A Map that must contain a Boolean for each of the keys
 *             'simplifyInput', 'simplifyOutput', 'transcript' and 'publish'.
 *
 * @return A Map restricted to those four keys.
 */
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // 'def' keeps the list local instead of writing it to the script binding
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key)
    assert auto[key] instanceof Boolean
  }

  return auto.subMap(expectedKeys)
}

/**
 * Merge the module's default process arguments with the provided ones and
 * validate / normalise the result.
 *
 *   - 'key' may be a Closure, which is applied to the functionality name.
 *   - 'directives' and 'auto' are merged with their defaults and checked with
 *     processDirectives() and processAuto().
 *   - publishDir entries are derived from params when auto.publish or
 *     auto.transcript is enabled.
 *   - 'fromState' and 'toState' given as a List or Map are converted to Closures.
 *
 * @param args The process arguments that override thisDefaultProcessArgs.
 *
 * @return The processed process arguments.
 */
def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${processArgs['directives'].getClass()}"
  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])

  // check whether auto exists and apply defaults
  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])

  // auto define publish, if so desired
  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
    //   "  Example: params.publish_dir = \"./output/\""
    def publishDir =
      params.containsKey("publish_dir") ? params.publish_dir :
      params.containsKey("publishDir") ? params.publishDir :
      null

    if (publishDir != null) {
      processArgs.directives.publishDir = [[
        path: publishDir,
        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
        mode: "copy"
      ]]
    }
  }

  // auto define transcript, if so desired
  if (processArgs.auto.transcript == true) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
    //   "  Example: params.transcripts_dir = \"./transcripts/\""
    def transcriptsDir =
      params.containsKey("transcripts_dir") ? params.transcripts_dir :
      params.containsKey("transcriptsDir") ? params.transcriptsDir :
      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
      null
    if (transcriptsDir != null) {
      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
      def transcriptsPublishDir = [
        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
        mode: "copy"
      ]
      // start from the existing publishDir entries, or an empty list when none are set
      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
    }
  }

  // if this is a stubrun, remove certain directives?
  if (workflow.stubRun) {
    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
  }

  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
    if (processArgs.containsKey(nam) && processArgs[nam]) {
      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
    }
  }

  // check fromState
  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
  def fromState = processArgs["fromState"]
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  if (fromState) {
    // if fromState is a List, convert to map
    if (fromState instanceof List) {
      // check whether fromstate is a list[string]
      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
      fromState = fromState.collectEntries{[it, it]}
    }

    // if fromState is a map, convert to closure
    if (fromState instanceof Map) {
      // check whether fromstate is a map[string, string]
      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
      def fromStateMap = fromState.clone()
      // turn the map into a closure to be used later on
      fromState = { it ->
        def state = it[1]
        assert state instanceof Map : "Error in module '$key': the state is not a Map"
        def data = fromStateMap.collectEntries{newkey, origkey ->
          // check whether all values of fromState are in state
          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
          [newkey, state[origkey]]
        }
        data
      }
    }

    processArgs["fromState"] = fromState
  }

  // check toState
  def toState = processArgs["toState"]

  if (toState == null) {
    // default: use the second element of the tuple as the new state
    toState = { tup -> tup[1] }
  }

  // toState should be a closure, map[string, string], or list[string]
  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"

  // if toState is a List, convert to map
  if (toState instanceof List) {
    // check whether toState is a list[string]
    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
    toState = toState.collectEntries{[it, it]}
  }

  // if toState is a map, convert to closure
  if (toState instanceof Map) {
    // check whether toState is a map[string, string]
    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
    def toStateMap = toState.clone()
    // turn the map into a closure to be used later on
    toState = { it ->
      def output = it[1]
      def state = it[2]
      assert output instanceof Map : "Error in module '$key': the output is not a Map"
      assert state instanceof Map : "Error in module '$key': the state is not a Map"
      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
        // check whether all values of toState are in output
        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
        [newkey, output[origkey]]
      }
      state + extraEntries
    }
  }

  processArgs["toState"] = toState

  // return output
  return processArgs
}

def processFactory(Map processArgs) {
  // autodetect process key
  def wfKey = processArgs["key"]
  def procKeyPrefix = "${wfKey}_process"
  def meta = ScriptMeta.current()
  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
  def newNumber = (numbers + [-1]).max() + 1

  def procKey =
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); '!' binds tighter than instanceof, hence the parentheses + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/download/sync_test_resources/nextflow.config b/target/nextflow/download/sync_test_resources/nextflow.config new file mode 100644 index 
00000000000..b1d940b3fc6 --- /dev/null +++ b/target/nextflow/download/sync_test_resources/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'sync_test_resources' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Synchronise the test resources from s3://openpipelines-data to resources_test' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 
128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/download/sync_test_resources/nextflow_params.yaml b/target/nextflow/download/sync_test_resources/nextflow_params.yaml new file mode 100644 index 00000000000..7c714c2f36c --- /dev/null +++ b/target/nextflow/download/sync_test_resources/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: "s3://openpipelines-data" +# output: "$id.$key.output.output" +quiet: false +dryrun: false +delete: false +# exclude: ["foo"] + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/download/sync_test_resources/nextflow_schema.json b/target/nextflow/download/sync_test_resources/nextflow_schema.json new file mode 100644 index 00000000000..8c753a3650a --- /dev/null +++ b/target/nextflow/download/sync_test_resources/nextflow_schema.json @@ -0,0 +1,125 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "sync_test_resources", +"description": "Synchronise the test resources from s3://openpipelines-data to resources_test", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + 
"description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `string`, default: `s3://openpipelines-data`. Path to the S3 bucket to sync from", + "help_text": "Type: `string`, default: `s3://openpipelines-data`. Path to the S3 bucket to sync from." + , + "default": "s3://openpipelines-data" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.output`. Path to the test resource directory", + "help_text": "Type: `file`, default: `$id.$key.output.output`. Path to the test resource directory." + , + "default": "$id.$key.output.output" + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Does not display the operations performed from the specified command", + "help_text": "Type: `boolean_true`, default: `false`. Does not display the operations performed from the specified command." + , + "default": "False" + } + + + , + "dryrun": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Displays the operations that would be performed using the specified command without actually running them", + "help_text": "Type: `boolean_true`, default: `false`. Displays the operations that would be performed using the specified command without actually running them." + , + "default": "False" + } + + + , + "delete": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Files that exist in the destination but not in the source are deleted during sync", + "help_text": "Type: `boolean_true`, default: `false`. Files that exist in the destination but not in the source are deleted during sync." + , + "default": "False" + } + + + , + "exclude": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Exclude all files or objects from the command that matches the specified pattern", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`.
Exclude all files or objects from the command that matches the specified pattern." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/files/make_params/.config.vsh.yaml b/target/nextflow/files/make_params/.config.vsh.yaml new file mode 100644 index 00000000000..f521966803b --- /dev/null +++ b/target/nextflow/files/make_params/.config.vsh.yaml @@ -0,0 +1,220 @@ +functionality: + name: "make_params" + namespace: "files" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--base_dir" + description: "Base directory to search recursively" + info: null + example: + - "/path/to/dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: 
"string" + name: "--pattern" + description: "An optional regular expression. Only file names which match the\ + \ regular expression will be matched." + info: null + example: + - "*.fastq.gz" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_dirname_drop" + description: "For every matched file, the parent directory will be traversed N\ + \ times." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_basename_id" + description: "The unique identifiers will consist of at least N dirnames." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--id_name" + description: "The name for storing the identifier field in the yaml." + info: null + default: + - "id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--path_name" + description: "The name for storing the path field in the yaml." + info: null + default: + - "path" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--group_name" + description: "Top level name for the group of entries." + info: null + example: + - "param_list" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output YAML file." + info: null + example: + - "params.yaml" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "r_script" + path: "script.R" + is_executable: true + description: "Looks for files in a directory and turn it in a params file." 
+ test_resources: + - type: "bash_script" + path: "test_make_params.sh" + is_executable: true + - type: "file" + path: "../../../src" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/randpy:r4.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/files/make_params/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/files/make_params" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/files/make_params/make_params" + 
viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/files/make_params/main.nf b/target/nextflow/files/make_params/main.nf new file mode 100644 index 00000000000..64e71970b84 --- /dev/null +++ b/target/nextflow/files/make_params/main.nf @@ -0,0 +1,2663 @@ +// make_params 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (maintainer, author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "make_params", + "namespace" : "files", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer", + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : 
"rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--base_dir", + "description" : "Base directory to search recursively", + "example" : [ + "/path/to/dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--pattern", + "description" : "An optional regular expression. Only file names which match the regular expression will be matched.", + "example" : [ + "*.fastq.gz" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_dirname_drop", + "description" : "For every matched file, the parent directory will be traversed N times.", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_basename_id", + "description" : "The unique identifiers will consist of at least N dirnames.", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--id_name", + "description" : "The name for storing the identifier field in the yaml.", + "default" : [ + "id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--path_name", + "description" : "The name for storing the path field in the yaml.", + "default" : [ + "path" + ], + "required" : false, + 
"direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--group_name", + "description" : "Top level name for the group of entries.", + "example" : [ + "param_list" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output YAML file.", + "example" : [ + "params.yaml" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "r_script", + "path" : "script.R", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/files/make_params/" + } + ], + "description" : "Looks for files in a directory and turn it in a params file.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test_make_params.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/files/make_params/" + }, + { + "type" : "file", + "path" : "../../../src", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/files/make_params/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/randpy:r4.0", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + 
"simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/files/make_params/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/files/make_params", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +library(dplyr) +library(purrr) + +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "base_dir" = $( if [ ! -z ${VIASH_PAR_BASE_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_BASE_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "pattern" = $( if [ ! 
-z ${VIASH_PAR_PATTERN+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATTERN" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "n_dirname_drop" = $( if [ ! -z ${VIASH_PAR_N_DIRNAME_DROP+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_DIRNAME_DROP" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "n_basename_id" = $( if [ ! -z ${VIASH_PAR_N_BASENAME_ID+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_N_BASENAME_ID" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "id_name" = $( if [ ! -z ${VIASH_PAR_ID_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_ID_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "path_name" = $( if [ ! -z ${VIASH_PAR_PATH_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_PATH_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "group_name" = $( if [ ! -z ${VIASH_PAR_GROUP_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_GROUP_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +cat("> Listing files of base dir ", par\\$base_dir, "\\\\n", sep = "") +paths <- list.files( + normalizePath(par\\$base_dir), + pattern = par\\$pattern, + recursive = TRUE, + full.names = TRUE +) + +cat("> Traversing up ", par\\$n_dirname_drop, " times\\\\n", sep = "") +for (i in seq_len(par\\$n_dirname_drop)) { + paths <- dirname(paths) %>% unique() +} + +# removing /viash_automount in case we're inside a docker container +paths <- gsub("^/viash_automount", "", paths) + +cat("> Checking whether basenames are unique\\\\n") +i <- par\\$n_basename_id +maxi <- strsplit(paths, "/") %>% map_int(length) %>% max + +regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\\$") +ids <- gsub("/", "_", gsub(regex, "\\\\\\\\1", paths)) + +cat("> Printing first five rows\\\\n") +print(tibble(id = ids, path = paths) %>% head(5)) +cat("\\\\n") + +while (i < maxi && any(duplicated(ids))) { + i <- i + 1 + cat("Duplicated ids detected, combining with ", i, " dirnames in an attempt to get unique ids.\\\\n") + regex <- paste0(".*/(", paste(rep("[^/]+/", i), collapse = ""), "[^/]*)\\$") + ids <- gsub("/", "_", gsub(regex, "\\\\\\\\1", paths)) + + cat("> Printing first five rows\\\\n") + print(tibble(id = ids, path = paths) %>% head(5)) + cat("\\\\n") +} + +cat("> Transforming into list of items\\\\n") +par_list <- map2( + ids, paths, + function(id, input) { + setNames(list(id, input), c(par\\$id_name, par\\$path_name)) + } +) + +if (!is.null(par\\$group_name)) { + par_list <- setNames(list(par_list), par\\$group_name) +} + +cat("> Writing as YAML\\\\n") +yaml::write_yaml(par_list, par\\$output) +VIASHMAIN +Rscript "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process 
and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/files_make_params", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = 
Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. 
+ def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? 
config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy; the plain unique() would mutate
+  // ppIds, so the error message below would no longer show the duplicated ids.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path. Values that are not Strings
+ *         are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of leaking into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single value that is not a String is passed through as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns a pair: [ location of the param_list file (or null), parsed content ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared with 'def' so the lookups stay local to this closure
+    // instead of being written to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so that single and multiple values share the same casting code
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters that are not in the config (paramSettings == null)
+    // stay wrapped in a list here -- confirm that this is intended.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. Element 0 of each set is the id, element 1
+ *                      the parameter map; any further elements are passed through unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to to initialise
+ *     a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a key clash the value from 'param_list' wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local to this closure; '?.' guards parameters
+      // that are accepted without being in the config (e.g. publish_dir)
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose first
+ *         element is the ID of the event and whose second element is a parameter map.
+ */
+def channelFromParams(Map params, Map config) {
+  // declared with 'def' so the list is not stored in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, as a map from argument name to default value
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Run the defaults through the same split/cast logic as the user input
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+
+  // Process each input tuple and lay its values over the defaults;
+  // elements after the second one are appended unchanged
+  def merged = applyConfig(params, config).collect { tuple ->
+    def withDefaults = defaults + tuple[1]
+    [tuple[0], withDefaults] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(merged)
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containg the the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and contains all required keys.
+ *
+ * @param map The object to check; must be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing; 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    // first match wins: an explicit transcripts dir, else a '_transcripts' dir under the publish dir
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      // the escaped \${...} parts are kept as literal text in the directive value
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // append to the publishDir entries that are already defined, if any
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + // concatenate: with ',' the rest was passed as an unused logger format argument and never printed + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // 'def' keeps this local to the closure instead of writing to the shared script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!' binds tighter than instanceof, so a non-List (only the dummy matched) counts as missing + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/files/make_params/nextflow.config b/target/nextflow/files/make_params/nextflow.config new file mode 100644 index 00000000000..226046dce1c 
--- /dev/null +++ b/target/nextflow/files/make_params/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'make_params' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Looks for files in a directory and turn it in a params file.' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 
256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/files/make_params/nextflow_params.yaml b/target/nextflow/files/make_params/nextflow_params.yaml new file mode 100644 index 00000000000..9dbc54ad0db --- /dev/null +++ b/target/nextflow/files/make_params/nextflow_params.yaml @@ -0,0 +1,13 @@ +# Arguments +base_dir: # please fill in - example: "/path/to/dir" +pattern: # please fill in - example: "*.fastq.gz" +n_dirname_drop: 0 +n_basename_id: 0 +id_name: "id" +path_name: "path" +# group_name: "param_list" +# output: "$id.$key.output.yaml" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/files/make_params/nextflow_schema.json b/target/nextflow/files/make_params/nextflow_schema.json new file mode 100644 index 00000000000..2566c76fe8c --- /dev/null +++ b/target/nextflow/files/make_params/nextflow_schema.json @@ -0,0 +1,145 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "make_params", +"description": "Looks for files in a directory and turn it in a params file.", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No 
description", + "properties": { + + + "base_dir": { + "type": + "string", + "description": "Type: `file`, required, example: `/path/to/dir`. Base directory to search recursively", + "help_text": "Type: `file`, required, example: `/path/to/dir`. Base directory to search recursively" + + } + + + , + "pattern": { + "type": + "string", + "description": "Type: `string`, required, example: `*.fastq.gz`. An optional regular expression", + "help_text": "Type: `string`, required, example: `*.fastq.gz`. An optional regular expression. Only file names which match the regular expression will be matched." + + } + + + , + "n_dirname_drop": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. For every matched file, the parent directory will be traversed N times", + "help_text": "Type: `integer`, default: `0`. For every matched file, the parent directory will be traversed N times." + , + "default": "0" + } + + + , + "n_basename_id": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. The unique identifiers will consist of at least N dirnames", + "help_text": "Type: `integer`, default: `0`. The unique identifiers will consist of at least N dirnames." + , + "default": "0" + } + + + , + "id_name": { + "type": + "string", + "description": "Type: `string`, default: `id`. The name for storing the identifier field in the yaml", + "help_text": "Type: `string`, default: `id`. The name for storing the identifier field in the yaml." + , + "default": "id" + } + + + , + "path_name": { + "type": + "string", + "description": "Type: `string`, default: `path`. The name for storing the path field in the yaml", + "help_text": "Type: `string`, default: `path`. The name for storing the path field in the yaml." + , + "default": "path" + } + + + , + "group_name": { + "type": + "string", + "description": "Type: `string`, example: `param_list`. Top level name for the group of entries", + "help_text": "Type: `string`, example: `param_list`. 
Top level name for the group of entries." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.yaml`, example: `params.yaml`. Output YAML file", + "help_text": "Type: `file`, required, default: `$id.$key.output.yaml`, example: `params.yaml`. Output YAML file." + , + "default": "$id.$key.output.yaml" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/delimit_fraction/.config.vsh.yaml b/target/nextflow/filter/delimit_fraction/.config.vsh.yaml new file mode 100644 index 00000000000..e834b981402 --- /dev/null +++ b/target/nextflow/filter/delimit_fraction/.config.vsh.yaml @@ -0,0 +1,241 @@ +functionality: + name: "delimit_fraction" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - 
type: "string" + name: "--layer" + info: null + example: + - "raw_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_fraction_column" + description: "Name of column from .var dataframe selecting\na column that contains\ + \ floating point values between 0 and 1.\n" + info: null + example: + - "fraction_mitochondrial" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_name_filter" + description: "In which .obs slot to store a boolean array corresponding to which\ + \ observations should be removed." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "double" + name: "--min_fraction" + description: "Min fraction for an observation to be retained (True in output)." + info: null + default: + - 0.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_fraction" + description: "Max fraction for an observation to be retained (True in output)." 
+ info: null + default: + - 1.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Turns a column containing values between 0 and 1 into a boolean column\ + \ based on thresholds.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: 
"memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/delimit_fraction" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/delimit_fraction/delimit_fraction" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/filter/delimit_fraction/main.nf b/target/nextflow/filter/delimit_fraction/main.nf new file mode 100644 index 00000000000..5dbafc40189 --- /dev/null +++ b/target/nextflow/filter/delimit_fraction/main.nf @@ -0,0 +1,2713 @@ +// delimit_fraction 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "delimit_fraction", + "namespace" : "filter", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer", + "example" : [ + "raw_counts" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_fraction_column", + "description" : "Name of column from .var dataframe selecting\na column that contains floating point values between 0 and 1.\n", + "example" : [ + "fraction_mitochondrial" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", 
+ "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_name_filter", + "description" : "In which .obs slot to store a boolean array corresponding to which observations should be removed.", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "double", + "name" : "--min_fraction", + "description" : "Min fraction for an observation to be retained (True in output).", + "default" : [ + 0.0 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--max_fraction", + "description" : "Max fraction for an observation to be retained (True in output).", + "default" : [ + 1.0 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Turns a column containing 
values between 0 and 1 into a boolean column based on thresholds.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 
4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/delimit_fraction/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/delimit_fraction", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN + +import mudata as mu +import numpy as np +import sys +from operator import le, ge +from pandas.api.types import is_float_dtype + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_fraction_column': $( if [ ! -z ${VIASH_PAR_OBS_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OBS_FRACTION_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'min_fraction': $( if [ ! -z ${VIASH_PAR_MIN_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MIN_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_fraction': $( if [ ! -z ${VIASH_PAR_MAX_FRACTION+x} ]; then echo "float(r'${VIASH_PAR_MAX_FRACTION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input data") +mdata = mu.read_h5mu(par["input"]) + +mdata.var_names_make_unique() + +mod = par['modality'] +logger.info("Processing modality %s.", mod) +data = mdata.mod[mod] + +logger.info("\\\\tUnfiltered data: %s", data) + +logger.info("\\\\tComputing aggregations.") + +def apply_filter_to_mask(mask, base, filter, comparator): + new_filt = np.ravel(comparator(base, filter)) + num_removed = np.sum(np.invert(new_filt) & mask) + mask &= new_filt + return num_removed, mask + +try: + fraction = data.obs[par['obs_fraction_column']] +except KeyError: + raise ValueError(f"Could not find column '{par['obs_fraction_column']}'") +if not is_float_dtype(fraction): + raise ValueError(f"Column '{par['obs_fraction_column']}' does not contain float datatype.") +if fraction.max() > 1: + 
raise ValueError(f"Column '{par['obs_fraction_column']}' contains values > 1.") +if fraction.min() < 0: + raise ValueError(f"Column '{par['obs_fraction_column']}' contains values < 0.") + + +# Filter cells +filters = (("min_fraction", fraction, ge, "\\\\tRemoving %s cells with <%s percentage mitochondrial reads."), + ("max_fraction", fraction, le, "\\\\tRemoving %s cells with >%s percentage mitochondrial reads."), + ) + +keep_cells = np.repeat(True, data.n_obs) +for filter_name_or_value, base, comparator, message in filters: + try: + filter = par[filter_name_or_value] + except KeyError: + filter = filter_name_or_value + if filter is not None: + num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, filter, comparator) + logger.info(message, num_removed, filter) + +data.obs[par["obs_name_filter"]] = keep_cells + +logger.info("\\\\tFiltered data: %s", data) +logger.info("Writing output data to %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/filter_delimit_fraction", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. 
the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // The id is the first element of each parameter set.
+  def ppIds = parameterSets.collect{ it[0] }
+  // unique(false) de-duplicates a copy. The mutating unique() would strip the
+  // duplicates from ppIds itself, so the message below would no longer show them.
+  def distinctIds = ppIds.unique(false)
+  assert ppIds.size() == distinctIds.size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only arguments that the config declares as type "file" with direction "input"
+ * (a missing direction counts as "input", as in _castParamTypes) are touched,
+ * and only values that are still Strings are resolved; other values are
+ * returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *   This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameter values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *   resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  // Resolve a single value; non-String values are passed through as is.
+  def resolveOne = { value ->
+    value !instanceof String ? value : file(getChild(relativeTo, value))
+  }
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local instead of writing it to the script binding
+    def argSettings = config.functionality.allArguments.find{ it.plainName == parName }
+    def isInputFile = argSettings &&
+      argSettings.type == "file" &&
+      (argSettings.direction != null ? argSettings.direction : "input") == "input"
+    if (isInputFile) {
+      parValue = parValue instanceof Collection ? parValue.collect(resolveOne) : resolveOne(parValue)
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format.
+  // Each parser returns a pair: [param_list file (or null), parsed parameter sets].
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element.
+  // Plain get(key) is used on purpose: the GDK get(key, default) would insert 'id: null'
+  // into the caller's map when the key is missing.
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; config.functionality.allArguments
+ *   is searched by plainName for the settings of every argument.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local; without it they are written to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.type : null
+    // work on a list so single and multiple values share the same casting code
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters unknown to the config (paramSettings == null)
+    // stay wrapped in a list here -- confirm that callers expect this.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *   - Split the parameter values according to their separator if
+ *     the parameter accepts multiple values
+ *   - Cast the parameters to their correct types.
+ *   - Assertions:
+ *     ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *   also be specified in the Viash config ('publishDir' and 'publish_dir'
+ *   are accepted as well).
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *   the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *   - Split the parameter values according to their separator if
+ *     the parameter accepts multiple values
+ *   - Cast the parameters to their correct types.
+ *   - Assertions:
+ *     ~ Check if any unknown parameters are found
+ *     ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of each set is the id,
+ *   element 1 the parameter map, any further elements are passed through untouched.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to to initialise
+ * a Vsdl3 channel with multiple parameter sets. Possible formats are
+ * csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ * which correspond to the different arguments of this pipeline. A json or a yaml
+ * file should be a list of maps, each of which has keys corresponding to the
+ * arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ * When passing a csv, json or yaml, relative path names are relativized to the
+ * location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *   provides a list of arguments that can be split up into multiple events
+ *   in the output channel. Possible formats of param_lists are: a csv file,
+ *   json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *   have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *   the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the right-hand map (the param_list value) wins.
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // '?.' because parameters such as publish_dir have no settings in the config.
+      // Plain get(key) because the GDK get(key, default) would insert 'default: null'
+      // into the config argument when the key is missing.
+      if ( paramValue != null || parameterSettings?.get("default") != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *   provides a list of arguments that can be split up into multiple events
+ *   in the output channel. Possible formats of param_lists are: a csv file,
+ *   json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *   have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *   contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ * sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *   must be a unique id of the parameter set, and the second element
+ *   must contain the parameters themselves. Optional extra elements
+ *   of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *   the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by the plain argument name
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def defaultArgs = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Run the defaults and the incoming parameter sets through the same config processing
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+  def parsedInputParamSets = applyConfig(params, config)
+
+  // Lay the explicit values over the defaults (the right-hand map wins on '+')
+  // and re-attach any passthrough elements behind [id, values]
+  def parsedArgs = parsedInputParamSets.collect { tuple ->
+    [tuple[0], parsedDefaultValues + tuple[1]] + tuple.drop(2)
+  }
+  _checkUniqueIds(parsedArgs)
+
+  parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ * sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ * ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *   to a parsed config (see readConfig()). Optionally, a
+ *   'key' key can be provided which can be used to create a unique
+ *   name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *   and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map whose keys are all expected and which holds
+ * every required key.
+ *
+ * @param map The value to check.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must name the closure parameter; no 'key' variable exists in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/filter/delimit_fraction/nextflow.config b/target/nextflow/filter/delimit_fraction/nextflow.config new file mode 100644 index 
00000000000..f405577eb47 --- /dev/null +++ b/target/nextflow/filter/delimit_fraction/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'delimit_fraction' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Turns a column containing values between 0 and 1 into a boolean column based on thresholds.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory 
= 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/filter/delimit_fraction/nextflow_params.yaml b/target/nextflow/filter/delimit_fraction/nextflow_params.yaml new file mode 100644 index 00000000000..b3df3adef10 --- /dev/null +++ b/target/nextflow/filter/delimit_fraction/nextflow_params.yaml @@ -0,0 +1,18 @@ +# Inputs +input: # please fill in - example: "input.h5mu" +modality: "rna" +# layer: "raw_counts" +obs_fraction_column: # please fill in - example: "fraction_mitochondrial" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +obs_name_filter: # please fill in - example: "foo" + +# Arguments +min_fraction: 0 +max_fraction: 1 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/delimit_fraction/nextflow_schema.json b/target/nextflow/filter/delimit_fraction/nextflow_schema.json new file mode 100644 index 00000000000..c22316618d0 --- /dev/null +++ b/target/nextflow/filter/delimit_fraction/nextflow_schema.json @@ -0,0 +1,184 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "delimit_fraction", +"description": "Turns a 
column containing values between 0 and 1 into a boolean column based on thresholds.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "layer": { + "type": + "string", + "description": "Type: `string`, example: `raw_counts`. ", + "help_text": "Type: `string`, example: `raw_counts`. " + + } + + + , + "obs_fraction_column": { + "type": + "string", + "description": "Type: `string`, required, example: `fraction_mitochondrial`. Name of column from ", + "help_text": "Type: `string`, required, example: `fraction_mitochondrial`. Name of column from .var dataframe selecting\na column that contains floating point values between 0 and 1.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "obs_name_filter": { + "type": + "string", + "description": "Type: `string`, required. In which ", + "help_text": "Type: `string`, required. In which .obs slot to store a boolean array corresponding to which observations should be removed." + + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "min_fraction": { + "type": + "number", + "description": "Type: `double`, default: `0`. Min fraction for an observation to be retained (True in output)", + "help_text": "Type: `double`, default: `0`. Min fraction for an observation to be retained (True in output)." + , + "default": "0" + } + + + , + "max_fraction": { + "type": + "number", + "description": "Type: `double`, default: `1`. Max fraction for an observation to be retained (True in output)", + "help_text": "Type: `double`, default: `1`. Max fraction for an observation to be retained (True in output)." + , + "default": "1" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/delimit_fraction/setup_logger.py b/target/nextflow/filter/delimit_fraction/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/filter/delimit_fraction/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/filter/do_filter/.config.vsh.yaml b/target/nextflow/filter/do_filter/.config.vsh.yaml new file mode 100644 index 00000000000..49713c6b68d --- /dev/null +++ b/target/nextflow/filter/do_filter/.config.vsh.yaml @@ -0,0 +1,202 @@ +functionality: + name: "do_filter" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" +
multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_filter" + description: "Which .obs columns to use to filter the observations by." + info: null + example: + - "filter_with_x" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_filter" + description: "Which .var columns to use to filter the variables by." + info: null + example: + - "filter_with_x" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object."
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Remove observations and variables based on specified .obs and .var\ + \ columns.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 
128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/do_filter" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/do_filter/do_filter" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/filter/do_filter/main.nf b/target/nextflow/filter/do_filter/main.nf new file mode 100644 index 00000000000..11ff10b4e31 --- /dev/null +++ b/target/nextflow/filter/do_filter/main.nf @@ -0,0 +1,2634 @@ +// do_filter 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (maintainer, contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "do_filter", + "namespace" : "filter", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer", + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_filter", + "description" : "Which .obs columns to use to filter the observations by.", + "example" : [ + "filter_with_x" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_filter", + "description" : "Which .var columns to use to filter the observations by.", + "example" : [ + "filter_with_x" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + 
"multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Remove observations and variables based on specified .obs and .var columns.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", 
+ "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/do_filter/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/do_filter", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + 
+thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +import numpy as np +import sys + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_filter': $( if [ ! -z ${VIASH_PAR_OBS_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_FILTER//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'var_filter': $( if [ ! -z ${VIASH_PAR_VAR_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_FILTER//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s", par['input']) +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Processing modality '%s'", mod) + +obs_filt = np.repeat(True, mdata.mod[mod].n_obs) +var_filt = np.repeat(True, mdata.mod[mod].n_vars) + +par["obs_filter"] = par["obs_filter"] if par["obs_filter"] else [] +par["var_filter"] = par["var_filter"] if par["var_filter"] else [] + +for obs_name in par["obs_filter"]: + logger.info("Filtering modality '%s' observations by .obs['%s']", mod, obs_name) + if 
not obs_name in mdata.mod[mod].obs: + raise ValueError(f".mod[{mod}].obs[{obs_name}] does not exist.") + if obs_name in mdata.mod[mod].obs: + obs_filt &= mdata.mod[mod].obs[obs_name] + +for var_name in par["var_filter"]: + logger.info("Filtering modality '%s' variables by .var['%s']", mod, var_name) + if not var_name in mdata.mod[mod].var: + raise ValueError(f".mod[{mod}].var[{var_name}] does not exist.") + if var_name in mdata.mod[mod].var: + var_filt &= mdata.mod[mod].var[var_name] + +mdata.mod[mod] = mdata.mod[mod][obs_filt, var_filt].copy() + +logger.info("Writing h5mu to file %s.", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/filter_do_filter", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
// helper functions for generating help //

/**
 * Greedily pack words into lines of at most `maxLength` characters.
 *
 * A word is appended to the current line (separated by one space) when the
 * result still fits; otherwise the current line is emitted and the word starts
 * a new one. A single word longer than `maxLength` is placed on its own line.
 *
 * @param words     the words to wrap, in order.
 * @param maxLength the maximum line length.
 *
 * @return the wrapped lines; empty when `words` is empty.
 */
def _wrapWords(words, maxLength) {
  def lines = []
  def line = null
  for (word in words) {
    if (line == null) {
      // first word: start the line without a leading separator
      line = word
    } else if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  if (line != null) {
    lines.add(line)
  }
  return lines
}

/**
 * Wrap a string on whitespace into lines of at most `maxLength` characters.
 * Based on io.viash.helpers.Format.wordWrap.
 *
 * @param str       the text to wrap.
 * @param maxLength the maximum line length.
 *
 * @return a list of lines. The first line no longer starts with a stray space,
 *         and no empty leading line is produced when the first word is too long.
 */
def formatWordWrap(str, maxLength) {
  return _wrapWords(str.split("\\s").toList(), maxLength)
}

/**
 * Wrap a multi-paragraph string: the text is split on newlines and every
 * paragraph is word-wrapped separately. Based on Format.paragraphWrap.
 *
 * @param str       the text to wrap.
 * @param maxLength the maximum line length.
 *
 * @return a list of lines, with at least one (possibly empty) line per paragraph.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each { par ->
    def wrapped = _wrapWords(par.split("\\s").toList(), maxLength)
    // a whitespace-only paragraph splits into zero words; keep it as a blank line
    outLines.addAll(wrapped ? wrapped : [""])
  }
  return outLines
}

/**
 * Render the help text of a single argument.
 *
 * @param param a Map describing the argument. Keys read here: plainName, type,
 *              required, multiple, multiple_sep, direction, must_exist, default,
 *              example, min, max, choices and description.
 *
 * @return the help string for this argument, starting with a newline.
 */
def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  // flags that are listed next to the type when they evaluate to true
  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    // null-safe: an argument without a direction is simply not an output
    ["output", param.direction?.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  // lists are joined with the argument's separator, anything else is stringified
  def listSep = param.multiple_sep != null ? param.multiple_sep : ", "
  def stringify = { value ->
    if (value == null) {
      null
    } else if (value instanceof List) {
      value.join(listSep)
    } else {
      value.toString()
    }
  }
  def dflt = stringify(param.default)
  def example = stringify(param.example)
  def min = param.min?.toString()
  def max = param.max?.toString()

  def escapeChoice = { choice ->
    def s1 = choice.replaceAll("\\n", "\\\\n")
    // NOTE(review): the replacement string below is backslash + quote, which
    // String.replaceAll treats as an escaped literal quote, so this call looks
    // like a no-op -- confirm whether quotes were meant to be escaped.
    def s2 = s1.replaceAll("\"", """\\\"""")
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  // NOTE(review): the wrap width of 80 - 8 suggests the separator strings below
  // are meant to carry a wider indent than a single space -- confirm.
  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

/**
 * Render the full help message of a component.
 * Based on Helper.generateHelp() in Helper.scala.
 *
 * @param config a Viash config Map; `config.functionality` must provide name,
 *               version, description, usage, allArguments and allArgumentGroups.
 *
 * @return the help message as a single string.
 */
def generateHelp(config) {
  def fun = config.functionality

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    // group members are either argument names (looked up) or argument maps
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the help message and exit with status 0 when the `help` parameter
 * was passed; do nothing otherwise.
 *
 * @param config a Viash config Map.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Guess the format of the `param_list` parameter.
 *
 * @param params the parameter Map.
 *
 * @return "none" when `param_list` is absent or null, "asis" when it is not a
 *         String, "csv" / "json" / "yaml" based on the file extension, and
 *         "yaml_blob" for any other String.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// script-level flag so that the deprecation warning is printed only once
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: turn the pipeline parameters into a list of argument sets.
 *
 * Defaults from the config, values in `params` and every entry of
 * `param_list` are merged (in that order of precedence), split on the
 * argument's separator, cast to the declared type and checked for an `id`.
 *
 * @param params the parameter Map, optionally containing `param_list`.
 * @param config a Viash config Map.
 *
 * @return a list of Maps, one per argument set, each with a unique `id`.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has been deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // each parser returns [ file the list was read from (or null), list of maps ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally so the value does not leak into the script binding
        def parData
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              // paths in a param_list file are relative to that file
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): the closure receives a Map.Entry, which is never null, so
      // this filter appears to keep null-valued entries -- confirm the intent.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) leaves ppIds intact so the message lists every detected id
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: like paramsToList, but returns the argument sets as a channel.
 *
 * @param params the parameter Map.
 * @param config a Viash config Map.
 *
 * @return the result of Channel.fromList on the argument sets.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has been deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: like paramsToChannel, but every event is mapped to `[id, args]`.
 *
 * @param params the parameter Map.
 * @param config a Viash config Map.
 *
 * @return a channel of `[id, argument Map]` events.
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has been deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not in the config are returned unaltered.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      // NOTE(review): String.split interprets multiple_sep as a regular
      // expression -- confirm separators such as "|" are not expected here.
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}
/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets; the first element of every
 *                      set is its id.
 */
private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
  def ppIds = parameterSets.collect{it[0]}
  // unique(false) returns a de-duplicated copy, so the message below still
  // lists every id that was detected (including the duplicates)
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

/**
 * Resolve the file paths in the parameters relative to given path
 *
 * @param paramList A Map containing parameters to process.
 *                  This function assumes that files are still of type String.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * @param relativeTo path of a file to resolve the parameters values to.
 *
 * @return A map of parameters where the location of the input file parameters have been
 *         resolved relatively to the provided path. Values that are not Strings, and
 *         parameters that are not input files, are returned unchanged.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  paramList.collectEntries { parName, parValue ->
    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
    // a missing direction is treated as "input", as _castParamTypes does
    def isInputFile = argSettings &&
      argSettings.type == "file" &&
      (argSettings.direction != null ? argSettings.direction : "input") == "input"
    if (isInputFile) {
      def resolve = { path ->
        path !instanceof String ? path : file(getChild(relativeTo, path))
      }
      // the scalar branch used to reference an undefined variable `path`
      parValue = parValue instanceof Collection ? parValue.collect(resolve) : resolve(parValue)
    }
    [parName, parValue]
  }
}
/**
 * Parse the 'param_list' parameter based on settings defined in a viash config.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of parameter sets that were parsed from the 'param_list' argument value.
 *         Every set is a two-element list: the id (or null) and a Map of the other parameters.
 */
private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
  // first try to guess the format
  def paramListFormat = _guessParamListFormat(params)

  // get the correct parser function for the detected params_list format;
  // each parser returns [ file the list was read from (or null), list of maps ]
  def paramListParsers = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  // NOTE(review): the message mentions '--param_list_format', but
  // _guessParamListFormat does not read such a parameter -- confirm.
  assert paramListParsers.containsKey(paramListFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
    "'yaml', 'yaml_blob', 'asis' or 'none'"
  def paramListParser = paramListParsers.get(paramListFormat)

  // fetch multi param inputs
  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
  // If the value was indeed a file, paramListFile contains the location of that file (used later).
  def paramListFile = paramListOut[0]
  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file

  // data checks
  assert paramSets instanceof List: "--param_list should contain a list of maps"
  for (value in paramSets) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the front
  paramSets = paramSets.collect({ paramValues ->
    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
  })
  // Split parameters with 'multiple: true'
  paramSets = paramSets.collect({ id, paramValues ->
    def splitParamValues = _splitParams(paramValues, config)
    [id, splitParamValues]
  })

  // The paths of input files inside a param_list file may have been specified relatively to the
  // location of the param_list file. These paths must be made absolute.
  if (paramListFile){
    paramSets = paramSets.collect({ id, paramValues ->
      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
      [id, relativeParamValues]
    })
  }

  return paramSets
}

/**
 * Cast parameters to the correct type as defined in the Viash config
 *
 * @param parValues A Map of input arguments.
 * @param config A Map of the Viash configuration.
 *
 * @return The input arguments that have been cast to the type from the viash config.
 *         Arguments that are not defined in the config (e.g. publish_dir) are
 *         returned unaltered.
 */
private Map _castParamTypes(Map parValues, Map config) {
  // Cast the input to the correct type according to viash config
  def castParValues = parValues.collectEntries({ parName, parValue ->
    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
    if (!paramSettings) {
      // don't parse parameters like publish_dir: previously a scalar value was
      // wrapped in a list here and never unwrapped again
      return [parName, parValue]
    }
    // plain Map.get does not insert a missing key into the config
    def parType = paramSettings.get("type")
    if (parValue !instanceof Collection) {
      parValue = [parValue]
    }
    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
      parValue = parValue.collect{ path ->
        if (path !instanceof String) {
          path
        } else {
          file(path)
        }
      }
    } else if (parType == "integer") {
      parValue = parValue.collect{it as Integer}
    } else if (parType == "double") {
      parValue = parValue.collect{it as Double}
    } else if (parType == "boolean" ||
               parType == "boolean_true" ||
               parType == "boolean_false") {
      // NOTE(review): `as Boolean` presumably applies Groovy truth, in which
      // case the String "false" would become true -- confirm how String
      // booleans from a param_list file should be handled.
      parValue = parValue.collect{it as Boolean}
    }

    // simplify list to value if need be
    if (!paramSettings.multiple) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  })
  return castParValues
}

/**
 * Apply the argument settings specified in a Viash config to a single parameter set.
 *    - Split the parameter values according to their separator if
 *      the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *
 * @param paramValues A Map of parameter to be processed. All parameters must
 *                    also be specified in the Viash config (besides
 *                    'publishDir' and 'publish_dir').
 * @param config: A Map of the Viash configuration. This Map can be generated from
 *                the config file using the readConfig() function.
 * @return The input parameters that have been processed.
 */
Map applyConfigToOneParameterSet(Map paramValues, Map config){
  def splitParamValues = _splitParams(paramValues, config)
  def castParamValues = _castParamTypes(splitParamValues, config)

  // Check if any unexpected arguments were passed
  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
  castParamValues.each({parName, parValue ->
    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
  })
  return castParamValues
}

/**
 * Apply the argument settings specified in a Viash config to a list of parameter sets.
 *    - Split the parameter values according to their separator if
 *      the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *        ~ Check if the ID of the parameter set is unique across all sets.
 *
 * @param parameterSets A list of parameter sets. The first element of a set is its
 *                      id, the second the parameter Map; further elements are
 *                      passed through untouched.
 * @param config A Map of the Viash configuration.
 *
 * @return The input parameters that have been processed.
 */
List applyConfig(List parameterSets, Map config){
  def processedparameterSets = parameterSets.collect({ parameterSet ->
    def id = parameterSet[0]
    def paramValues = parameterSet[1]
    def passthrough = parameterSet.drop(2)
    def processedSet = applyConfigToOneParameterSet(paramValues, config)
    [id, processedSet] + passthrough
  })

  _checkUniqueIds(processedparameterSets)
  return processedparameterSets
}
/**
 * Parse nextflow parameters based on settings defined in a viash config.
 * Return a list of parameter sets, each parameter set corresponding to
 * an event in a nextflow channel. The output from this function can be used
 * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
 * events.
 *
 * This function performs:
 *   - A filtering of the params which can be found in the config file.
 *   - Process the params_list argument which allows a user to initialise
 *     a Vdsl3 channel with multiple parameter sets.
 *   - Combine the parameter sets into a list of Vdsl3 events.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events.
 *               Each parameter set (event) must have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of parameter sets with the first element of the event being
 *         the event ID and the second element containing a map of the parsed parameters.
 */
private List<Tuple2<String, Map<String, String>>> _paramsToParamSets(Map params, Map config){
  /* parse regular parameters (not in param_list)  */
  /*************************************************/
  def globalParams = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  def globalID = params.get("id", null)
  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)

  /* process params_list arguments */
  /*********************************/
  def paramSets = _parseParamListArguments(params, config)
  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)

  /* combine arguments into channel */
  /**********************************/
  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
    def id = paramSet[0]
    def parValues = paramSet[1]
    id = [id, globalID].find({it != null}) // first non-null element

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      id = id ? id : "stub" + index
    }
    assert id != null: "Each parameter set should have at least an ID."

    // Add regular parameters together with parameters passed with 'param_list'
    def combinedArgsValues = globalParamsValues + parValues

    // Remove parameters which are null, if the default is also null
    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
      if (paramValue != null) {
        return true
      }
      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
      // - '?.' covers parameters that are not in the config (e.g. publish_dir)
      // - the one-argument get does not insert a 'default' key into the config,
      //   which would otherwise change later containsKey("default") checks
      parameterSettings?.get("default") != null
    }
    [id, combinedArgsValues]
  }

  // Check if ids (first element of each list) is unique
  _checkUniqueIds(processedParams)
  return processedParams
}

/**
 * Parse nextflow parameters based on settings defined in a viash config
 * and return a nextflow channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Each parameter set (event) must have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A nextflow Channel with events. Events are formatted as a tuple that
 *         first contains the ID of the event and as second element holds a parameter map.
 */
def channelFromParams(Map params, Map config) {
  // declared locally so the list does not leak into the script binding
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}
/**
 * Apply a Viash config to a list of Vdsl3 formatted parameter sets.
 *
 * The defaults declared in the config and the incoming parameter sets are both
 * run through the config (see applyConfigToOneParameterSet and applyConfig),
 * after which every parameter set is laid over the defaults. Uniqueness of the
 * ids is asserted on the result.
 *
 * @param params A list of parameter sets. The first element of a set must be
 *               its unique id and the second element the parameter Map; any
 *               further elements are passed to the output as is.
 * @param config A Map of the Viash configuration. This Map can be generated from
 *               the config file using the readConfig() function.
 *
 * @return A list of processed parameter sets, in the same tuple layout.
 */
private List _preprocessInputsList(List params, Map config) {
  // collect the declared defaults, keyed by the plain argument name
  def defaultsByName = [:]
  config.functionality.allArguments.each { argument ->
    if (argument.containsKey("default")) {
      defaultsByName[argument.plainName] = argument.default
    }
  }

  // run both the defaults and the incoming sets through the config
  def processedDefaults = applyConfigToOneParameterSet(defaultsByName, config)
  def processedInputs = applyConfig(params, config)

  // explicit values win over defaults; trailing tuple elements are kept
  def mergedSets = processedInputs.collect { tuple ->
    def valuesOverDefaults = processedDefaults + tuple[1]
    [tuple[0], valuesOverDefaults] + tuple.drop(2)
  }

  _checkUniqueIds(mergedSets)
  return mergedSets
}
/**
 * Generate a nextflow Workflow that allows processing a channel of
 * Vdsl3 formatted events and apply a Viash config to them
 * (see _preprocessInputsList).
 *
 * The events in the channel are formatted as tuples, with the
 * first element of the tuples being a unique id of the parameter set,
 * and the second element containing the parameters themselves.
 * Optional extra elements of the tuples will be passed to the output as is.
 *
 * @param args A map that must contain a 'config' key that points
 *             to a parsed config (see readConfig()). Optionally, a
 *             'key' key can be provided which is used as the name of
 *             the returned workflow (default: "preprocessInputs").
 *
 * @return A workflow that allows processing a channel of Vdsl3 formatted events
 *         and apply a Viash config to them.
 */
def preprocessInputs(Map args) {
  // NOTE(review): wfKey and config are assigned without 'def' and therefore
  // live in the script binding -- left as is, confirm whether the workflow
  // block below depends on that.
  wfKey = args.key != null ? args.key : "preprocessInputs"
  config = args.config
  workflow preprocessInputsInstance {
    take:
    input_ch

    main:
    assert config instanceof Map :
      "Error in preprocessInputs: config must be a map. " +
      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"

    // gather all events so the whole list can be processed (and its ids
    // checked) at once, then emit the events one by one again
    output_ch = input_ch
      | toSortedList
      | map { paramList -> _preprocessInputsList(paramList, config) }
      | flatMap
    emit:
    output_ch
  }

  return preprocessInputsInstance.cloneWithName(wfKey)
}

////////////////////////////
// VDSL3 helper functions //
////////////////////////////

import nextflow.Nextflow
import nextflow.script.IncludeDef
import nextflow.script.ScriptBinding
import nextflow.script.ScriptMeta
import nextflow.script.ScriptParser

// retrieve resourcesDir here to make sure the correct path is found
resourcesDir = ScriptMeta.current().getScriptPath().getParent()

/**
 * Assert that a Map only contains expected keys and has all required keys.
 *
 * @param map          the object to check; must be a Map.
 * @param expectedKeys the keys that are allowed in the map.
 * @param requiredKeys the keys that must be present in the map.
 * @param mapName      a name for the map, used in the error messages
 *                     (may be null or empty).
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // report the key that is missing; the message used to interpolate '$key',
    // which is not defined in this closure
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // declared with 'def' so the list stays local instead of being written to the script binding + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build one message with '+': after a comma the advice would be passed as a logger argument and never printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // 'def' keeps out local to this closure instead of writing it to the script binding + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // parenthesised on purpose: '!out instanceof List' negates out first and is therefore always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/filter/do_filter/nextflow.config b/target/nextflow/filter/do_filter/nextflow.config new file mode 100644 index 00000000000..959c42463c8 --- 
/dev/null +++ b/target/nextflow/filter/do_filter/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'do_filter' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Remove observations and variables based on specified .obs and .var columns.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + 
withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/filter/do_filter/nextflow_params.yaml b/target/nextflow/filter/do_filter/nextflow_params.yaml new file mode 100644 index 00000000000..a0d7704cbec --- /dev/null +++ b/target/nextflow/filter/do_filter/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# obs_filter: ["filter_with_x"] +# var_filter: ["filter_with_x"] +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/do_filter/nextflow_schema.json b/target/nextflow/filter/do_filter/nextflow_schema.json new file mode 100644 index 00000000000..2e8655cd805 --- /dev/null +++ b/target/nextflow/filter/do_filter/nextflow_schema.json @@ -0,0 +1,124 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "do_filter", +"description": "Remove observations and variables based on specified .obs and .var columns.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + 
"type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "obs_filter": { + "type": + "string", + "description": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which ", + "help_text": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which .obs columns to use to filter the observations by." + + } + + + , + "var_filter": { + "type": + "string", + "description": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which ", + "help_text": "Type: List of `string`, example: `filter_with_x`, multiple_sep: `\":\"`. Which .var columns to use to filter the observations by." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/do_filter/setup_logger.py b/target/nextflow/filter/do_filter/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/filter/do_filter/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_counts/.config.vsh.yaml b/target/nextflow/filter/filter_with_counts/.config.vsh.yaml new file mode 100644 index 00000000000..4eb3f043508 --- /dev/null +++ b/target/nextflow/filter/filter_with_counts/.config.vsh.yaml @@ -0,0 +1,295 @@ +functionality: + name: "filter_with_counts" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "author" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: 
"Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + info: null + example: + - "raw_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--do_subset" + description: "Whether to subset before storing the output." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--obs_name_filter" + description: "In which .obs slot to store a boolean array corresponding to which\ + \ observations should be removed." + info: null + default: + - "filter_with_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_name_filter" + description: "In which .var slot to store a boolean array corresponding to which\ + \ variables should be removed." 
+ info: null + default: + - "filter_with_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--min_counts" + description: "Minimum number of counts captured per cell." + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_counts" + description: "Maximum number of counts captured per cell." + info: null + example: + - 5000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_genes_per_cell" + description: "Minimum of non-zero values per cell." + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_genes_per_cell" + description: "Maximum of non-zero values per cell." + info: null + example: + - 1500000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells_per_gene" + description: "Minimum of non-zero values per gene." + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Filter scRNA-seq data based on the primary QC metrics. 
\nThis is based\ + \ on both the UMI counts, the gene counts \nand the mitochondrial genes (genes\ + \ starting with mt/MT).\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + 
cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_counts" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_counts/filter_with_counts" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/filter/filter_with_counts/main.nf b/target/nextflow/filter/filter_with_counts/main.nf new file mode 100644 index 00000000000..030d115fc25 --- /dev/null +++ b/target/nextflow/filter/filter_with_counts/main.nf @@ -0,0 +1,2796 @@ +// filter_with_counts 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (author) +// * Robrecht Cannoodt (maintainer, author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "filter_with_counts", + "namespace" : "filter", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer", + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer", + "example" : [ 
+ "raw_counts" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--do_subset", + "description" : "Whether to subset before storing the output.", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_name_filter", + "description" : "In which .obs slot to store a boolean array corresponding to which observations should be removed.", + "default" : [ + "filter_with_counts" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_name_filter", + "description" : "In which .var slot to store a boolean array corresponding to which variables should be removed.", + "default" : [ + "filter_with_counts" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "integer", + "name" : "--min_counts", + "description" : "Minimum number of counts captured per cell.", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : 
"--max_counts", + "description" : "Maximum number of counts captured per cell.", + "example" : [ + 5000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_genes_per_cell", + "description" : "Minimum of non-zero values per cell.", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--max_genes_per_cell", + "description" : "Maximum of non-zero values per cell.", + "example" : [ + 1500000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_cells_per_gene", + "description" : "Minimum of non-zero values per gene.", + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Filter scRNA-seq data based on the primary QC metrics. 
\nThis is based on both the UMI counts, the gene counts \nand the mitochondrial genes (genes starting with mt/MT).\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + 
"mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_counts/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_counts", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN + +import mudata as mu +import numpy as np +import sys +from operator import le, ge, gt + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_counts': $( if [ ! -z ${VIASH_PAR_MAX_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MAX_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_CELL//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_genes_per_cell': $( if [ ! -z ${VIASH_PAR_MAX_GENES_PER_CELL+x} ]; then echo "int(r'${VIASH_PAR_MAX_GENES_PER_CELL//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_cells_per_gene': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_PER_GENE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input data") +mdata = mu.read_h5mu(par["input"]) + +mdata.var_names_make_unique() + +mod = par['modality'] +logger.info("Processing modality %s.", mod) +data = mdata.mod[mod] + +logger.info("\\\\tUnfiltered data: %s", data) + +logger.info("\\\\tComputing aggregations.") +n_counts_per_cell = np.ravel(np.sum(data.X, axis=1)) +n_cells_per_gene = np.sum(data.X > 0, axis=0) +n_genes_per_cell = np.sum(data.X > 0, axis=1) + +def apply_filter_to_mask(mask, base, filter, comparator): + new_filt = np.ravel(comparator(base, filter)) + num_removed = np.sum(np.invert(new_filt) & mask) + mask &= new_filt + return num_removed, mask + +# Filter genes +keep_genes = np.repeat(True, data.n_vars) +if par["min_cells_per_gene"] is not None: + num_removed, keep_genes = apply_filter_to_mask(keep_genes, + n_cells_per_gene, + par['min_cells_per_gene'], + ge) + logger.info("\\\\tRemoving %s genes with non-zero values in <%s cells.", + num_removed, par['min_cells_per_gene']) + +# Filter cells +filters = (("min_genes_per_cell", n_genes_per_cell, ge, "\\\\tRemoving %s cells with non-zero values in <%s genes."), + ("max_genes_per_cell", n_genes_per_cell, le, "\\\\tRemoving %s cells with non-zero values in >%s genes."), + 
("min_counts", n_counts_per_cell, ge, "\\\\tRemoving %s cells with <%s total counts."), + ("max_counts", n_counts_per_cell, le, "\\\\tRemoving %s cells with >%s total counts."), + (0, np.sum(data[:,keep_genes].X, axis=1), gt, "\\\\tRemoving %s cells with %s counts")) + +keep_cells = np.repeat(True, data.n_obs) +for filter_name_or_value, base, comparator, message in filters: + try: + filter = par[filter_name_or_value] + except KeyError: + filter = filter_name_or_value + if filter is not None: + num_removed, keep_cells = apply_filter_to_mask(keep_cells, base, filter, comparator) + logger.info(message, num_removed, filter) + +if par["obs_name_filter"] is not None: + data.obs[par["obs_name_filter"]] = keep_cells +if par["var_name_filter"] is not None: + data.var[par["var_name_filter"]] = keep_genes + +if par["do_subset"]: + mdata.mod[mod] = data[keep_cells, keep_genes] + +logger.info("\\\\tFiltered data: %s", data) +logger.info("Writing output data to %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) + +logger.info("Finished") +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/filter_filter_with_counts", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. 
the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
+  // The id is the first element of every parameter set.
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) de-duplicates a copy. The no-arg unique() removes the duplicates from
+  // ppIds itself, so the failure message would list the ids without the duplicates.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the input file paths in a parameter set relative to a given file.
+ *
+ * Only values of arguments that the config declares with type "file" and
+ * direction "input" are touched, and only when they are still Strings; every
+ * other value is returned as is.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of the file against which relative parameter values are resolved.
+ *
+ * @return A map of parameters where the locations of the input file parameters have been
+ *         resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure call instead of the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else if (parValue instanceof String) {
+        // non-String scalars are left untouched (the old code returned the
+        // undefined name 'path' here)
+        parValue = file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the parameter sets it contains.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns [location of the param_list file (or null), parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    // plain get(key): the two-argument get(key, default) would write the default
+    // back into the parameter map when the key is missing
+    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list (unless it already is a Collection), the
+ * elements are cast according to the argument type, and the list is reduced to a
+ * single value again for arguments that do not accept multiple values.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared with 'def' so that the values stay local to this closure call
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // '?.type' reads the type without writing a 'type: null' entry into the config
+    def parType = paramSettings?.type
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values without settings (e.g. publish_dir) stay wrapped in a list here
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ * 
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config ('publishDir' and
+ *                    'publish_dir' are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * Elements after the first two of each parameter set are passed through unchanged.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the param_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+ 
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a key clash the value from the param_list entry wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll drops the rejected entries directly instead of relying on a
+    // collectEntries closure that returns nothing for them.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      if (paramValue != null) {
+        return true
+      }
+      // Parameters such as publish_dir have no entry in the config, hence '?.'.
+      // Reading '.default' (rather than get("default", null)) leaves the config untouched.
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      parameterSettings?.default != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A nextflow Channel with events. Events are formatted as tuples whose
+ *         first element is the ID of the event and whose second element is the parameter map.
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *   - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params List of parameter sets, each one a tuple. Element 0 is the unique id
+ *                of the parameter set and element 1 holds the parameters themselves.
+ *                Any further elements are handed to the output untouched.
+ * @param config Map holding the Viash configuration, e.g. as produced by
+ *                readConfig() from the config file.
+ *
+ * @return The processed parameter sets, again as a list of tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      declaredDefaults[argument.plainName] = argument.default
+    }
+  }
+
+  // Run the defaults first, then the incoming parameter sets, through the config
+  def formattedDefaults = applyConfigToOneParameterSet(declaredDefaults, config)
+  def formattedParamSets = applyConfig(params, config)
+
+  // Layer every parameter set on top of the defaults; whatever follows
+  // [id, values] in the tuple is appended unchanged
+  def mergedParamSets = formattedParamSets.collect { paramSet ->
+    [paramSet[0], formattedDefaults + paramSet[1]] + paramSet.drop(2)
+  }
+
+  _checkUniqueIds(mergedParamSets)
+  mergedParamSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *   - Gather default parameters from the Viash config and make
+ *     sure that they are correctly formatted (see applyConfig method).
+ *   - Format the input parameters (also using the applyConfig method).
+ *   - Apply the default parameter to the input parameters.
+ *   - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // Declared with 'def' so that every call captures its own key and config.
+  // Undeclared assignments end up in the shared script binding, where a later
+  // call would overwrite the config read by an earlier workflow instance.
+  def wfKey = args.key != null ? args.key : "preprocessInputs"
+  def config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events so they can be processed (and id-checked) as one list,
+    // then emit the processed events one by one again
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and contains all required keys.
+ *
+ * @param map The object to check; must be a Map.
+ * @param expectedKeys Collection of keys that are allowed in the map.
+ * @param requiredKeys Collection of keys that must be present in the map.
+ * @param mapName Name of the map, used in the assertion messages. May be empty.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing ('key' is not defined in this closure)
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); the instanceof test is parenthesised because '!' binds tighter than 'instanceof', and 'out' is declared with 'def' so it stays local to this closure + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_counts/nextflow.config b/target/nextflow/filter/filter_with_counts/nextflow.config new file mode 100644 index 
00000000000..5a7a91e49e3 --- /dev/null +++ b/target/nextflow/filter/filter_with_counts/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'filter_with_counts' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Filter scRNA-seq data based on the primary QC metrics. \nThis is based on both the UMI counts, the gene counts \nand the mitochondrial genes (genes starting with mt/MT).\n' + author = 'Dries De Maeyer, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + 
withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/filter/filter_with_counts/nextflow_params.yaml b/target/nextflow/filter/filter_with_counts/nextflow_params.yaml new file mode 100644 index 00000000000..bc01a706b90 --- /dev/null +++ b/target/nextflow/filter/filter_with_counts/nextflow_params.yaml @@ -0,0 +1,22 @@ +# Inputs +input: # please fill in - example: "input.h5mu" +modality: "rna" +# layer: "raw_counts" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +do_subset: false +obs_name_filter: "filter_with_counts" +var_name_filter: "filter_with_counts" + +# Arguments +# min_counts: 200 +# max_counts: 5000000 +# min_genes_per_cell: 200 +# max_genes_per_cell: 1500000 +# min_cells_per_gene: 3 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/filter_with_counts/nextflow_schema.json b/target/nextflow/filter/filter_with_counts/nextflow_schema.json new file mode 100644 index 00000000000..16f2f7e2b32 --- /dev/null +++ 
b/target/nextflow/filter/filter_with_counts/nextflow_schema.json @@ -0,0 +1,225 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "filter_with_counts", +"description": "Filter scRNA-seq data based on the primary QC metrics. \nThis is based on both the UMI counts, the gene counts \nand the mitochondrial genes (genes starting with mt/MT).\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "layer": { + "type": + "string", + "description": "Type: `string`, example: `raw_counts`. ", + "help_text": "Type: `string`, example: `raw_counts`. " + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "do_subset": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Whether to subset before storing the output", + "help_text": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output." + , + "default": "False" + } + + + , + "obs_name_filter": { + "type": + "string", + "description": "Type: `string`, default: `filter_with_counts`. In which .obs slot to store a boolean array corresponding to which observations should be removed", + "help_text": "Type: `string`, default: `filter_with_counts`. In which .obs slot to store a boolean array corresponding to which observations should be removed." + , + "default": "filter_with_counts" + } + + + , + "var_name_filter": { + "type": + "string", + "description": "Type: `string`, default: `filter_with_counts`. In which .var slot to store a boolean array corresponding to which variables should be removed", + "help_text": "Type: `string`, default: `filter_with_counts`. In which .var slot to store a boolean array corresponding to which variables should be removed." + , + "default": "filter_with_counts" + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "min_counts": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. Minimum number of counts captured per cell", + "help_text": "Type: `integer`, example: `200`. Minimum number of counts captured per cell." + + } + + + , + "max_counts": { + "type": + "integer", + "description": "Type: `integer`, example: `5000000`. Maximum number of counts captured per cell", + "help_text": "Type: `integer`, example: `5000000`. Maximum number of counts captured per cell." + + } + + + , + "min_genes_per_cell": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. Minimum of non-zero values per cell", + "help_text": "Type: `integer`, example: `200`. Minimum of non-zero values per cell." + + } + + + , + "max_genes_per_cell": { + "type": + "integer", + "description": "Type: `integer`, example: `1500000`. Maximum of non-zero values per cell", + "help_text": "Type: `integer`, example: `1500000`. Maximum of non-zero values per cell." 
+ + } + + + , + "min_cells_per_gene": { + "type": + "integer", + "description": "Type: `integer`, example: `3`. Minimum of non-zero values per gene", + "help_text": "Type: `integer`, example: `3`. Minimum of non-zero values per gene." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/filter_with_counts/setup_logger.py b/target/nextflow/filter/filter_with_counts/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/filter/filter_with_counts/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_hvg/.config.vsh.yaml b/target/nextflow/filter/filter_with_hvg/.config.vsh.yaml new file mode 100644 index 00000000000..68341ad1e35 --- /dev/null +++ b/target/nextflow/filter/filter_with_hvg/.config.vsh.yaml @@ -0,0 +1,352 @@ +functionality: + name: "filter_with_hvg" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "contributor" + 
info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + description: "use adata.layers[layer] for expression values instead of adata.X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_name_filter" + description: "In which .var slot to store a boolean array corresponding to which\ + \ observations should be filtered out." + info: null + default: + - "filter_with_hvg" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--varm_name" + description: "In which .varm slot to store additional metadata." + info: null + default: + - "hvg" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--do_subset" + description: "Whether to subset before storing the output." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--flavor" + description: "Choose the flavor for identifying highly variable genes. For the\ + \ dispersion based methods\nin their default workflows, Seurat passes the cutoffs\ + \ whereas Cell Ranger passes n_top_genes.\n" + info: null + default: + - "seurat" + required: false + choices: + - "seurat" + - "cell_ranger" + - "seurat_v3" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_top_genes" + description: "Number of highly-variable genes to keep. Mandatory if flavor='seurat_v3'." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_mean" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." 
+ info: null + default: + - 0.0125 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_mean" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." + info: null + default: + - 3.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_disp" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'." + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_disp" + description: "If n_top_genes is defined, this and all other cutoffs for the means\ + \ and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.\ + \ Default is +inf." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--span" + description: "The fraction of the data (cells) used when estimating the variance\ + \ in the loess model fit if flavor='seurat_v3'." + info: null + default: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_bins" + description: "Number of bins for binning the mean gene expression. Normalization\ + \ is done with respect to each bin. If just a single gene falls into a bin,\ + \ the normalized dispersion is artificially set to 1." + info: null + default: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch_key" + description: "If specified, highly-variable genes are selected within each batch\ + \ separately and merged. 
This simple \nprocess avoids the selection of batch-specific\ + \ genes and acts as a lightweight batch correction method. \nFor all flavors,\ + \ genes are first sorted by how many batches they are a HVG. For dispersion-based\ + \ flavors \nties are broken by normalized dispersion. If flavor = 'seurat_v3',\ + \ ties are broken by the median (across\nbatches) rank based on within-batch\ + \ normalized variance.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\n\ + Expects logarithmized data, except when flavor='seurat_v3' in which count data\ + \ is expected.\n\nDepending on flavor, this reproduces the R-implementations of\ + \ Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the\ + \ dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion\ + \ is obtained by scaling with the mean and standard deviation of the dispersions\ + \ for genes falling into a given bin for mean expression of genes. This means\ + \ that for each bin of mean expression, highly variable genes are selected.\n\n\ + For [Stuart19], a normalized variance for each gene is computed. First, the data\ + \ are standardized (i.e., z-score normalization per feature) with a regularized\ + \ standard deviation. Next, the normalized variance is computed as the variance\ + \ of each gene after the transformation. 
Genes are ranked by the normalized variance.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scikit-misc" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: 
"/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_hvg" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_hvg/filter_with_hvg" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/filter/filter_with_hvg/main.nf b/target/nextflow/filter/filter_with_hvg/main.nf new file mode 100644 index 00000000000..053dd28099c --- /dev/null +++ b/target/nextflow/filter/filter_with_hvg/main.nf @@ -0,0 +1,2856 @@ +// filter_with_hvg 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (contributor) +// * Robrecht Cannoodt (maintainer, contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "filter_with_hvg", + "namespace" : "filter", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer", + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer", + "description" : "use adata.layers[layer] for 
expression values instead of adata.X.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_name_filter", + "description" : "In which .var slot to store a boolean array corresponding to which observations should be filtered out.", + "default" : [ + "filter_with_hvg" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--varm_name", + "description" : "In which .varm slot to store additional metadata.", + "default" : [ + "hvg" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--do_subset", + "description" : "Whether to subset before storing the output.", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--flavor", + "description" : "Choose the flavor for identifying highly variable genes. 
For the dispersion based methods\nin their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes n_top_genes.\n", + "default" : [ + "seurat" + ], + "required" : false, + "choices" : [ + "seurat", + "cell_ranger", + "seurat_v3" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_top_genes", + "description" : "Number of highly-variable genes to keep. Mandatory if flavor='seurat_v3'.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--min_mean", + "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.", + "default" : [ + 0.0125 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--max_mean", + "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.", + "default" : [ + 3.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--min_disp", + "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.", + "default" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--max_disp", + "description" : "If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'. 
Default is +inf.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--span", + "description" : "The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor='seurat_v3'.", + "default" : [ + 0.3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_bins", + "description" : "Number of bins for binning the mean gene expression. Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1.", + "default" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_batch_key", + "description" : "If specified, highly-variable genes are selected within each batch separately and merged. This simple \nprocess avoids the selection of batch-specific genes and acts as a lightweight batch correction method. \nFor all flavors, genes are first sorted by how many batches they are a HVG. For dispersion-based flavors \nties are broken by normalized dispersion. 
If flavor = 'seurat_v3', ties are broken by the median (across\nbatches) rank based on within-batch normalized variance.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor='seurat_v3' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.\n\nFor [Stuart19], a normalized variance for each gene is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each gene after the transformation. 
Genes are ranked by the normalized variance.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "scikit-misc" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + 
"mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_hvg/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_hvg", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import scanpy as sc +import mudata as mu +import numpy as np +import sys +import re + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_name_filter': $( if [ ! 
-z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'varm_name': $( if [ ! -z ${VIASH_PAR_VARM_NAME+x} ]; then echo "r'${VIASH_PAR_VARM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'flavor': $( if [ ! -z ${VIASH_PAR_FLAVOR+x} ]; then echo "r'${VIASH_PAR_FLAVOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_mean': $( if [ ! -z ${VIASH_PAR_MIN_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MIN_MEAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_mean': $( if [ ! -z ${VIASH_PAR_MAX_MEAN+x} ]; then echo "float(r'${VIASH_PAR_MAX_MEAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_disp': $( if [ ! -z ${VIASH_PAR_MIN_DISP+x} ]; then echo "float(r'${VIASH_PAR_MIN_DISP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_disp': $( if [ ! -z ${VIASH_PAR_MAX_DISP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DISP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'span': $( if [ ! -z ${VIASH_PAR_SPAN+x} ]; then echo "float(r'${VIASH_PAR_SPAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_bins': $( if [ ! -z ${VIASH_PAR_N_BINS+x} ]; then echo "int(r'${VIASH_PAR_N_BINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'obs_batch_key': $( if [ ! -z ${VIASH_PAR_OBS_BATCH_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi )
}

## VIASH END

sys.path.append(meta["resources_dir"])
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    import logging
    from sys import stdout

    # configure the root logger to write INFO and above to stdout
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    console_handler = logging.StreamHandler(stdout)
    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    console_handler.setFormatter(logFormatter)
    logger.addHandler(console_handler)

    return logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()

mdata = mu.read_h5mu(par["input"])
mdata.var_names_make_unique()

mod = par['modality']
# logging performs the %-interpolation itself, so no f-string prefix is needed
logger.info("Processing modality '%s'", mod)
data = mdata.mod[mod]

# Workaround for issue
# https://github.com/scverse/scanpy/issues/2239
# https://github.com/scverse/scanpy/issues/2181
if par['flavor'] != "seurat_v3":
    # This component requires log normalized data when flavor is not seurat_v3
    # We assume that the data is correctly normalized but scanpy will look at
    # .uns to check the transformations performed on the data.
    # To prevent scanpy from automatically transforming the counts when they are
    # already transformed, we set the appropriate values to .uns.
    if 'log1p' not in data.uns:
        logger.warning("When flavor is not set to 'seurat_v3', "
                       "the input data for this component must be log-transformed. "
                       "However, the 'log1p' dictionary in .uns has not been set. "
                       "This is fine if you did not log transform your data with scanpy. "
                       "Otherwise, please check if you are providing log transformed "
                       "data using --layer.")
        data.uns['log1p'] = {'base': None}
    elif 'base' not in data.uns['log1p']:
        data.uns['log1p']['base'] = None

logger.info("\\\\tUnfiltered data: %s", data)

logger.info("\\\\tComputing hvg")
# construct arguments
hvg_args = {
    'adata': data,
    'n_top_genes': par["n_top_genes"],
    'min_mean': par["min_mean"],
    'max_mean': par["max_mean"],
    'min_disp': par["min_disp"],
    'span': par["span"],
    'n_bins': par["n_bins"],
    'flavor': par["flavor"],
    'subset': False,
    'inplace': False,
    'layer': par['layer'],
}

# maps component argument name -> scanpy keyword argument name
optional_parameters = {
    "max_disp": "max_disp",
    "obs_batch_key": "batch_key",
    "n_top_genes": "n_top_genes"
}
# only add parameter if it's passed
for par_name, dest_name in optional_parameters.items():
    if par.get(par_name):
        hvg_args[dest_name] = par[par_name]

# scanpy does not do this check, although it is stated in the documentation
if par['flavor'] == "seurat_v3" and not par['n_top_genes']:
    raise ValueError("When flavor is set to 'seurat_v3', you are required to set 'n_top_genes'.")

if par["layer"] and not par['layer'] in data.layers:
    raise ValueError(f"Layer '{par['layer']}' not found in layers for modality '{mod}'. "
                     f"Found layers are: {','.join(data.layers)}")
# call function
try:
    out = sc.pp.highly_variable_genes(**hvg_args)
    if par['obs_batch_key'] is not None:
        assert (out.index == data.var.index).all(), "Expected output index values to be equivalent to the input index"
except ValueError as err:
    # make the two known scanpy failure modes easier to understand
    if str(err) == "cannot specify integer \\`bins\\` when input data contains infinity":
        err.args = ("Cannot specify integer \\`bins\\` when input data contains infinity. "
                    "Perhaps input data has not been log normalized?",)
    if re.search("Bin edges must be unique:", str(err)):
        raise RuntimeError("Scanpy failed to calculate hvg. The error "
                           "returned by scanpy (see above) could be the "
                           "result from trying to use this component on unfiltered data.") from err
    raise err

out.index = data.var.index
logger.info("\\\\tStoring output into .var")
if par.get("var_name_filter", None) is not None:
    data.var[par["var_name_filter"]] = out["highly_variable"]

if par.get("varm_name", None) is not None and 'mean_bin' in out:
    # drop mean_bin as mudata/anndata doesn't support tuples
    data.varm[par["varm_name"]] = out.drop("mean_bin", axis=1)

if par["do_subset"]:
    # Subset on the computed mask itself: it holds the same values that were
    # stored in .var above, and it also exists when var_name_filter is unset.
    keep_feats = np.ravel(out["highly_variable"])
    mdata.mod[mod] = data[:,keep_feats]

logger.info("Writing h5mu to file")
mdata.write_h5mu(par["output"], compression=par["output_compression"])
VIASHMAIN
python -B "$tempscript"
'''

thisDefaultProcessArgs = [
  // key to be used to trace the process and determine output names
  key: thisConfig.functionality.name,
  // fixed arguments to be passed to script
  args: [:],
  // default directives
  directives: jsonSlurper.parseText('''{
  "container" : {
    "registry" : "ghcr.io",
    "image" : "openpipelines-bio/filter_filter_with_hvg",
    "tag" : "0.12.0"
  },
  "label" : [
    "singlecpu",
    "lowmem"
  ],
  "tag" : "$id"
}'''),
  // auto settings
  auto: jsonSlurper.parseText('''{
  "simplifyInput" : true,
  "simplifyOutput" : false,
  "transcript" : false,
  "publish" : false
}'''),

  // Apply a map over the incoming tuple
  // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }`
  map: null,

  // Apply a map over the ID element of a tuple (i.e. the first element)
  // Example: `{ id -> id + "_foo" }`
  mapId: null,

  // Apply a map over the data element of a tuple (i.e. the second element)
  // Example: `{ data -> [ input: data.output ] }`
  mapData: null,

  // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
/**
 * Guess the format of the value passed as 'param_list'.
 *
 * @param params A map of parameters that may contain a 'param_list' key.
 *
 * @return "none" when 'param_list' is absent or null, "asis" when it is not a
 *         String, "csv" / "json" / "yaml" when the String ends with a matching
 *         file extension, and "yaml_blob" for any other String.
 */
def _guessParamListFormat(params) {
  def listValue = params.containsKey("param_list") ? params.param_list : null

  if (listValue == null) {
    return "none"
  }
  if (!(listValue instanceof String)) {
    return "asis"
  }

  // file extension -> format name
  def formatByExtension = [
    ".csv": "csv",
    ".json": "json",
    ".jsn": "json",
    ".yaml": "yaml",
    ".yml": "yaml"
  ]
  def hit = formatByExtension.find { extension, format -> listValue.endsWith(extension) }

  // a String without a known extension is treated as an inline YAML blob
  return hit != null ? hit.value : "yaml_blob"
}
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
/**
 * Deprecated: build a channel of [id, parameterSet] events from the params.
 *
 * @param params Input parameters.
 * @param config A Map of the Viash configuration.
 *
 * @return The channel produced by paramsToChannel, with every parameter set
 *         mapped to a two-element list of its 'id' and the set itself.
 */
def viashChannel(params, config) {
  // The flag is shared with paramsToList and paramsToChannel, so raising it
  // here also keeps the nested paramsToChannel call below silent.
  def warnedBefore = viashChannelDeprecationWarningPrinted
  viashChannelDeprecationWarningPrinted = true
  if (!warnedBefore) {
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                      "Please use a combination of channelFromParams and preprocessInputs.")
  }

  def paramSetCh = paramsToChannel(params, config)
  return paramSetCh.map { paramSet -> [paramSet.id, paramSet] }
}
/**
 * Resolve the file paths in the parameters relative to a given path.
 *
 * Only values of arguments that the config declares as input files are
 * touched, and only when they are still Strings; values of any other type
 * (and all other parameters) are returned unchanged.
 *
 * @param paramList A Map containing parameters to process.
 *                  This function assumes that files are still of type String.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * @param relativeTo Path of a file to resolve the parameter values to.
 *
 * @return A map of parameters where the locations of the input file parameters have been
 *         resolved relative to the provided path.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  paramList.collectEntries { parName, parValue ->
    // 'def' keeps the lookup local instead of writing it to the script binding
    def argSettings = config.functionality.allArguments.find{it.plainName == parName}

    // a missing direction counts as "input", as in the type-casting helpers
    def isInputFile = argSettings &&
      argSettings.type == "file" &&
      (argSettings.direction != null ? argSettings.direction : "input") == "input"

    if (isInputFile) {
      // non-String values are passed through untouched
      def resolve = { path ->
        path instanceof String ? file(getChild(relativeTo, path)) : path
      }
      parValue = parValue instanceof Collection ? parValue.collect(resolve) : resolve(parValue)
    }
    [parName, parValue]
  }
}
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
/**
 * Cast parameters to the correct type as defined in the Viash config.
 *
 * Every value is first wrapped in a list (unless it already is a Collection),
 * each element is cast according to the argument's 'type', and the list is
 * reduced to a single value again for arguments that are not 'multiple'.
 * Parameters that are not described in the config (e.g. publish_dir) are not
 * cast and keep the list wrapping.
 *
 * @param parValues A Map of input arguments.
 * @param config A Map of the Viash configuration.
 *
 * @return The input arguments that have been cast to the type from the viash config.
 */
private Map _castParamTypes(Map parValues, Map config) {
  return parValues.collectEntries { parName, parValue ->
    // declared with 'def' so they stay local instead of becoming script-binding globals
    def paramSettings = config.functionality.allArguments.find { it.plainName == parName }
    // plain property read: the two-argument Map.get would insert a missing key into the config
    def parType = paramSettings?.type

    def values = parValue instanceof Collection ? parValue : [parValue]

    def isInputFile = parType == "file" &&
      (paramSettings.direction != null ? paramSettings.direction : "input") == "input"

    if (isInputFile) {
      // only Strings are turned into file objects
      values = values.collect { path -> path instanceof String ? file(path) : path }
    } else if (parType == "integer") {
      values = values.collect { it as Integer }
    } else if (parType == "double") {
      values = values.collect { it as Double }
    } else if (parType in ["boolean", "boolean_true", "boolean_false"]) {
      // NOTE(review): 'as Boolean' on a String presumably follows Groovy truth
      // rather than parsing "true"/"false" - confirm for string inputs.
      values = values.collect { it as Boolean }
    }

    // simplify list to value if need be
    def result = values
    if (paramSettings && !paramSettings.multiple) {
      assert values.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${values.size()}"
      result = values[0]
    }
    [parName, result]
  }
}
/**
 * Apply the argument settings specified in a Viash config to a list of parameter sets.
 *
 * Each parameter set is a tuple whose first element is the id and whose second
 * element is the map of parameter values; that map is processed with
 * applyConfigToOneParameterSet. Any further tuple elements are appended to the
 * result unchanged. Afterwards the ids are checked for uniqueness.
 *
 * @param parameterSets The list of parameter-set tuples.
 * @param config A Map of the Viash configuration.
 *
 * @return The list of tuples with processed parameter values.
 */
List applyConfig(List parameterSets, Map config){
  def processedSets = []
  for (tuple in parameterSets) {
    def extras = tuple.drop(2)
    def processedValues = applyConfigToOneParameterSet(tuple[1], config)
    processedSets << ([tuple[0], processedValues] + extras)
  }

  _checkUniqueIds(processedSets)
  return processedSets
}
/**
 * Parse nextflow parameters based on settings defined in a viash config and
 * return a list of parameter sets, one per event of the resulting channel.
 *
 * Steps:
 *  - keep the entries of `params` that correspond to arguments in the config
 *    and apply the config to them (the "global" parameters);
 *  - parse the optional 'param_list' into parameter sets and apply the config;
 *  - merge the global parameters into every parameter set (values from the
 *    parameter set win) and drop null values of arguments without a default.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key.
 *               Each parameter set must have a unique id, taken from the set
 *               itself or, failing that, from params.id.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of [id, parameterMap] pairs.
 */
private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
  /* parse regular parameters (not in param_list) */
  def globalParams = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  // Groovy's two-argument Map.get inserts the default when the key is missing;
  // read the id without touching `params`.
  def globalID = params.containsKey("id") ? params["id"] : null
  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)

  /* process params_list arguments */
  def paramSets = _parseParamListArguments(params, config)
  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)

  /* combine arguments into channel */
  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
    def id = paramSet[0]
    def parValues = paramSet[1]
    id = [id, globalID].find({it != null}) // first non-null element

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      id = id ? id : "stub" + index
    }
    assert id != null: "Each parameter set should have at least an ID."

    // Add regular parameters together with parameters passed with 'param_list'
    def combinedArgsValues = globalParamsValues + parValues

    // Remove parameters which are null, if the default is also null.
    // findAll states the filter directly (no null results handed to
    // collectEntries), and the single-argument get leaves the config
    // untouched, so later containsKey("default") checks are not affected.
    combinedArgsValues = combinedArgsValues.findAll { paramName, paramValue ->
      if (paramValue != null) {
        return true
      }
      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
      // parameters without settings in the config (e.g. publish_dir) have no default
      return parameterSettings?.get("default") != null
    }
    [id, combinedArgsValues]
  }

  // Check if ids (first element of each list) are unique
  _checkUniqueIds(processedParams)
  return processedParams
}
/**
 * Process a list of Vdsl3 formatted parameter sets and apply a Viash config to them.
 *
 * The defaults declared in the config and the incoming parameter sets are both
 * passed through the config (applyConfigToOneParameterSet / applyConfig); the
 * processed defaults are then merged into every set, with the values of the
 * set taking precedence. Ids are checked for uniqueness.
 *
 * @param params A list of parameter sets as tuples: the id first, the map of
 *               parameters second, optional extra elements after that. The
 *               extra elements are passed to the output as is.
 * @param config A Map of the Viash configuration. This Map can be generated from
 *               the config file using the readConfig() function.
 *
 * @return A list of processed parameter sets as tuples.
 */
private List _preprocessInputsList(List params, Map config) {
  // Collect the defaults of every argument that declares one
  def configuredDefaults = [:]
  for (argument in config.functionality.allArguments) {
    if (argument.containsKey("default")) {
      configuredDefaults[argument.plainName] = argument.default
    }
  }

  // Apply the config to the defaults and to the incoming parameter sets
  def processedDefaults = applyConfigToOneParameterSet(configuredDefaults, config)
  def processedInputs = applyConfig(params, config)

  // Lay the values of each set over the defaults
  def mergedSets = processedInputs.collect { event ->
    def extras = event.drop(2)
    [event[0], processedDefaults + event[1]] + extras
  }
  _checkUniqueIds(mergedSets)

  return mergedSets
}
/**
 * Assert that a value is a Map with only expected keys and all required keys.
 *
 * @param map          The value to check.
 * @param expectedKeys The keys that are allowed to occur in the map.
 * @param requiredKeys The keys that must occur in the map.
 * @param mapName      Name of the map, used in the error messages (may be empty).
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // the message must name the closure parameter; no variable 'key' exists in this scope
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +// Validates the 'auto' settings: drops null entries, asserts that each expected key is present and a Boolean, and returns only the expected keys. +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // declared with 'def' so the list stays local instead of becoming a script-level binding variable + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // append the transcript entry to the publishDir entries already defined (or to an empty list when there are none) + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + [ transcriptsPublishDir ] + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build one message string: a second argument to log.warn is treated as a format parameter and is not printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + // declare first, assign second, so that the closure body can call itself for nested lists and maps + def valueToStr + valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // 'def' keeps this local to the closure instead of writing to the shared script binding + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // the instanceof test is parenthesised because '!' binds tighter than 'instanceof' + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_hvg/nextflow.config b/target/nextflow/filter/filter_with_hvg/nextflow.config new file mode 100644 index 
00000000000..c11fe73ba27 --- /dev/null +++ b/target/nextflow/filter/filter_with_hvg/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'filter_with_hvg' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\'seurat_v3\' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.\n\nFor [Stuart19], a normalized variance for each gene is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each gene after the transformation. 
Genes are ranked by the normalized variance.\n' + author = 'Dries De Maeyer, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + 
withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/filter/filter_with_hvg/nextflow_params.yaml b/target/nextflow/filter/filter_with_hvg/nextflow_params.yaml new file mode 100644 index 00000000000..1f5eaec7a25 --- /dev/null +++ b/target/nextflow/filter/filter_with_hvg/nextflow_params.yaml @@ -0,0 +1,22 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# layer: "foo" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +var_name_filter: "filter_with_hvg" +varm_name: "hvg" +do_subset: false +flavor: "seurat" +# n_top_genes: 123 +min_mean: 0.0125 +max_mean: 3 +min_disp: 0.5 +# max_disp: 123.0 +span: 0.3 +n_bins: 20 +# obs_batch_key: "foo" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/filter_with_hvg/nextflow_schema.json b/target/nextflow/filter/filter_with_hvg/nextflow_schema.json new file mode 100644 index 00000000000..3f6658be745 --- /dev/null +++ b/target/nextflow/filter/filter_with_hvg/nextflow_schema.json @@ -0,0 +1,245 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "filter_with_hvg", +"description": "Annotate highly variable genes [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\u0027seurat_v3\u0027 in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and 
Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected.\n\nFor [Stuart19], a normalized variance for each gene is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each gene after the transformation. Genes are ranked by the normalized variance.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "layer": { + "type": + "string", + "description": "Type: `string`. use adata", + "help_text": "Type: `string`. use adata.layers[layer] for expression values instead of adata.X." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "var_name_filter": { + "type": + "string", + "description": "Type: `string`, default: `filter_with_hvg`. In which ", + "help_text": "Type: `string`, default: `filter_with_hvg`. In which .var slot to store a boolean array corresponding to which observations should be filtered out." + , + "default": "filter_with_hvg" + } + + + , + "varm_name": { + "type": + "string", + "description": "Type: `string`, default: `hvg`. In which ", + "help_text": "Type: `string`, default: `hvg`. In which .varm slot to store additional metadata." + , + "default": "hvg" + } + + + , + "do_subset": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output", + "help_text": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output." + , + "default": "False" + } + + + , + "flavor": { + "type": + "string", + "description": "Type: `string`, default: `seurat`, choices: ``seurat`, `cell_ranger`, `seurat_v3``. Choose the flavor for identifying highly variable genes", + "help_text": "Type: `string`, default: `seurat`, choices: ``seurat`, `cell_ranger`, `seurat_v3``. Choose the flavor for identifying highly variable genes. For the dispersion based methods\nin their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes n_top_genes.\n", + "enum": ["seurat", "cell_ranger", "seurat_v3"] + + , + "default": "seurat" + } + + + , + "n_top_genes": { + "type": + "integer", + "description": "Type: `integer`. Number of highly-variable genes to keep", + "help_text": "Type: `integer`. Number of highly-variable genes to keep. Mandatory if flavor=\u0027seurat_v3\u0027." + + } + + + , + "min_mean": { + "type": + "number", + "description": "Type: `double`, default: `0.0125`. 
If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", + "help_text": "Type: `double`, default: `0.0125`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027." + , + "default": "0.0125" + } + + + , + "max_mean": { + "type": + "number", + "description": "Type: `double`, default: `3`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", + "help_text": "Type: `double`, default: `3`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027." + , + "default": "3" + } + + + , + "min_disp": { + "type": + "number", + "description": "Type: `double`, default: `0.5`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", + "help_text": "Type: `double`, default: `0.5`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027." + , + "default": "0.5" + } + + + , + "max_disp": { + "type": + "number", + "description": "Type: `double`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored", + "help_text": "Type: `double`. If n_top_genes is defined, this and all other cutoffs for the means and the normalized dispersions are ignored. Ignored if flavor=\u0027seurat_v3\u0027. Default is +inf." + + } + + + , + "span": { + "type": + "number", + "description": "Type: `double`, default: `0.3`. The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor=\u0027seurat_v3\u0027", + "help_text": "Type: `double`, default: `0.3`. 
The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor=\u0027seurat_v3\u0027." + , + "default": "0.3" + } + + + , + "n_bins": { + "type": + "integer", + "description": "Type: `integer`, default: `20`. Number of bins for binning the mean gene expression", + "help_text": "Type: `integer`, default: `20`. Number of bins for binning the mean gene expression. Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1." + , + "default": "20" + } + + + , + "obs_batch_key": { + "type": + "string", + "description": "Type: `string`. If specified, highly-variable genes are selected within each batch separately and merged", + "help_text": "Type: `string`. If specified, highly-variable genes are selected within each batch separately and merged. This simple \nprocess avoids the selection of batch-specific genes and acts as a lightweight batch correction method. \nFor all flavors, genes are first sorted by how many batches they are a HVG. For dispersion-based flavors \nties are broken by normalized dispersion. If flavor = \u0027seurat_v3\u0027, ties are broken by the median (across\nbatches) rank based on within-batch normalized variance.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/filter_with_hvg/setup_logger.py b/target/nextflow/filter/filter_with_hvg/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/filter/filter_with_hvg/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml b/target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml new file mode 100644 index 00000000000..dc82bfada95 --- /dev/null +++ b/target/nextflow/filter/filter_with_scrublet/.config.vsh.yaml @@ -0,0 +1,304 @@ +functionality: + name: "filter_with_scrublet" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: 
"https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_name_filter" + description: "In which .obs slot to store a boolean array corresponding to which\ + \ observations should be filtered out." + info: null + default: + - "filter_with_scrublet" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--do_subset" + description: "Whether to subset before storing the output." + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--obs_name_doublet_score" + description: "Name of the doublet scores column in the obs slot of the returned\ + \ object." + info: null + default: + - "scrublet_doublet_score" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_counts" + description: "The number of minimal UMI counts per cell that have to be present\ + \ for initial cell detection." 
+ info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells" + description: "The number of cells in which UMIs for a gene were detected." + info: null + default: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_gene_variablity_percent" + description: "Used for gene filtering prior to PCA. Keep the most highly variable\ + \ genes (in the top min_gene_variability_pctl percentile), as measured by the\ + \ v-statistic [Klein et al., Cell 2015]." + info: null + default: + - 85.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_pca_components" + description: "Number of principal components to use during PCA dimensionality\ + \ reduction." + info: null + default: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--distance_metric" + description: "The distance metric used for computing similarities." 
+ info: null + default: + - "euclidean" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--allow_automatic_threshold_detection_fail" + description: "When scrublet fails to automatically determine the double score\ + \ threshold, \nallow the component to continue and set the output columns to\ + \ NA.\n" + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Doublet detection using the Scrublet method (Wolock, Lopez and Klein,\ + \ 2019).\nThe method tests for potential doublets by using the expression profiles\ + \ of\ncells to generate synthetic potential doubles which are tested against cells.\ + \ \nThe method returns a \"doublet score\" on which it calls for potential doublets.\n\ + \nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\ + \nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells\ + \ Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6%\ + \ ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000\ + \ ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n\ + \ ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + run_args: + - "--env NUMBA_CACHE_DIR=/tmp" + target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scrublet" + - "annoy==1.16.3" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_scrublet" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_scrublet/filter_with_scrublet" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git 
a/target/nextflow/filter/filter_with_scrublet/main.nf b/target/nextflow/filter/filter_with_scrublet/main.nf new file mode 100644 index 00000000000..7da8d52ef4e --- /dev/null +++ b/target/nextflow/filter/filter_with_scrublet/main.nf @@ -0,0 +1,2769 @@ +// filter_with_scrublet 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries De Maeyer (contributor) +// * Robrecht Cannoodt (maintainer, contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "filter_with_scrublet", + "namespace" : "filter", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer", + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data 
Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_name_filter", + "description" : "In which .obs slot to store a boolean array corresponding to which observations should be filtered out.", + "default" : [ + "filter_with_scrublet" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--do_subset", + "description" : "Whether to subset before storing the output.", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_name_doublet_score", + "description" : "Name of the doublet scores column in the obs slot of the returned object.", + "default" : [ + "scrublet_doublet_score" + ], + "required" : false, + 
"direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_counts", + "description" : "The number of minimal UMI counts per cell that have to be present for initial cell detection.", + "default" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_cells", + "description" : "The number of cells in which UMIs for a gene were detected.", + "default" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--min_gene_variablity_percent", + "description" : "Used for gene filtering prior to PCA. Keep the most highly variable genes (in the top min_gene_variability_pctl percentile), as measured by the v-statistic [Klein et al., Cell 2015].", + "default" : [ + 85.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--num_pca_components", + "description" : "Number of principal components to use during PCA dimensionality reduction.", + "default" : [ + 30 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--distance_metric", + "description" : "The distance metric used for computing similarities.", + "default" : [ + "euclidean" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--allow_automatic_threshold_detection_fail", + "description" : "When scrublet fails to automatically determine the double score threshold, \nallow the component to continue and set the output columns to NA.\n", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + 
"type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019).\nThe method tests for potential doublets by using the expression profiles of\ncells to generate synthetic potential doubles which are tested against cells. \nThe method returns a \\"doublet score\\" on which it calls for potential doublets.\n\nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6% ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000 ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "run_args" : [ + "--env NUMBA_CACHE_DIR=/tmp" + ], + "target_image_source" : 
"https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "build-essential" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "scrublet", + "annoy==1.16.3" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highcpu", + "midmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/filter_with_scrublet/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/filter_with_scrublet", + "viash_version" : "0.7.5", 
+ "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import scrublet as scr +import mudata as mu +import numpy as np +import sys +import pandas as pd + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_name_filter': $( if [ ! -z ${VIASH_PAR_OBS_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'do_subset': $( if [ ! -z ${VIASH_PAR_DO_SUBSET+x} ]; then echo "r'${VIASH_PAR_DO_SUBSET//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'obs_name_doublet_score': $( if [ ! -z ${VIASH_PAR_OBS_NAME_DOUBLET_SCORE+x} ]; then echo "r'${VIASH_PAR_OBS_NAME_DOUBLET_SCORE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_gene_variablity_percent': $( if [ ! 
-z ${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT+x} ]; then echo "float(r'${VIASH_PAR_MIN_GENE_VARIABLITY_PERCENT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'num_pca_components': $( if [ ! -z ${VIASH_PAR_NUM_PCA_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_PCA_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'distance_metric': $( if [ ! -z ${VIASH_PAR_DISTANCE_METRIC+x} ]; then echo "r'${VIASH_PAR_DISTANCE_METRIC//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'allow_automatic_threshold_detection_fail': $( if [ ! -z ${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL+x} ]; then echo "r'${VIASH_PAR_ALLOW_AUTOMATIC_THRESHOLD_DETECTION_FAIL//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading %s.", par['input']) +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Processing modality '%s'.", mod) +data = mdata.mod[mod] + +logger.info("\\\\tRunning scrublet") +scrub = scr.Scrublet(data.X) + +doublet_scores, predicted_doublets = scrub.scrub_doublets( + min_counts=par["min_counts"], + min_cells=par["min_cells"], + min_gene_variability_pctl=par["min_gene_variablity_percent"], + n_prin_comps=par["num_pca_components"], + distance_metric=par["distance_metric"], + use_approx_neighbors=False +) + +try: + keep_cells = np.invert(predicted_doublets) +except TypeError: + if par['allow_automatic_threshold_detection_fail']: + # Scrublet might not throw an error and return None if it fails to detect doublets... 
+ logger.info("\\\\tScrublet could not automatically detect the doublet score threshold. Setting output columns to NA.") + keep_cells = np.nan + doublet_scores = np.nan + else: + raise RuntimeError("Scrublet could not automatically detect the doublet score threshold. " + "--allow_automatic_threshold_detection_fail can be used to ignore this failure " + "and set the corresponding output columns to NA.") + +logger.info("\\\\tStoring output into .obs") +if par["obs_name_doublet_score"] is not None: + data.obs[par["obs_name_doublet_score"]] = doublet_scores + data.obs[par["obs_name_doublet_score"]] = data.obs[par["obs_name_doublet_score"]].astype("float64") +if par["obs_name_filter"] is not None: + data.obs[par["obs_name_filter"]] = keep_cells + data.obs[par["obs_name_filter"]] = data.obs[par["obs_name_filter"]].astype(pd.BooleanDtype()) + +if par["do_subset"]: + if pd.api.types.is_scalar(keep_cells) and pd.isna(keep_cells): + logger.warning("Not subsetting beacuse doublets were not predicted") + else: + mdata.mod[mod] = data[keep_cells, :] + +logger.info("Writing h5mu to %s", par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/filter_filter_with_scrublet", + "tag" : "0.12.0" + }, + "label" : [ + "highcpu", + "midmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID 
element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their separator if + * the parameter accepts multiple values + * - Cast the parameters to their correct types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @param parameterSets A List of parameter sets. The first element of each set is its ID, + * the second element is a Map of parameter values, and any further + * elements are passed through unchanged. + * @param config A Map of the Viash configuration. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to initialise + * a Vdsl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + // interpolate the closure parameter: no variable named 'key' is defined in this closure + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // append the transcripts entry to any existing publishDir entries (empty list if none are set) + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build one message string: a second argument to log.warn is treated as a format argument and is not printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); '!' binds tighter than instanceof, so the type check must be parenthesised + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/filter/filter_with_scrublet/nextflow.config b/target/nextflow/filter/filter_with_scrublet/nextflow.config new file mode 100644 index 
00000000000..9975a611561 --- /dev/null +++ b/target/nextflow/filter/filter_with_scrublet/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'filter_with_scrublet' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019).\nThe method tests for potential doublets by using the expression profiles of\ncells to generate synthetic potential doubles which are tested against cells. \nThe method returns a "doublet score" on which it calls for potential doublets.\n\nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6% ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000 ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n' + author = 'Dries De Maeyer, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + 
shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml b/target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml new file mode 100644 index 00000000000..33af31b238d --- /dev/null +++ b/target/nextflow/filter/filter_with_scrublet/nextflow_params.yaml @@ -0,0 +1,18 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +obs_name_filter: 
"filter_with_scrublet" +do_subset: false +obs_name_doublet_score: "scrublet_doublet_score" +min_counts: 2 +min_cells: 3 +min_gene_variablity_percent: 85 +num_pca_components: 30 +distance_metric: "euclidean" +allow_automatic_threshold_detection_fail: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/filter_with_scrublet/nextflow_schema.json b/target/nextflow/filter/filter_with_scrublet/nextflow_schema.json new file mode 100644 index 00000000000..5350ded357f --- /dev/null +++ b/target/nextflow/filter/filter_with_scrublet/nextflow_schema.json @@ -0,0 +1,203 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "filter_with_scrublet", +"description": "Doublet detection using the Scrublet method (Wolock, Lopez and Klein, 2019).\nThe method tests for potential doublets by using the expression profiles of\ncells to generate synthetic potential doubles which are tested against cells. \nThe method returns a \"doublet score\" on which it calls for potential doublets.\n\nFor the source code please visit https://github.com/AllonKleinLab/scrublet.\n\nFor 10x we expect the doublet rates to be:\n Multiplet Rate (%) - # of Cells Loaded - # of Cells Recovered\n ~0.4% ~800 ~500\n ~0.8% ~1,600 ~1,000\n ~1.6% ~3,200 ~2,000\n ~2.3% ~4,800 ~3,000\n ~3.1% ~6,400 ~4,000\n ~3.9% ~8,000 ~5,000\n ~4.6% ~9,600 ~6,000\n ~5.4% ~11,200 ~7,000\n ~6.1% ~12,800 ~8,000\n ~6.9% ~14,400 ~9,000\n ~7.6% ~16,000 ~10,000\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. 
", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "obs_name_filter": { + "type": + "string", + "description": "Type: `string`, default: `filter_with_scrublet`. In which ", + "help_text": "Type: `string`, default: `filter_with_scrublet`. In which .obs slot to store a boolean array corresponding to which observations should be filtered out." + , + "default": "filter_with_scrublet" + } + + + , + "do_subset": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output", + "help_text": "Type: `boolean_true`, default: `false`. Whether to subset before storing the output." + , + "default": "False" + } + + + , + "obs_name_doublet_score": { + "type": + "string", + "description": "Type: `string`, default: `scrublet_doublet_score`. Name of the doublet scores column in the obs slot of the returned object", + "help_text": "Type: `string`, default: `scrublet_doublet_score`. Name of the doublet scores column in the obs slot of the returned object." + , + "default": "scrublet_doublet_score" + } + + + , + "min_counts": { + "type": + "integer", + "description": "Type: `integer`, default: `2`. 
The number of minimal UMI counts per cell that have to be present for initial cell detection", + "help_text": "Type: `integer`, default: `2`. The number of minimal UMI counts per cell that have to be present for initial cell detection." + , + "default": "2" + } + + + , + "min_cells": { + "type": + "integer", + "description": "Type: `integer`, default: `3`. The number of cells in which UMIs for a gene were detected", + "help_text": "Type: `integer`, default: `3`. The number of cells in which UMIs for a gene were detected." + , + "default": "3" + } + + + , + "min_gene_variablity_percent": { + "type": + "number", + "description": "Type: `double`, default: `85`. Used for gene filtering prior to PCA", + "help_text": "Type: `double`, default: `85`. Used for gene filtering prior to PCA. Keep the most highly variable genes (in the top min_gene_variability_pctl percentile), as measured by the v-statistic [Klein et al., Cell 2015]." + , + "default": "85" + } + + + , + "num_pca_components": { + "type": + "integer", + "description": "Type: `integer`, default: `30`. Number of principal components to use during PCA dimensionality reduction", + "help_text": "Type: `integer`, default: `30`. Number of principal components to use during PCA dimensionality reduction." + , + "default": "30" + } + + + , + "distance_metric": { + "type": + "string", + "description": "Type: `string`, default: `euclidean`. The distance metric used for computing similarities", + "help_text": "Type: `string`, default: `euclidean`. The distance metric used for computing similarities." + , + "default": "euclidean" + } + + + , + "allow_automatic_threshold_detection_fail": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. When scrublet fails to automatically determine the double score threshold, \nallow the component to continue and set the output columns to NA", + "help_text": "Type: `boolean_true`, default: `false`. 
When scrublet fails to automatically determine the double score threshold, \nallow the component to continue and set the output columns to NA.\n" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/filter_with_scrublet/setup_logger.py b/target/nextflow/filter/filter_with_scrublet/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/filter/filter_with_scrublet/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/filter/remove_modality/.config.vsh.yaml b/target/nextflow/filter/remove_modality/.config.vsh.yaml new file mode 100644 index 00000000000..a8f05f5f5dd --- /dev/null +++ b/target/nextflow/filter/remove_modality/.config.vsh.yaml @@ -0,0 +1,171 @@ +functionality: + name: "remove_modality" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + description: "Input 
h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Remove a modality from a .h5mu file\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + 
directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/remove_modality" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/remove_modality/remove_modality" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/filter/remove_modality/main.nf b/target/nextflow/filter/remove_modality/main.nf new file mode 100644 index 00000000000..28a25387313 --- /dev/null +++ b/target/nextflow/filter/remove_modality/main.nf @@ -0,0 +1,2550 @@ +// remove_modality 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. 
+// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "remove_modality", + "namespace" : "filter", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + 
"example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/" + } + ], + "description" : "Remove a modality from a .h5mu file\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + 
"config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/remove_modality/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/remove_modality", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from mudata import read_h5mu, MuData + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + + +input_mudata = read_h5mu(par['input']) +new_mods = {mod_name: mod for mod_name, mod + in input_mudata.mod.items() + if mod_name not in par['modality']} + +new_mudata = MuData(new_mods) +new_mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/filter_remove_modality", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been + * resolved relatively to the provided path. Values that are not Strings are returned as is. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + // declared with 'def' so the lookup stays local to this closure + def argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + // a single non-String value is passed through untouched + parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return the parsed parameter sets. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns [location of the param_list file or null, parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list, cast element by element, and unwrapped
+ * again when the argument is known to the config and does not accept multiple
+ * values. Parameters that are not in the config (e.g. publish_dir) are not cast
+ * and are returned as a list.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // both lookups are declared with 'def': without it they are written to the
+    // script binding and shared between every invocation of this closure
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    // a file argument without an explicit direction is treated as an input
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  // (publishDir / publish_dir are accepted although they are not part of the config)
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. Element 0 of a set is its id,
+ *                      element 1 the parameter Map; any further elements are
+ *                      appended to the result unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // an id from the param_list takes precedence over the global --id
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a key clash the value from 'param_list' wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      // parameterSettings is null for parameters that are not part of the config
+      // (publish_dir / publishDir), hence the safe navigation below
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ */
+def channelFromParams(Map params, Map config) {
+  // declared with 'def' so the list is not stored in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // defaults declared in the config, keyed by the plain argument name
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // the defaults go through the same split / cast steps as the inputs
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+  def processedSets = applyConfig(params, config)
+
+  def merged = processedSets.collect { tuple ->
+    // values that were passed explicitly override the defaults
+    def valuesWithDefaults = defaults + tuple[1]
+    // keep the id in front and append the passthrough elements untouched
+    [tuple[0], valuesWithDefaults] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def' and therefore live
+  // in the script binding; confirm whether the workflow block below depends on that.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // all events are gathered into one list first, because the uniqueness
+    // check on the ids needs to see every event at once
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and contains all required keys.
+ *
+ * @param map The value to check; the assertion fails if it is not a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must interpolate the closure parameter; there is no 'key' in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // build ONE message string: a second logger argument is only a format parameter and would be dropped + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr; valueToStr = { val -> // declared before assignment so the recursive calls below resolve to this closure + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // 'def' keeps this local to the closure instead of the shared script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!out instanceof List' negated 'out' first and was always false + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/filter/remove_modality/nextflow.config b/target/nextflow/filter/remove_modality/nextflow.config new file mode 100644 index 
00000000000..a01d820977d --- /dev/null +++ b/target/nextflow/filter/remove_modality/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'remove_modality' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Remove a modality from a .h5mu file\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + 
withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/filter/remove_modality/nextflow_params.yaml b/target/nextflow/filter/remove_modality/nextflow_params.yaml new file mode 100644 index 00000000000..886189a0005 --- /dev/null +++ b/target/nextflow/filter/remove_modality/nextflow_params.yaml @@ -0,0 +1,9 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: # please fill in - example: ["foo"] +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/remove_modality/nextflow_schema.json b/target/nextflow/filter/remove_modality/nextflow_schema.json new file mode 100644 index 00000000000..1e597745256 --- /dev/null +++ b/target/nextflow/filter/remove_modality/nextflow_schema.json @@ -0,0 +1,103 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "remove_modality", +"description": "Remove a modality from a .h5mu file\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": 
"Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: List of `string`, required, multiple_sep: `\":\"`. ", + "help_text": "Type: List of `string`, required, multiple_sep: `\":\"`. " + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/subset_h5mu/.config.vsh.yaml b/target/nextflow/filter/subset_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..3e9366709b2 --- /dev/null +++ b/target/nextflow/filter/subset_h5mu/.config.vsh.yaml @@ -0,0 +1,187 @@ +functionality: + name: "subset_h5mu" + namespace: "filter" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--number_of_observations" + description: "Number of observations to be selected from the h5mu file." + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Create a subset of a mudata file by selecting the first number of\ + \ observations\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 
64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/subset_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/subset_h5mu/subset_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/filter/subset_h5mu/main.nf b/target/nextflow/filter/subset_h5mu/main.nf new file mode 100644 index 00000000000..7f941e35fae --- /dev/null +++ b/target/nextflow/filter/subset_h5mu/main.nf @@ -0,0 +1,2575 @@ +// subset_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "subset_h5mu", + "namespace" : "filter", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : 
"--number_of_observations", + "description" : "Number of observations to be selected from the h5mu file.", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Create a subset of a mudata file by selecting the first number of observations\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : 
"nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/filter/subset_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/filter/subset_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! 
-z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'number_of_observations': $( if [ ! -z ${VIASH_PAR_NUMBER_OF_OBSERVATIONS+x} ]; then echo "int(r'${VIASH_PAR_NUMBER_OF_OBSERVATIONS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +if __name__ == "__main__": + # read data + data = mudata.read(par["input"]) + + # subset data + if par["modality"]: + data.mod[par["modality"]] = data.mod[par["modality"]][:par["number_of_observations"]] + else: + data = data[:par["number_of_observations"]] + + # write data + data.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/filter_subset_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // The id is the first element of every [id, parameters] tuple.
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy and leaves ppIds untouched.
+  // The mutating unique() would remove the duplicates from ppIds before the
+  // failure message is rendered, hiding the ids that triggered the assertion.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares as `type: file` with
+ * `direction: input` are touched; every other parameter is returned as is.
+ * Values that are not Strings (e.g. already resolved files) are kept unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing it to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is passed through untouched, mirroring the Collection branch
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return them as a list of parameter sets.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List _parseParamListArguments(Map params, Map config){
+  // NOTE(review): the generics of the return type were lost in this copy of the patch
+  // (it read `List>`); the function returns a list of [id, parameter map] pairs.
+
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format.
+  // Every parser returns [file the list was read from (or null), list of parameter maps].
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list (unless it already is a Collection) and cast
+ * element by element. The list is unwrapped again only for arguments that are present
+ * in the config and are not marked 'multiple'.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to this closure call instead of the shared script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings?.type
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    // a file argument without an explicit direction is treated as an input
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config (apart from
+ *                    'publishDir' and 'publish_dir', which are always accepted).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * Elements after the first two of every parameter set are passed through unchanged.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List _paramsToParamSets(Map params, Map config){
+  // NOTE(review): the generics of the return type were lost in this copy of the patch
+  // (it read `List>>`); the function returns a list of [id, parameter map] pairs.
+
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // an id inside the parameter set takes precedence over the global --id
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on duplicate keys the value from the parameter set wins.
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // The one-argument get() only reads. Groovy's get(key, default) would insert a
+      // 'default: null' entry into the argument settings, and _preprocessInputsList
+      // tests containsKey("default") on those settings.
+      // NOTE(review): this assumes both functions see the same config Map object; confirm.
+      if ( paramValue != null || parameterSettings?.get("default") != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of publishing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, run through the same split/cast logic as real inputs
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ arg.plainName, arg.default ] }
+  def defaultValues = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Process the inputs, then layer every parameter map over the defaults
+  // (explicit values win); elements after the first two are passed through untouched.
+  def withDefaults = applyConfig(params, config).collect { paramSet ->
+    def eventId = paramSet[0]
+    def merged = defaultValues + paramSet[1]
+    [eventId, merged] + paramSet.drop(2)
+  }
+  _checkUniqueIds(withDefaults)
+
+  return withDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def', i.e. as script-binding
+  // variables; left as is because the workflow body below reads 'config' - confirm before changing.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // collect all events into one list so that they can be checked together, then re-emit them
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map whose keys are all expected and which contains all required keys.
+ *
+ * @param map The value to check.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used in the error messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message used to interpolate '$key', which does not exist in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+/**
+ * Validate a map of process directives and normalise some of them.
+ *
+ * Entries with a null value are dropped first. Every directive that is present is
+ * type-checked with an assertion; a number of directives are also rewritten to a
+ * canonical form (lists joined into a string, a container map turned into an image
+ * name, publishDir turned into a list of maps, label turned into a list).
+ *
+ * @param drctv The directives to process.
+ *
+ * @return A new map with the processed directives.
+ */
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = drctv["container"]
+      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
+      // registry precedence: environment variable, then pipeline parameter, then the map itself
+      def part1 =
+        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
+        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
+        m.registry ? m.registry + "/" :
+        ""
+      def part2 = m.image
+      def part3 = m.tag ? ":" + m.tag : ":latest"
+      drctv["container"] = part1 + part2 + part3
+    }
+  }
+
+  /* DIRECTIVE containerOptions
+    accepted examples:
+    - "--foo bar"
+    - ["--foo bar", "-f b"]
+  */
+  if (drctv.containsKey("containerOptions")) {
+    if (drctv["containerOptions"] instanceof List) {
+      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
+    }
+    assert drctv["containerOptions"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cpus
+    accepted examples:
+    - 1
+    - 10
+  */
+  if (drctv.containsKey("cpus")) {
+    assert drctv["cpus"] instanceof Integer
+  }
+
+  /* DIRECTIVE disk
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("disk")) {
+    assert drctv["disk"] instanceof CharSequence
+    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE echo
+    accepted examples:
+    - true
+    - false
+  */
+  if (drctv.containsKey("echo")) {
+    assert drctv["echo"] instanceof Boolean
+  }
+
+  /* DIRECTIVE errorStrategy
+    accepted examples:
+    - "terminate"
+    - "finish"
+  */
+  if (drctv.containsKey("errorStrategy")) {
+    assert drctv["errorStrategy"] instanceof CharSequence
+    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
+  }
+
+  /* DIRECTIVE executor
+    accepted examples:
+    - "local"
+    - "sge"
+  */
+  if (drctv.containsKey("executor")) {
+    assert drctv["executor"] instanceof CharSequence
+    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
+  }
+
+  /* DIRECTIVE machineType
+    accepted examples:
+    - "n1-highmem-8"
+  */
+  if (drctv.containsKey("machineType")) {
+    assert drctv["machineType"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE maxErrors
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxErrors")) {
+    assert drctv["maxErrors"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxForks
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxForks")) {
+    assert drctv["maxForks"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxRetries
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxRetries")) {
+    assert drctv["maxRetries"] instanceof Integer
+  }
+
+  /* DIRECTIVE memory
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("memory")) {
+    assert drctv["memory"] instanceof CharSequence
+    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE module
+    accepted examples:
+    - "ncbi-blast/2.2.27"
+    - "ncbi-blast/2.2.27:t_coffee/10.0"
+    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
+  */
+  if (drctv.containsKey("module")) {
+    if (drctv["module"] instanceof List) {
+      drctv["module"] = drctv["module"].join(":")
+    }
+    assert drctv["module"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE penv
+    accepted examples:
+    - "smp"
+  */
+  if (drctv.containsKey("penv")) {
+    assert drctv["penv"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE pod
+    accepted examples:
+    - [ label: "key", value: "val" ]
+    - [ annotation: "key", value: "val" ]
+    - [ env: "key", value: "val" ]
+    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
+  */
+  if (drctv.containsKey("pod")) {
+    if (drctv["pod"] instanceof Map) {
+      drctv["pod"] = [ drctv["pod"] ]
+    }
+    assert drctv["pod"] instanceof List
+    drctv["pod"].forEach { pod ->
+      assert pod instanceof Map
+      // TODO: should more checks be added?
+      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
+      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
+    }
+  }
+
+  /* DIRECTIVE publishDir
+    accepted examples:
+    - []
+    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
+    - "/path/to/dir"
+      is transformed to [[ path: "/path/to/dir" ]]
+    - [ path: "/path/to/dir", mode: "cache" ]
+      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
+  */
+  // TODO: should we also look at params["publishDir"]?
+  if (drctv.containsKey("publishDir")) {
+    def pblsh = drctv["publishDir"]
+
+    // check different options
+    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence
+
+    // turn into list if not already so
+    // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work.
+    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]
+
+    // check elements of publishDir
+    pblsh = pblsh.collect{ elem ->
+      // turn into map if not already so
+      elem = elem instanceof CharSequence ? [ path: elem ] : elem
+
+      // check types and keys
+      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
+      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")
+
+      // check elements in map
+      assert elem.containsKey("path")
+      assert elem["path"] instanceof CharSequence
+      if (elem.containsKey("mode")) {
+        assert elem["mode"] instanceof CharSequence
+        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
+      }
+      if (elem.containsKey("overwrite")) {
+        assert elem["overwrite"] instanceof Boolean
+      }
+      if (elem.containsKey("pattern")) {
+        assert elem["pattern"] instanceof CharSequence
+      }
+      if (elem.containsKey("saveAs")) {
+        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
+      }
+      if (elem.containsKey("enabled")) {
+        assert elem["enabled"] instanceof Boolean
+      }
+
+      // return final result
+      elem
+    }
+    // store final directive
+    drctv["publishDir"] = pblsh
+  }
+
+  /* DIRECTIVE queue
+    accepted examples:
+    - "long"
+    - "short,long"
+    - ["short", "long"]
+  */
+  if (drctv.containsKey("queue")) {
+    if (drctv["queue"] instanceof List) {
+      drctv["queue"] = drctv["queue"].join(",")
+    }
+    assert drctv["queue"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE label
+    accepted examples:
+    - "big_mem"
+    - "big_cpu"
+    - ["big_mem", "big_cpu"]
+  */
+  if (drctv.containsKey("label")) {
+    if (drctv["label"] instanceof CharSequence) {
+      drctv["label"] = [ drctv["label"] ]
+    }
+    assert drctv["label"] instanceof List
+    drctv["label"].forEach { label ->
+      assert label instanceof CharSequence
+      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
+      // ^ does not allow closures
+    }
+  }
+
+  /* DIRECTIVE scratch
+    accepted examples:
+    - true
+    - "/path/to/scratch"
+    - '$MY_PATH_TO_SCRATCH'
+    - "ram-disk"
+  */
+  if (drctv.containsKey("scratch")) {
+    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE storeDir
+    accepted examples:
+    - "/path/to/storeDir"
+  */
+  if (drctv.containsKey("storeDir")) {
+    assert drctv["storeDir"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE stageInMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageInMode")) {
+    assert drctv["stageInMode"] instanceof CharSequence
+    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
+  }
+
+  /* DIRECTIVE stageOutMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageOutMode")) {
+    assert drctv["stageOutMode"] instanceof CharSequence
+    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
+  }
+
+  /* DIRECTIVE tag
+    accepted examples:
+    - "foo"
+    - '$id'
+  */
+  if (drctv.containsKey("tag")) {
+    assert drctv["tag"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Entries with a null value are dropped first; after that every expected key
+ * must be present and hold a Boolean.
+ *
+ * @param auto The 'auto' settings.
+ *
+ * @return A map that only contains the expected keys.
+ */
+// TODO: unit test processAuto
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local instead of publishing it in the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+/**
+ * Merge the arguments passed to a module with its defaults (thisDefaultProcessArgs)
+ * and validate / normalise the result.
+ *
+ *   - 'key' must resolve to an identifier-like string; a Closure is applied to the module name.
+ *   - 'directives' and 'auto' are merged with their defaults and processed with
+ *     processDirectives() and processAuto().
+ *   - publishDir entries are derived from params when auto.publish / auto.transcript are set.
+ *   - 'map', 'mapId', 'mapData', 'mapPassthrough' and 'filter' must be null or a Closure.
+ *   - 'fromState' and 'toState' may be given as a Closure, a Map or a List of Strings;
+ *     Maps and Lists are converted into Closures.
+ *
+ * @param args The arguments passed to the module.
+ *
+ * @return The processed arguments.
+ */
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${processArgs['directives'].getClass()}"
+  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])
+
+  // check whether auto exists and apply defaults
+  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
+  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
+  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])
+
+  // auto define publish, if so desired
+  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
+    //   " Example: params.publish_dir = \"./output/\""
+    def publishDir =
+      params.containsKey("publish_dir") ? params.publish_dir :
+      params.containsKey("publishDir") ? params.publishDir :
+      null
+
+    if (publishDir != null) {
+      processArgs.directives.publishDir = [[
+        path: publishDir,
+        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // start from the existing publishDir entries, or from an empty list when there are none
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
+    }
+  }
+
+  // check fromState
+  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
+  def fromState = processArgs["fromState"]
+  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
+    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
+  if (fromState) {
+    // if fromState is a List, convert to map
+    if (fromState instanceof List) {
+      // check whether fromstate is a list[string]
+      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
+      fromState = fromState.collectEntries{[it, it]}
+    }
+
+    // if fromState is a map, convert to closure
+    if (fromState instanceof Map) {
+      // check whether fromstate is a map[string, string]
+      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
+      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
+      def fromStateMap = fromState.clone()
+      // turn the map into a closure to be used later on
+      fromState = { it ->
+        def state = it[1]
+        assert state instanceof Map : "Error in module '$key': the state is not a Map"
+        def data = fromStateMap.collectEntries{newkey, origkey ->
+          // check whether all values of fromState are in state
+          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
+          [newkey, state[origkey]]
+        }
+        data
+      }
+    }
+
+    processArgs["fromState"] = fromState
+  }
+
+  // check toState
+  def toState = processArgs["toState"]
+
+  // without a toState, the second element of the tuple is used as the new state
+  if (toState == null) {
+    toState = { tup -> tup[1] }
+  }
+
+  // toState should be a closure, map[string, string], or list[string]
+  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
+    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"
+
+  // if toState is a List, convert to map
+  if (toState instanceof List) {
+    // check whether toState is a list[string]
+    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
+    toState = toState.collectEntries{[it, it]}
+  }
+
+  // if toState is a map, convert to closure
+  if (toState instanceof Map) {
+    // check whether toState is a map[string, string]
+    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
+    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
+    def toStateMap = toState.clone()
+    // turn the map into a closure to be used later on
+    toState = { it ->
+      def output = it[1]
+      def state = it[2]
+      assert output instanceof Map : "Error in module '$key': the output is not a Map"
+      assert state instanceof Map : "Error in module '$key': the state is not a Map"
+      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
+        // check whether all values of toState are in output
+        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
+        [newkey, output[origkey]]
+      }
+      state + extraEntries
+    }
+  }
+
+  processArgs["toState"] = toState
+
+  // return output
+  return processArgs
+}
+
+def processFactory(Map processArgs) {
+  // autodetect process key
+  def wfKey = processArgs["key"]
+  def procKeyPrefix = "${wfKey}_process"
+  def meta = ScriptMeta.current()
+  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
+  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
+  def newNumber = (numbers + [-1]).max() + 1
+
+  def procKey =
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/filter/subset_h5mu/nextflow.config b/target/nextflow/filter/subset_h5mu/nextflow.config new file mode 100644 index 00000000000..2cd5ee0815c 
--- /dev/null +++ b/target/nextflow/filter/subset_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'subset_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Create a subset of a mudata file by selecting the first number of observations\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 
256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/filter/subset_h5mu/nextflow_params.yaml b/target/nextflow/filter/subset_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..8300904f6c0 --- /dev/null +++ b/target/nextflow/filter/subset_h5mu/nextflow_params.yaml @@ -0,0 +1,10 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +# number_of_observations: 5 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/filter/subset_h5mu/nextflow_schema.json b/target/nextflow/filter/subset_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..0a6d451c5a9 --- /dev/null +++ b/target/nextflow/filter/subset_h5mu/nextflow_schema.json @@ -0,0 +1,114 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "subset_h5mu", +"description": "Create a subset of a mudata file by selecting the first number of observations\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + 
"string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "number_of_observations": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. Number of observations to be selected from the h5mu file", + "help_text": "Type: `integer`, example: `5`. Number of observations to be selected from the h5mu file." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/filter/subset_h5mu/setup_logger.py b/target/nextflow/filter/subset_h5mu/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/filter/subset_h5mu/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/integrate/harmonypy/.config.vsh.yaml b/target/nextflow/integrate/harmonypy/.config.vsh.yaml new file mode 100644 index 00000000000..0e5da0e1eed --- /dev/null +++ b/target/nextflow/integrate/harmonypy/.config.vsh.yaml @@ -0,0 +1,240 @@ +functionality: + name: "harmonypy" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: 
"Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "Which .obsm slot to use as a starting PCA embedding." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_pca_integrated" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--theta" + description: "Diversity clustering penalty parameter. Specify for each variable\ + \ in group.by.vars. theta=0 does not encourage any diversity. Larger values\ + \ of theta result in more diverse clusters." + info: null + default: + - 2.0 + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_covariates" + description: "The .obs field(s) that define the covariate(s) to regress out." 
+ info: null + example: + - "batch" + - "sample" + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Performs Harmony integration based as described in https://github.com/immunogenomics/harmony.\ + \ Based on an implementation in python from https://github.com/slowkow/harmonypy" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "harmonypy~=0.0.6" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: 
"memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/harmonypy" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/harmonypy/harmonypy" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/integrate/harmonypy/main.nf b/target/nextflow/integrate/harmonypy/main.nf new file mode 100644 index 00000000000..740db64988b --- /dev/null +++ b/target/nextflow/integrate/harmonypy/main.nf @@ -0,0 +1,2646 @@ +// harmonypy 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (maintainer) +// * Robrecht Cannoodt (contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "harmonypy", + "namespace" : "integrate", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" 
: "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_input", + "description" : "Which .obsm slot to use as a starting PCA embedding.", + "default" : [ + "X_pca" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_output", + "description" : "In which .obsm slot to store the resulting integrated embedding.", + "default" : [ + "X_pca_integrated" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--theta", + "description" : "Diversity clustering penalty parameter. Specify for each variable in group.by.vars. theta=0 does not encourage any diversity. 
Larger values of theta result in more diverse clusters.", + "default" : [ + 2.0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_covariates", + "description" : "The .obs field(s) that define the covariate(s) to regress out.", + "example" : [ + "batch", + "sample" + ], + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/" + } + ], + "description" : "Performs Harmony integration based as described in https://github.com/immunogenomics/harmony. Based on an implementation in python from https://github.com/slowkow/harmonypy", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + 
"scanpy~=1.9.5", + "harmonypy~=0.0.6" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/harmonypy/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/harmonypy", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata +from 
harmonypy import run_harmony + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'theta': $( if [ ! -z ${VIASH_PAR_THETA+x} ]; then echo "list(map(float, r'${VIASH_PAR_THETA//\\'/\\'\\"\\'\\"r\\'}'.split(':')))"; else echo None; fi ), + 'obs_covariates': $( if [ ! -z ${VIASH_PAR_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_OBS_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + + +def main(): + mdata = mudata.read(par["input"].strip()) + mod_name = par['modality'] + mod = mdata.mod[mod_name] + pca_embedding = mod.obsm[par['obsm_input']] + metadata = mod.obs + ho = run_harmony(pca_embedding, metadata, par['obs_covariates'], theta=par['theta']) + mod.obsm[par["obsm_output"]] = ho.Z_corr.T + mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/integrate_harmonypy", + "tag" : "0.12.0" + }, + 
"label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // The message parts are concatenated into ONE string on purpose: with a comma after
+    // the first part, the remainder is passed to the logger as a format argument and,
+    // since the first part contains no '{}' placeholder, is never printed.
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // valueToStr renders a directive value as Groovy source text:
+  //  - strings wrapped in '{...}' are emitted verbatim (treated as closure code),
+  //    all other strings are wrapped in double quotes;
+  //  - lists and maps are rendered recursively as '[...]' literals;
+  //  - anything else is rendered with inspect().
+  // The variable is declared before it is assigned so that the recursive calls inside the
+  // closure resolve to this local variable (a name is not in scope in its own initializer).
+  def valueToStr = null
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  // Every directive becomes one "\n<name> <value>" line of the generated process;
+  // 'label' and 'publishDir' hold a collection and produce one line per (non-null) entry.
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  // one 'path(viash_par_<name>)' input declaration per input file argument
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  // one 'path{...}' output declaration per output file argument
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.'
+ par.plainName + '}'
+      }
+    }
+    .join()
+
+  // TODO: move this functionality somewhere else?
+  // always add a trailing output entry; with auto.transcript it also captures '.command*'
+  if (processArgs.auto.transcript) {
+    outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}'
+  } else {
+    outputPaths = outputPaths + ', path{[".exitcode"]}'
+  }
+
+  // create dirs for output files (based on BashWrapper.createParentFiles)
+  // each entry is an escaped '${...}' expression that is evaluated inside the generated
+  // process and expands to a 'mkdir_parent "<file>"...' call when the argument is set
+  def createParentStr = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" && it.create_parent }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
+    }
+    .join("\n")
+
+  // construct inputFileExports
+  // each entry expands (inside the generated process) to an 'export VIASH_PAR_<NAME>="..."'
+  // line, or to nothing when no file was staged for that argument
+  def inputFileExports = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
+    .collect { par ->
+      // declared with 'def' so it stays local to this closure instead of being written
+      // to the script binding as an implicit global
+      def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")"
+      "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
+    }
+
+  // NOTE: if using docker, use /tmp instead of tmpDir!
+  // first environment variable that is set wins; falls back to '/tmp'
+  def tmpDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?:
+    System.getenv('VIASH_TMPDIR') ?:
+    System.getenv('VIASH_TEMPDIR') ?:
+    System.getenv('VIASH_TMP') ?:
+    System.getenv('TEMP') ?:
+    System.getenv('TMPDIR') ?:
+    System.getenv('TEMPDIR') ?:
+    System.getenv('TMP') ?:
+    '/tmp'
+  ).toAbsolutePath()
+
+  // construct stub
+  def stub = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ?
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" }
+          .collect { par ->
+            // normalise the value of every input file argument to a list of paths;
+            // values that are neither a List nor a Path result in an empty list
+            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
+            def inputFiles = []
+            if (val == null) {
+              inputFiles = []
+            } else if (val instanceof List) {
+              inputFiles = val
+            } else if (val instanceof Path) {
+              inputFiles = [ val ]
+            } else {
+              inputFiles = []
+            }
+            if (!workflow.stubRun) {
+              // throw error when an input file doesn't exist
+              inputFiles.each{ file ->
+                assert file.exists() :
+                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
+                  " Required input file does not exist.\n" +
+                  " Path: '$file'.\n" +
+                  " Expected input file to exist"
+              }
+            }
+            inputFiles
+          }
+
+        // remove input files
+        def argsExclInputFiles = thisConfig.functionality.allArguments
+          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
+          .collectEntries { par ->
+            def parName = par.plainName
+            def val = combinedArgs[parName]
+            if (par.multiple && val instanceof Collection) {
+              val = val.join(par.multiple_sep)
+            }
+            if (par.direction == "output" && par.type == "file") {
+              // substitute the '$id' and '$key' placeholders in output file names.
+              // The replacement values are quoted because String.replaceAll interprets
+              // '$' and '\' in the replacement as group references / escapes, which
+              // would fail or mangle the value for ids containing those characters.
+              val = val
+                .replaceAll('\\$id', java.util.regex.Matcher.quoteReplacement(id.toString()))
+                .replaceAll('\\$key', java.util.regex.Matcher.quoteReplacement(key.toString()))
+            }
+            [parName, val]
+          }
+
+        // tuple passed to the process: id, one list of paths per input file argument,
+        // the remaining arguments, and the resources directory
+        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
+      }
+      | processObj
+      | map { output ->
+        // output[0] is the id; output[index + 1] holds the files emitted for the
+        // index-th output file argument
+        def outputFiles = thisConfig.functionality.allArguments
+          .findAll { it.type == "file" && it.direction == "output" }
+          .indexed()
+          .collectEntries{ index, par ->
+            // 'def' keeps this variable local to the closure; without it the value is
+            // stored in the script binding and shared between channel items
+            def out = output[index + 1]
+            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
+            // NOTE: the parentheses are required. '!out instanceof List' is parsed as
+            // '(!out) instanceof List', which is always false.
+            if (!(out instanceof List) || out.size() <= 1) {
+              if (par.multiple) {
+                out = []
+              } else {
+                assert !par.required :
+                  "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
+                  " Required output file is missing"
+                out = null
+              }
+            } else if (out.size() == 2 && !par.multiple) {
+              out = out[1]
+            } else {
+              out = out.drop(1)
+            }
+            [ par.plainName, out ]
+          }
+
+        // drop null outputs
+
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/integrate/harmonypy/nextflow.config b/target/nextflow/integrate/harmonypy/nextflow.config new file mode 100644 index 
00000000000..5a039982997 --- /dev/null +++ b/target/nextflow/integrate/harmonypy/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'harmonypy' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs Harmony integration based as described in https://github.com/immunogenomics/harmony. Based on an implementation in python from https://github.com/slowkow/harmonypy' + author = 'Dries Schaumont, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { 
memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/integrate/harmonypy/nextflow_params.yaml b/target/nextflow/integrate/harmonypy/nextflow_params.yaml new file mode 100644 index 00000000000..011b93747e1 --- /dev/null +++ b/target/nextflow/integrate/harmonypy/nextflow_params.yaml @@ -0,0 +1,13 @@ +# Arguments +input: # please fill in - example: "path/to/file" +# output: "$id.$key.output.output" +# output_compression: "gzip" +modality: "rna" +obsm_input: "X_pca" +obsm_output: "X_pca_integrated" +theta: [2] +obs_covariates: # please fill in - example: ["batch", "sample"] + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/harmonypy/nextflow_schema.json b/target/nextflow/integrate/harmonypy/nextflow_schema.json new file mode 100644 index 00000000000..2f7ae209630 --- /dev/null +++ b/target/nextflow/integrate/harmonypy/nextflow_schema.json @@ -0,0 +1,147 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "harmonypy", +"description": "Performs Harmony integration 
based as described in https://github.com/immunogenomics/harmony. Based on an implementation in python from https://github.com/slowkow/harmonypy", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file", + "help_text": "Type: `file`, required. Input h5mu file" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file." + , + "default": "$id.$key.output.output" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "obsm_input": { + "type": + "string", + "description": "Type: `string`, default: `X_pca`. Which ", + "help_text": "Type: `string`, default: `X_pca`. Which .obsm slot to use as a starting PCA embedding." + , + "default": "X_pca" + } + + + , + "obsm_output": { + "type": + "string", + "description": "Type: `string`, default: `X_pca_integrated`. In which ", + "help_text": "Type: `string`, default: `X_pca_integrated`. In which .obsm slot to store the resulting integrated embedding." + , + "default": "X_pca_integrated" + } + + + , + "theta": { + "type": + "string", + "description": "Type: List of `double`, default: `2`, multiple_sep: `\":\"`. 
Diversity clustering penalty parameter", + "help_text": "Type: List of `double`, default: `2`, multiple_sep: `\":\"`. Diversity clustering penalty parameter. Specify for each variable in group.by.vars. theta=0 does not encourage any diversity. Larger values of theta result in more diverse clusters." + , + "default": "2" + } + + + , + "obs_covariates": { + "type": + "string", + "description": "Type: List of `string`, required, example: `batch:sample`, multiple_sep: `\":\"`. The ", + "help_text": "Type: List of `string`, required, example: `batch:sample`, multiple_sep: `\":\"`. The .obs field(s) that define the covariate(s) to regress out." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/integrate/scanorama/.config.vsh.yaml b/target/nextflow/integrate/scanorama/.config.vsh.yaml new file mode 100644 index 00000000000..e94c8e6fd9f --- /dev/null +++ b/target/nextflow/integrate/scanorama/.config.vsh.yaml @@ -0,0 +1,283 @@ +functionality: + name: "scanorama" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output .h5mu file" + info: null + default: + - "output.h5ad" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + 
description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: "Column name discriminating between your batches." + info: null + default: + - "batch" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "Basis obsm slot to run scanorama on." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "The name of the field in adata.obsm where the integrated embeddings\ + \ will be stored after running this function. Defaults to X_scanorama." + info: null + default: + - "X_scanorama" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--knn" + description: "Number of nearest neighbors to use for matching." + info: null + default: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--batch_size" + description: "The batch size used in the alignment vector computation. Useful\ + \ when integrating very large (>100k samples) datasets. Set to large value that\ + \ runs within available memory." + info: null + default: + - 5000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--sigma" + description: "Correction smoothing parameter on Gaussian kernel." + info: null + default: + - 15.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--approx" + description: "Use approximate nearest neighbors with Python annoy; greatly speeds\ + \ up matching runtime." 
+ info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alpha" + description: "Alignment score minimum cutoff" + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Use Scanorama to integrate different experiments.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "scanorama" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: 
"memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scanorama" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scanorama/scanorama" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/integrate/scanorama/main.nf b/target/nextflow/integrate/scanorama/main.nf new file mode 100644 index 00000000000..0c851d8a821 --- /dev/null +++ b/target/nextflow/integrate/scanorama/main.nf @@ -0,0 +1,2702 @@ +// scanorama 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (author) +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "scanorama", + "namespace" : "integrate", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output .h5mu file", + "default" : [ + "output.h5ad" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", 
+ "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_batch", + "description" : "Column name discriminating between your batches.", + "default" : [ + "batch" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_input", + "description" : "Basis obsm slot to run scanorama on.", + "default" : [ + "X_pca" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_output", + "description" : "The name of the field in adata.obsm where the integrated embeddings will be stored after running this function. Defaults to X_scanorama.", + "default" : [ + "X_scanorama" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--knn", + "description" : "Number of nearest neighbors to use for matching.", + "default" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--batch_size", + "description" : "The batch size used in the alignment vector computation. Useful when integrating very large (>100k samples) datasets. 
Set to large value that runs within available memory.", + "default" : [ + 5000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--sigma", + "description" : "Correction smoothing parameter on Gaussian kernel.", + "default" : [ + 15.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--approx", + "description" : "Use approximate nearest neighbors with Python annoy; greatly speeds up matching runtime.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--alpha", + "description" : "Alignment score minimum cutoff", + "default" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/" + } + ], + "description" : "Use Scanorama to integrate different experiments.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : 
"_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "build-essential" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "scanorama" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midcpu", + "highmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/scanorama/config.vsh.yaml", + "platform" : "nextflow", + "output" : 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scanorama", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'knn': $( if [ ! -z ${VIASH_PAR_KNN+x} ]; then echo "int(r'${VIASH_PAR_KNN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'batch_size': $( if [ ! -z ${VIASH_PAR_BATCH_SIZE+x} ]; then echo "int(r'${VIASH_PAR_BATCH_SIZE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sigma': $( if [ ! -z ${VIASH_PAR_SIGMA+x} ]; then echo "float(r'${VIASH_PAR_SIGMA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'approx': $( if [ ! 
-z ${VIASH_PAR_APPROX+x} ]; then echo "r'${VIASH_PAR_APPROX//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'alpha': $( if [ ! -z ${VIASH_PAR_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +from scanpy.external.pp import scanorama_integrate +from mudata import read_h5mu + +mdata = read_h5mu(par["input"]) + +mod_name = par["modality"] +mod = mdata.mod[mod_name] + +# Integration. +scanorama_integrate(mod, + key=par["obs_batch"], + basis=par["obsm_input"], + adjusted_basis=par["obsm_output"], + knn=par["knn"], + alpha=par["alpha"], + sigma=par["sigma"], + approx=par["approx"], + batch_size=par["batch_size"] ) + +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/integrate_scanorama", + "tag" : "0.12.0" + }, + "label" : [ + "midcpu", + "highmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
              'example': 'my_params.yaml',
              'multiple': false,
              'hidden': true
            ],
          ]
        ]
      ]
    ]
  ]

  return processConfig(mergeMap(config, localConfig))
}

// helper functions for generating help //

// based on io.viash.helpers.Format.wordWrap
/**
 * Greedily wrap a single paragraph into lines of at most `maxLength` characters.
 *
 * Words are obtained by splitting on single whitespace characters and are never
 * broken, so a word longer than `maxLength` ends up alone on its own line.
 *
 * @param str The text to wrap.
 * @param maxLength The maximum line length.
 *
 * @return A list of lines; a text without any word yields a single empty line.
 */
def formatWordWrap(str, maxLength) {
  def lines = []
  def line = null
  for (word in str.split("\\s")) {
    if (line == null) {
      // the first word starts the line as-is (no leading separator)
      line = word
    } else if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  lines.add(line == null ? "" : line)
  return lines
}

// based on Format.paragraphWrap
/**
 * Wrap a text consisting of newline-separated paragraphs.
 *
 * Each paragraph is wrapped independently with formatWordWrap(); an empty
 * paragraph is kept as an empty line.
 *
 * @param str The text to wrap.
 * @param maxLength The maximum line length.
 *
 * @return A list of lines.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    outLines.addAll(formatWordWrap(par, maxLength))
  }
  return outLines
}

/**
 * Render the help entry of one argument: its name, a list of named
 * properties (type, default, example, choices, min, max) and its description.
 *
 * @param param The argument settings (a Map) from the Viash config.
 *
 * @return The help text for this argument.
 */
def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction?.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      example = param.example.toString()
    }
  }
  def min = param.min?.toString()
  def max = param.max?.toString()

  // Literal String.replace is used on purpose: with replaceAll a backslash in
  // the replacement escapes the next character and is silently dropped.
  def escapeChoice = { choice ->
    def s1 = choice.replace("\n", "\\n")
    def s2 = s1.replace("\"", "\\\"")
    // quote the choice if it contains a separator or needed escaping
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  // Properties are indented by 8 characters, matching the 80 - 8 wrap budget
  // used for the description below.
  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    .collect{"\n        " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n        ")

  "\n    --" + param.plainName +
    namedPropsStr +
    descStr
}

// Based on Helper.generateHelp() in Helper.scala
/**
 * Render the complete help text of a component: name and version,
 * description, usage and one section per argument group.
 *
 * @param config A Map of the Viash configuration.
 *
 * @return The help text.
 */
def generateHelp(config) {
  def fun = config.functionality

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    // group descriptions are indented by 4 characters (80-4 wrap budget)
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n    " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n    ") + "\n"
    // arguments may be listed by name; resolve those against allArguments
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the help text and exit when the 'help' parameter was passed.
 *
 * @param config A Map of the Viash configuration.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Guess the format of the 'param_list' parameter.
 *
 * @param params Input parameters.
 *
 * @return "none" if 'param_list' is missing or null, "asis" if it is not a
 *         String, otherwise "csv", "json" or "yaml" based on the file extension,
 *         and "yaml_blob" for any other String.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// script-level flag so that the deprecation warning is printed only once
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: turn the nextflow parameters into a list of processed argument sets.
 *
 * @param params Input parameters, optionally containing a 'param_list'.
 * @param config A Map of the Viash configuration.
 *
 * @return A list of Maps, one per argument set, each containing an 'id'.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally; an undeclared name would end up in the script binding
        def parData
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): the closure receives map entries, which are never null,
      // so this filter currently keeps every parameter -- confirm the intent.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) returns a copy, so the message still lists the duplicated ids
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: wrap the output of paramsToList() in a nextflow Channel.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: like paramsToChannel(), but emits [id, argumentSet] tuples.
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not found in the config are returned unaltered.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Entries whose value is null are dropped first. Afterwards every one of the
+ * keys simplifyInput, simplifyOutput, transcript and publish must be present
+ * and hold a Boolean; otherwise an assertion fails.
+ *
+ * @param auto map of auto settings
+ * @return a new map restricted to the four expected keys
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local to this call; a bare assignment would store
+  // it in the script binding, where it is visible to (and shared with) all other code
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    // first match wins: an explicit transcripts dir, else a '_transcripts' folder below the publish dir
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // start from the publishDir entries that are already set (none -> empty list)
+      // and append the transcript entry as one extra element
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // the whole warning is ONE concatenated string: with a comma after the first
+    // literal the remaining text is passed as a separate logger argument and,
+    // as the message contains no '{}' placeholder, never shows up in the log
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // valueToStr renders a directive value as Groovy source text.
+  // It is declared before it is assigned so that the closure body can refer
+  // to itself when it recurses into nested lists and maps.
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      // strings of the form '{ ... }' are emitted verbatim, everything else is quoted
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      // one directive line per list element
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.' + par.plainName + '}'
+      }
+    }
+    .join()
+
+  // TODO: move this functionality somewhere else?
+  if (processArgs.auto.transcript) {
+    outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}'
+  } else {
+    outputPaths = outputPaths + ', path{[".exitcode"]}'
+  }
+
+  // create dirs for output files (based on BashWrapper.createParentFiles)
+  def createParentStr = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" && it.create_parent }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
+    }
+    .join("\n")
+
+  // construct inputFileExports
+  def inputFileExports = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
+    .collect { par ->
+      // 'def' keeps this helper value local to the closure instead of writing it to the script binding
+      def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")"
+      "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
+    }
+
+  // NOTE: if using docker, use /tmp instead of tmpDir!
+  // the first environment variable that is set wins; '/tmp' is the fallback
+  def tmpDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?:
+    System.getenv('VIASH_TMPDIR') ?:
+    System.getenv('VIASH_TEMPDIR') ?:
+    System.getenv('VIASH_TMP') ?:
+    System.getenv('TEMP') ?:
+    System.getenv('TMPDIR') ?:
+    System.getenv('TEMPDIR') ?:
+    System.getenv('TMP') ?:
+    '/tmp'
+  ).toAbsolutePath()
+
+  // construct stub
+  def stub = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" }
+            .collect { par ->
+              // normalise the argument value to a list of paths; anything that is
+              // neither a List nor a Path counts as "no input files"
+              def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
+              def inputFiles = []
+              if (val == null) {
+                inputFiles = []
+              } else if (val instanceof List) {
+                inputFiles = val
+              } else if (val instanceof Path) {
+                inputFiles = [ val ]
+              } else {
+                inputFiles = []
+              }
+              if (!workflow.stubRun) {
+                // throw error when an input file doesn't exist
+                inputFiles.each{ file ->
+                  assert file.exists() :
+                    "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
+                    " Required input file does not exist.\n" +
+                    " Path: '$file'.\n" +
+                    " Expected input file to exist"
+                }
+              }
+              inputFiles
+            }
+
+          // remove input files
+          def argsExclInputFiles = thisConfig.functionality.allArguments
+            .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
+            .collectEntries { par ->
+              def parName = par.plainName
+              def val = combinedArgs[parName]
+              if (par.multiple && val instanceof Collection) {
+                val = val.join(par.multiple_sep)
+              }
+              if (par.direction == "output" && par.type == "file") {
+                // fill in the '$id' and '$key' placeholders of output file names
+                val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
+              }
+              [parName, val]
+            }
+
+          [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
+        }
+        | processObj
+        | map { output ->
+          def outputFiles = thisConfig.functionality.allArguments
+            .findAll { it.type == "file" && it.direction == "output" }
+            .indexed()
+            .collectEntries{ index, par ->
+              // 'def' makes this a per-invocation local; without it the value
+              // lives in the script binding and is shared between channel items
+              def out = output[index + 1]
+              // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
+              // NOTE: the parentheses are required. '!out instanceof List' is parsed
+              // as '(!out) instanceof List', which is always false.
+              if (!(out instanceof List) || out.size() <= 1) {
+                // nothing besides the dummy file was produced
+                if (par.multiple) {
+                  out = []
+                } else {
+                  assert !par.required :
+                    "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
+                    " Required output file is missing"
+                  out = null
+                }
+              } else if (out.size() == 2 && !par.multiple) {
+                out = out[1]
+              } else {
+                out = out.drop(1)
+              }
+              [ par.plainName, out ]
+            }
+
+          // drop null outputs
+          
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/integrate/scanorama/nextflow.config b/target/nextflow/integrate/scanorama/nextflow.config new file mode 100644 index 
00000000000..a6d9c564e3d --- /dev/null +++ b/target/nextflow/integrate/scanorama/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'scanorama' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Use Scanorama to integrate different experiments.\n' + author = 'Dries De Maeyer, Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { 
memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/integrate/scanorama/nextflow_params.yaml b/target/nextflow/integrate/scanorama/nextflow_params.yaml new file mode 100644 index 00000000000..0a9714d0cc5 --- /dev/null +++ b/target/nextflow/integrate/scanorama/nextflow_params.yaml @@ -0,0 +1,17 @@ +# Arguments +input: # please fill in - example: "path/to/file" +modality: "rna" +# output: "$id.$key.output.h5ad" +# output_compression: "gzip" +obs_batch: "batch" +obsm_input: "X_pca" +obsm_output: "X_scanorama" +knn: 20 +batch_size: 5000 +sigma: 15 +approx: true +alpha: 0.1 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/scanorama/nextflow_schema.json b/target/nextflow/integrate/scanorama/nextflow_schema.json new file mode 100644 index 00000000000..b04de18bec9 --- /dev/null +++ b/target/nextflow/integrate/scanorama/nextflow_schema.json @@ -0,0 +1,192 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "scanorama", +"description": "Use Scanorama to integrate different experiments.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": 
"object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file", + "help_text": "Type: `file`, required. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5ad`. Output ", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5ad`. Output .h5mu file" + , + "default": "$id.$key.output.h5ad" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "obs_batch": { + "type": + "string", + "description": "Type: `string`, default: `batch`. Column name discriminating between your batches", + "help_text": "Type: `string`, default: `batch`. Column name discriminating between your batches." + , + "default": "batch" + } + + + , + "obsm_input": { + "type": + "string", + "description": "Type: `string`, default: `X_pca`. Basis obsm slot to run scanorama on", + "help_text": "Type: `string`, default: `X_pca`. Basis obsm slot to run scanorama on." + , + "default": "X_pca" + } + + + , + "obsm_output": { + "type": + "string", + "description": "Type: `string`, default: `X_scanorama`. The name of the field in adata", + "help_text": "Type: `string`, default: `X_scanorama`. The name of the field in adata.obsm where the integrated embeddings will be stored after running this function. Defaults to X_scanorama." 
+ , + "default": "X_scanorama" + } + + + , + "knn": { + "type": + "integer", + "description": "Type: `integer`, default: `20`. Number of nearest neighbors to use for matching", + "help_text": "Type: `integer`, default: `20`. Number of nearest neighbors to use for matching." + , + "default": "20" + } + + + , + "batch_size": { + "type": + "integer", + "description": "Type: `integer`, default: `5000`. The batch size used in the alignment vector computation", + "help_text": "Type: `integer`, default: `5000`. The batch size used in the alignment vector computation. Useful when integrating very large (\u003e100k samples) datasets. Set to large value that runs within available memory." + , + "default": "5000" + } + + + , + "sigma": { + "type": + "number", + "description": "Type: `double`, default: `15`. Correction smoothing parameter on Gaussian kernel", + "help_text": "Type: `double`, default: `15`. Correction smoothing parameter on Gaussian kernel." + , + "default": "15" + } + + + , + "approx": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Use approximate nearest neighbors with Python annoy; greatly speeds up matching runtime", + "help_text": "Type: `boolean`, default: `true`. Use approximate nearest neighbors with Python annoy; greatly speeds up matching runtime." + , + "default": "True" + } + + + , + "alpha": { + "type": + "number", + "description": "Type: `double`, default: `0.1`. Alignment score minimum cutoff", + "help_text": "Type: `double`, default: `0.1`. Alignment score minimum cutoff" + , + "default": "0.1" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/integrate/scarches/.config.vsh.yaml b/target/nextflow/integrate/scarches/.config.vsh.yaml new file mode 100644 index 00000000000..e4053a904aa --- /dev/null +++ b/target/nextflow/integrate/scarches/.config.vsh.yaml @@ -0,0 +1,331 @@ +functionality: + name: "scarches" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file to use as a query" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + description: "Path to the directory with reference model or a web link. For\ + \ HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--dataset_name" + description: "Name of query dataset to use as a batch name. 
If not set, name\ + \ of the input file is used" + info: null + default: + - "test_dataset" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--model_output" + description: "Output directory for model" + info: null + default: + - "model" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Early stopping arguments" + arguments: + - type: "boolean" + name: "--early_stopping" + description: "Whether to perform early stopping with respect to the validation\ + \ set." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--early_stopping_monitor" + description: "Metric logged during validation set epoch." 
+ info: null + default: + - "elbo_validation" + required: false + choices: + - "elbo_validation" + - "reconstruction_loss_validation" + - "kl_local_validation" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--early_stopping_patience" + description: "Number of validation epochs with no improvement after which training\ + \ will be stopped." + info: null + default: + - 45 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--early_stopping_min_delta" + description: "Minimum change in the monitored quantity to qualify as an improvement,\ + \ i.e. an absolute change of less than min_delta, will count as no improvement." + info: null + default: + - 0.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset, defaults to (20000 / number\ + \ of cells) * 400 or 400; whichever is smallest." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--reduce_lr_on_plateau" + description: "Whether to monitor validation loss and reduce learning rate when\ + \ validation set `lr_scheduler_metric` plateaus." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_factor" + description: "Factor to reduce learning rate." + info: null + default: + - 0.6 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_patience" + description: "Number of epochs with no improvement after which learning rate\ + \ will be reduced." 
+ info: null + default: + - 30.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs reference mapping with scArches" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + - type: "file" + path: "resources_test/HLCA_reference_model/HLCA_reference_model.zip" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/pytorch:23.09-py3" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scvi-tools~=1.0.3" + - "pandas~=2.1.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" 
+ mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scarches" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scarches/scarches" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/integrate/scarches/main.nf b/target/nextflow/integrate/scarches/main.nf new file mode 100644 index 00000000000..6d31cb50e1a --- /dev/null +++ b/target/nextflow/integrate/scarches/main.nf @@ -0,0 +1,2962 @@ +// scarches 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Vladimir Shitov + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "scarches", + "namespace" : "integrate", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Vladimir Shitov", + "info" : { + "role" : "Contributor", + "links" : { + "email" : "vladimir.shitov@helmholtz-muenchen.de", + "github" : "vladimirshitov", + "orcid" : "0000-0002-1960-8812", + "linkedin" : "vladimir-shitov-9a659513b" + }, + "organizations" : [ + { + "name" : "Helmholtz Munich", + "href" : "https://www.helmholtz-munich.de", + "role" : "PhD Candidate" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file to use as a query", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "alternatives" : [ + "-r" + ], + "description" : "Path to the directory with reference model or a web link. For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--dataset_name", + "description" : "Name of query dataset to use as a batch name. 
If not set, name of the input file is used", + "default" : [ + "test_dataset" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--model_output", + "description" : "Output directory for model", + "default" : [ + "model" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_output", + "description" : "In which .obsm slot to store the resulting integrated embedding.", + "default" : [ + "X_integrated_scanvi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Early stopping arguments", + "arguments" : [ + { + "type" : "boolean", + "name" : "--early_stopping", + "description" : "Whether to perform early stopping with respect to the validation set.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--early_stopping_monitor", + "description" : "Metric logged during validation set epoch.", + "default" : [ + "elbo_validation" + ], + "required" : false, + "choices" : [ + 
"elbo_validation", + "reconstruction_loss_validation", + "kl_local_validation" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--early_stopping_patience", + "description" : "Number of validation epochs with no improvement after which training will be stopped.", + "default" : [ + 45 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--early_stopping_min_delta", + "description" : "Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.", + "default" : [ + 0.0 + ], + "required" : false, + "min" : 0.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Learning parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--max_epochs", + "description" : "Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest.", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--reduce_lr_on_plateau", + "description" : "Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--lr_factor", + "description" : "Factor to reduce learning rate.", + "default" : [ + 0.6 + ], + "required" : false, + "min" : 0.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--lr_patience", + "description" : "Number of epochs with no improvement after which learning rate 
will be reduced.", + "default" : [ + 30.0 + ], + "required" : false, + "min" : 0.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Performs reference mapping with scArches", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/HLCA_reference_model/HLCA_reference_model.zip", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "nvcr.io/nvidia/pytorch:23.09-py3", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scvi-tools~=1.0.3", + "pandas~=2.1.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { 
+ "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + }, + { + "type" : "native", + "id" : "native" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/scarches/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scarches", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import mudata +import scvi +from torch.cuda import is_available as cuda_is_available +try: + from torch.backends.mps import is_available as mps_is_available +except 
ModuleNotFoundError: + # Older pytorch versions + # MacOS GPUs + def mps_is_available(): + return False + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'dataset_name': $( if [ ! -z ${VIASH_PAR_DATASET_NAME+x} ]; then echo "r'${VIASH_PAR_DATASET_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'early_stopping_min_delta': $( if [ ! 
-z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def _read_model_name_from_registry(model_path) -> str: + """Read registry with information about the model, return the model name""" + registry = scvi.model.base.BaseModelClass.load_registry(model_path) + return registry["model_name"] + + +def _detect_base_model(model_path): + """Read from the model's file which scvi_tools model it contains""" + + names_to_models_map = { + "AUTOZI": scvi.model.AUTOZI, + "CondSCVI": scvi.model.CondSCVI, + "DestVI": scvi.model.DestVI, + "LinearSCVI": scvi.model.LinearSCVI, + "PEAKVI": scvi.model.PEAKVI, + "SCANVI": scvi.model.SCANVI, + "SCVI": scvi.model.SCVI, + "TOTALVI": scvi.model.TOTALVI, + "MULTIVI": scvi.model.MULTIVI, + "AmortizedLDA": scvi.model.AmortizedLDA, + "JaxSCVI": scvi.model.JaxSCVI, + } + + return 
names_to_models_map[_read_model_name_from_registry(model_path)] + + +def extract_file_name(file_path): + """Return the name of the file from path to this file + + Examples + -------- + >>> extract_file_name("resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu") + pbmc_1k_protein_v3_mms + """ + slash_position = file_path.rfind("/") + dot_position = file_path.rfind(".") + + return file_path[slash_position + 1: dot_position] + + +def map_to_existing_reference(adata_query, model_path, check_val_every_n_epoch=1): + """ + A function to map the query data to the reference atlas + + Input: + * adata_query: An AnnData object with the query + * model_path: The reference model directory + + Output: + * vae_query: the trained scvi_tools model + * adata_query: The AnnData object with the query preprocessed for the mapping to the reference + """ + model = _detect_base_model(model_path) + + try: + model.prepare_query_anndata(adata_query, model_path) + except ValueError: + logger.warning("ValueError thrown when preparing adata for mapping. 
Clearing .varm field to prevent it") + adata_query.varm.clear() + model.prepare_query_anndata(adata_query, model_path) + + # Load query data into the model + vae_query = model.load_query_data( + adata_query, + model_path, + freeze_dropout=True + ) + + # Train scArches model for query mapping + vae_query.train( + max_epochs=par["max_epochs"], + early_stopping=par['early_stopping'], + early_stopping_monitor=par['early_stopping_monitor'], + early_stopping_patience=par['early_stopping_patience'], + early_stopping_min_delta=par['early_stopping_min_delta'], + check_val_every_n_epoch=check_val_every_n_epoch, + use_gpu=(cuda_is_available() or mps_is_available()) + ) + + return vae_query, adata_query + + +def _convert_object_dtypes_to_strings(adata): + """Convert object dtypes in .var and .obs to string to prevent error when saving file""" + def convert_cols(df): + object_cols = df.columns[df.dtypes == "object"] + for col in object_cols: + df[col] = df[col].astype(str) + return df + + adata.var = convert_cols(adata.var) + adata.obs = convert_cols(adata.obs) + + return adata + + +def _get_model_path(model_path: str): + """Obtain path to the directory with reference model. If the proposed \\`model_path\\` is a .zip archive, unzip it. 
If nesessary, convert model to the new format + + Parameters + ---------- + model_path : str + Path to a directory, where to search for the model or to a zip file containing the model + + Returns + ------- + Path to a directory with reference model in format of scvi-tools>=0.15 + """ + import os + import zipfile + import tempfile + from pathlib import Path + + if os.path.isdir(model_path) and "model.pt" in os.listdir(model_path): + # Probably, the \\`model_path\\` already contains model in the output format of scvi-tools>=0.15 + return model_path + + # The model either has old format or is a zip file downloaded from Zenodo + new_directory = Path(tempfile.TemporaryDirectory().name) + + if zipfile.is_zipfile(model_path): + with zipfile.ZipFile(model_path) as archive: + archive.extractall(new_directory) + model_dir = next(new_directory.glob("**/*.pt")).parent + + else: + model_dir = next(Path(model_path).glob("**/*.pt")).parent + + if "model_params.pt" in os.listdir(model_dir): + # The model is in the \\`directory\\`, but it was generated with scvi-tools<0.15 + # TODO: for new references (that could not be SCANVI based), we need to check the base class somehow. Reading registry does not work with models generated by scvi-tools<0.15 + # Here I assume that the reference model is for HLCA and thus is SCANVI based + converted_model_path = os.path.join(model_dir, "converted") + scvi.model.SCANVI.convert_legacy_save(model_dir, converted_model_path) + return converted_model_path + + elif "model.pt" in os.listdir(model_dir): + # Archive contained model in the new format, so just return the directory + return model_dir + + else: + raise ValueError("Cannot find model in the provided reference path. Please, provide a path or a link to the directory with reference model. 
For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip") + + +def main(): + + mdata_query = mudata.read(par["input"].strip()) + adata_query = mdata_query.mod[par["modality"]].copy() + + if "dataset" not in adata_query.obs.columns: + # Write name of the dataset as batch variable + if par["dataset_name"] is None: + logger.info("Detecting dataset name") + par["dataset_name"] = extract_file_name(par["input"]) + logger.info(f"Detected {par['dataset_name']}") + + adata_query.obs["dataset"] = par["dataset_name"] + + model_path = _get_model_path(par["reference"]) + vae_query, adata_query = map_to_existing_reference(adata_query, model_path=model_path) + model_name = _read_model_name_from_registry(model_path) + + # Save info about the used model + mdata_query.mod[par["modality"]].uns["integration_method"] = model_name + + logger.info("Trying to write latent representation") + output_key = par["obsm_output"].format(model_name=model_name) + mdata_query.mod[par["modality"]].obsm[output_key] = vae_query.get_latent_representation() + + logger.info("Converting dtypes") + mdata_query.mod[par["modality"]] = _convert_object_dtypes_to_strings(mdata_query.mod[par["modality"]]) + + logger.info("Updating mudata") + try: + mdata_query.update() # Without that error might be thrown during file saving + except KeyError: + # Sometimes this error is thrown, but then everything is magically fixed, and the file gets saved normally + # This is discussed here a bit: https://github.com/scverse/mudata/issues/27 + logger.warning("KeyError was thrown during updating mudata. 
Probably, the file is fixed after that, but be careful") + + logger.info("Saving h5mu file") + mdata_query.write_h5mu(par["output"].strip(), compression=par["output_compression"]) + + logger.info("Saving model") + vae_query.save(par["model_output"], overwrite=True) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/integrate_scarches", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
/**
 * Read a csv file into a list of maps, one map per data row.
 *
 * The first line that does not start with '#' is used as the header; later
 * lines starting with '#' are skipped. Fields are split on commas that are not
 * enclosed in double quotes, and one pair of surrounding double quotes is
 * removed. Empty fields are left out of the row's map.
 *
 * @param file_path A Path, or a value that is passed to file() to obtain one.
 *
 * @return A list with one map (header name -> value) per data row.
 */
def readCsv(file_path) {
  def output = []
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = Pattern.compile('''"(.*)"''')

  // declared locally so that the matcher does not end up in the script binding
  def unquote = { field ->
    def m = removeQuote.matcher(field)
    m.find() ? m.replaceFirst('$1') : field
  }

  def br = Files.newBufferedReader(inputFile)
  try {
    def row = -1
    def header = null
    def line
    // readLine() returns null at the end of the file
    while ((line = br.readLine()) != null) {
      row++
      if (line.startsWith("#")) {
        continue
      }
      def fields = splitRegex.split(line, -1)

      // the first non-comment line is the header
      if (header == null) {
        header = fields.collect{field -> unquote(field)}
        continue
      }

      def data = fields.collect{field -> field == "" ? null : unquote(field)}
      assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

      def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
      output.add(dataMap)
    }
    assert header != null: "CSV file should contain a header"
  } finally {
    // release the file handle, also when one of the assertions fails
    br.close()
  }

  output
}
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
// Recursively merge rhs into a shallow copy of lhs:
//  - two maps under the same key are merged recursively,
//  - two collections under the same key are concatenated,
//  - in every other case the value from rhs wins.
def mergeMap(Map lhs, Map rhs) {
  def merged = lhs.clone()
  rhs.each { key, incoming ->
    def current = merged[key]
    if (current instanceof Map && incoming instanceof Map) {
      merged[key] = mergeMap(current, incoming)
    } else if (current instanceof Collection && incoming instanceof Collection) {
      merged[key] = current + incoming
    } else {
      merged[key] = incoming
    }
  }
  return merged
}
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
// Render the help text of a single argument: its name, followed by the
// non-empty properties (type, default, example, choices, min, max) and the
// wrapped description.
def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  // flags that are listed after the type
  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction?.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  // lists are joined with the argument's separator, other values are stringified
  def stringify = { value ->
    if (value == null) {
      null
    } else if (value instanceof List) {
      value.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      value.toString()
    }
  }
  def dflt = stringify(param.default)
  def example = stringify(param.example)
  def min = param.min?.toString()
  def max = param.max?.toString()

  // String.replace() substitutes literally. With replaceAll() the backslash in
  // the replacement string is an escape character, so a replacement of "\\n"
  // inserts the letter n and a replacement of backslash-quote inserts a bare quote.
  def escapeChoice = { choice ->
    def escaped = choice.replace("\n", "\\n").replace("\"", "\\\"")
    escaped.contains(",") || escaped != choice ? "\"" + escaped + "\"" : escaped
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  // properties without a value are left out; line breaks inside a value are
  // shown as a literal backslash-n so that every property stays on one line
  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}
// Derive the format of the 'param_list' parameter:
//   "none"      - param_list is absent or null
//   "asis"      - param_list is not a String (e.g. already a list of maps)
//   "csv", "json", "yaml" - param_list is a String ending in a known file extension
//   "yaml_blob" - any other String
def _guessParamListFormat(params) {
  def paramList = params.containsKey("param_list") ? params.param_list : null
  if (paramList == null) {
    return "none"
  }
  if (!(paramList instanceof String)) {
    return "asis"
  }

  def formatBySuffix = [
    ".csv": "csv",
    ".json": "json",
    ".jsn": "json",
    ".yaml": "yaml",
    ".yml": "yaml"
  ]
  def match = formatBySuffix.find{ suffix, format -> paramList.endsWith(suffix) }
  return match != null ? match.value : "yaml_blob"
}
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
/**
 * Resolve the file paths in the parameters relative to given path
 *
 * @param paramList A Map containing parameters to process.
 *                  Only values that are still of type String are resolved;
 *                  values of any other type are passed through unchanged.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * @param relativeTo path of a file to resolve the parameters values to.
 *
 * @return A map of parameters where the location of the input file parameters have been
 *         resolved relatively to the provided path.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  // only String values are resolved, anything else is returned as is
  def resolve = { value ->
    value !instanceof String ? value : file(getChild(relativeTo, value))
  }
  paramList.collectEntries { parName, parValue ->
    // 'def' keeps the settings local instead of storing them in the script binding
    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
      if (parValue instanceof Collection) {
        parValue = parValue.collect(resolve)
      } else {
        // a non-String scalar is returned unchanged; referring to the
        // undefined name 'path' here raised a MissingPropertyException
        parValue = resolve(parValue)
      }
    }
    [parName, parValue]
  }
}
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
/**
 * Cast parameters to the correct type as defined in the Viash config
 *
 * @param parValues A Map of input arguments.
 * @param config A Map of the Viash configuration, used to look up the type of each argument.
 *
 * @return The input arguments that have been cast to the type from the viash config.
 */

private Map _castParamTypes(Map parValues, Map config) {
  // Under Groovy truth every non-empty String is true, including "false".
  // The text "false" is therefore mapped to false explicitly; all other
  // values are coerced as before.
  def castBoolean = { value ->
    if (value instanceof CharSequence && value.toString().trim().equalsIgnoreCase("false")) {
      return false
    }
    value as Boolean
  }

  // Cast the input to the correct type according to viash config
  def castParValues = parValues.collectEntries({ parName, parValue ->
    // 'def' keeps both variables local instead of storing them in the script binding
    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
    // dont parse parameters like publish_dir ( in which case paramSettings = null)
    // a plain property read is used because Map.get(key, default) would add
    // a 'type' entry to the argument settings when it is missing
    def parType = paramSettings ? paramSettings.type : null

    // NOTE(review): values of unknown parameters are wrapped in a list here and
    // are not unwrapped below - confirm that callers rely on this.
    if (parValue !instanceof Collection) {
      parValue = [parValue]
    }

    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
      parValue = parValue.collect{ path ->
        path !instanceof String ? path : file(path)
      }
    } else if (parType == "integer") {
      parValue = parValue.collect{it as Integer}
    } else if (parType == "double") {
      parValue = parValue.collect{it as Double}
    } else if (parType == "boolean" ||
               parType == "boolean_true" ||
               parType == "boolean_false") {
      parValue = parValue.collect(castBoolean)
    }

    // simplify list to value if need be
    if (paramSettings && !paramSettings.multiple) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  })
  return castParValues
}
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ *    - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll states the filter directly instead of relying on a
+    // collectEntries closure that returns null for dropped entries.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      // parameterSettings is null for parameters that are not part of the
+      // config (publish_dir / publishDir), hence the safe navigation below
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of publishing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the Viash config, run through the same split/cast
+  // handling as user-supplied values
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+  def defaults = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Process the incoming sets, then layer each one on top of the defaults;
+  // elements beyond [id, values] are appended unchanged
+  def withDefaults = applyConfig(params, config).collect { tuple ->
+    def id = tuple[0]
+    def values = tuple[1]
+    [id, defaults + values] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(withDefaults)
+  return withDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and contains all required keys.
+ *
+ * @param map The object to check; the first assertion requires it to be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, used as a prefix in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must interpolate the closure parameter 'requiredKey'; there is
+    // no variable 'key' in this scope, so '$key' failed while building the message
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Entries with a null value are dropped first; every expected key must then be
+ * present and hold a Boolean.
+ *
+ * @param auto A Map with the auto settings.
+ *
+ * @return A Map containing only the expected keys of the input.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local; without it the assignment creates a variable
+  // in the shared script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // declared with def to keep it local to this closure + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!out instanceof List' negates 'out' before the type check + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/integrate/scarches/nextflow.config b/target/nextflow/integrate/scarches/nextflow.config new file mode 100644 index 00000000000..db9b4ed3ea7 
--- /dev/null +++ b/target/nextflow/integrate/scarches/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'scarches' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs reference mapping with scArches' + author = 'Vladimir Shitov' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 
512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/integrate/scarches/nextflow_params.yaml b/target/nextflow/integrate/scarches/nextflow_params.yaml new file mode 100644 index 00000000000..e8b0b098625 --- /dev/null +++ b/target/nextflow/integrate/scarches/nextflow_params.yaml @@ -0,0 +1,27 @@ +# Inputs +input: # please fill in - example: "path/to/file" +modality: "rna" +reference: # please fill in - example: "path/to/file" +dataset_name: "test_dataset" + +# Outputs +# output: "$id.$key.output.output" +# output_compression: "gzip" +# model_output: "$id.$key.model_output.model_output" +obsm_output: "X_integrated_scanvi" + +# Early stopping arguments +# early_stopping: true +early_stopping_monitor: "elbo_validation" +early_stopping_patience: 45 +early_stopping_min_delta: 0.0 + +# Learning parameters +max_epochs: # please fill in - example: 123 +reduce_lr_on_plateau: true +lr_factor: 0.6 +lr_patience: 30 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/scarches/nextflow_schema.json b/target/nextflow/integrate/scarches/nextflow_schema.json new file mode 100644 index 00000000000..d873eb1f878 --- /dev/null +++ 
b/target/nextflow/integrate/scarches/nextflow_schema.json @@ -0,0 +1,277 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "scarches", +"description": "Performs reference mapping with scArches", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file to use as a query", + "help_text": "Type: `file`, required. Input h5mu file to use as a query" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required. Path to the directory with reference model or a web link", + "help_text": "Type: `file`, required. Path to the directory with reference model or a web link. For HLCA use https://zenodo.org/record/6337966/files/HLCA_reference_model.zip" + + } + + + , + "dataset_name": { + "type": + "string", + "description": "Type: `string`, default: `test_dataset`. Name of query dataset to use as a batch name", + "help_text": "Type: `string`, default: `test_dataset`. Name of query dataset to use as a batch name. If not set, name of the input file is used" + , + "default": "test_dataset" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file." + , + "default": "$id.$key.output.output" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "model_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model", + "help_text": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model" + , + "default": "$id.$key.model_output.model_output" + } + + + , + "obsm_output": { + "type": + "string", + "description": "Type: `string`, default: `X_integrated_scanvi`. In which .obsm slot to store the resulting integrated embedding", + "help_text": "Type: `string`, default: `X_integrated_scanvi`. In which .obsm slot to store the resulting integrated embedding." + , + "default": "X_integrated_scanvi" + } + + +} +}, + + + "early stopping arguments" : { + "title": "Early stopping arguments", + "type": "object", + "description": "No description", + "properties": { + + + "early_stopping": { + "type": + "boolean", + "description": "Type: `boolean`. Whether to perform early stopping with respect to the validation set", + "help_text": "Type: `boolean`. Whether to perform early stopping with respect to the validation set." + + } + + + , + "early_stopping_monitor": { + "type": + "string", + "description": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. Metric logged during validation set epoch", + "help_text": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. Metric logged during validation set epoch.", + "enum": ["elbo_validation", "reconstruction_loss_validation", "kl_local_validation"] + + , + "default": "elbo_validation" + } + + + , + "early_stopping_patience": { + "type": + "integer", + "description": "Type: `integer`, default: `45`. 
Number of validation epochs with no improvement after which training will be stopped", + "help_text": "Type: `integer`, default: `45`. Number of validation epochs with no improvement after which training will be stopped." + , + "default": "45" + } + + + , + "early_stopping_min_delta": { + "type": + "number", + "description": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement", + "help_text": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement." + , + "default": "0.0" + } + + +} +}, + + + "learning parameters" : { + "title": "Learning parameters", + "type": "object", + "description": "No description", + "properties": { + + + "max_epochs": { + "type": + "integer", + "description": "Type: `integer`, required. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest", + "help_text": "Type: `integer`, required. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest." + + } + + + , + "reduce_lr_on_plateau": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus", + "help_text": "Type: `boolean`, default: `true`. Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus." + , + "default": "True" + } + + + , + "lr_factor": { + "type": + "number", + "description": "Type: `double`, default: `0.6`. Factor to reduce learning rate", + "help_text": "Type: `double`, default: `0.6`. Factor to reduce learning rate." + , + "default": "0.6" + } + + + , + "lr_patience": { + "type": + "number", + "description": "Type: `double`, default: `30`. 
Number of epochs with no improvement after which learning rate will be reduced", + "help_text": "Type: `double`, default: `30`. Number of epochs with no improvement after which learning rate will be reduced." + , + "default": "30" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/early stopping arguments" + }, + + { + "$ref": "#/definitions/learning parameters" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/integrate/scarches/setup_logger.py b/target/nextflow/integrate/scarches/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/integrate/scarches/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/integrate/scvi/.config.vsh.yaml b/target/nextflow/integrate/scvi/.config.vsh.yaml new file mode 100644 index 00000000000..ed2ae349e77 --- /dev/null +++ b/target/nextflow/integrate/scvi/.config.vsh.yaml @@ -0,0 +1,591 @@ +functionality: + name: "scvi" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Malte D. 
Luecken" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "malte.luecken@helmholtz-muenchen.de" + github: "LuckyMD" + orcid: "0000-0001-7464-7921" + linkedin: "malte-l%C3%BCcken-b8b21049" + twitter: "MDLuecken" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "Group Leader" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + - name: "Matthias Beyens" + roles: + - "contributor" + info: + role: "Contributor" + links: + github: "MatthiasBeyens" + orcid: "0000-0003-3304-0706" + email: "matthias.beyens@gmail.com" + linkedin: "mbeyens" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. If None, X is used" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: "Column name discriminating between your batches." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_input" + description: ".var column containing highly variable genes. By default, do not\ + \ subset genes." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_labels" + description: "Key in adata.obs for label information. Categories will automatically\ + \ be \nconverted into integer categories and saved to adata.obs['_scvi_labels'].\n\ + If None, assigns the same label to all the data.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_size_factor" + description: "Key in adata.obs for size factor information. Instead of using\ + \ library size as a size factor,\nthe provided size factor column will be\ + \ used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_categorical_covariate" + description: "Keys in adata.obs that correspond to categorical data. These covariates\ + \ can be added in\naddition to the batch covariate and are also treated as\ + \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ + \ latent space).\nThus, these should not be used for biologically-relevant\ + \ factors that you do _not_ want to correct for.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_continuous_covariate" + description: "Keys in adata.obs that correspond to continuous data. 
These covariates\ + \ can be added in\naddition to the batch covariate and are also treated as\ + \ nuisance factors\n(i.e., the model tries to minimize their effects on the\ + \ latent space). Thus, these should not be\nused for biologically-relevant\ + \ factors that you do _not_ want to correct for.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_model" + description: "Folder where the state of the trained model will be saved to." + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_scvi_integrated" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "SCVI options" + arguments: + - type: "integer" + name: "--n_hidden_nodes" + description: "Number of nodes per hidden layer." + info: null + default: + - 128 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_dimensions_latent_space" + description: "Dimensionality of the latent space." 
+ info: null + default: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_hidden_layers" + description: "Number of hidden layers used for encoder and decoder neural-networks." + info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--dropout_rate" + description: "Dropout rate for the neural networks." + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--dispersion" + description: "Set the behavior for the dispersion for negative binomial distributions:\n\ + - gene: dispersion parameter of negative binomial is constant per gene across\ + \ cells\n- gene-batch: dispersion can differ between different batches\n-\ + \ gene-label: dispersion can differ between different labels\n- gene-cell:\ + \ dispersion can differ for every gene in every cell\n" + info: null + default: + - "gene" + required: false + choices: + - "gene" + - "gene-batch" + - "gene-label" + - "gene-cell" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--gene_likelihood" + description: "Model used to generate the expression data from a count-based\ + \ likelihood distribution.\n- nb: Negative binomial distribution\n- zinb:\ + \ Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n" + info: null + default: + - "nb" + required: false + choices: + - "nb" + - "zinb" + - "poisson" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variational auto-encoder model options" + arguments: + - type: "string" + name: "--use_layer_normalization" + description: "Neural networks for which to enable layer normalization. 
\n" + info: null + default: + - "both" + required: false + choices: + - "encoder" + - "decoder" + - "none" + - "both" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--use_batch_normalization" + description: "Neural networks for which to enable batch normalization. \n" + info: null + default: + - "none" + required: false + choices: + - "encoder" + - "decoder" + - "none" + - "both" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_false" + name: "--encode_covariates" + description: "Whether to concatenate covariates to expression in encoder" + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--deeply_inject_covariates" + description: "Whether to concatenate covariates into output of hidden layers\ + \ in encoder/decoder. \nThis option only applies when n_layers > 1. The covariates\ + \ are concatenated to\nthe input of subsequent hidden layers.\n" + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--use_observed_lib_size" + description: "Use observed library size for RNA as scaling factor in mean of\ + \ conditional distribution.\n" + info: null + direction: "input" + dest: "par" + - name: "Early stopping arguments" + arguments: + - type: "boolean" + name: "--early_stopping" + description: "Whether to perform early stopping with respect to the validation\ + \ set." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--early_stopping_monitor" + description: "Metric logged during validation set epoch." 
+ info: null + default: + - "elbo_validation" + required: false + choices: + - "elbo_validation" + - "reconstruction_loss_validation" + - "kl_local_validation" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--early_stopping_patience" + description: "Number of validation epochs with no improvement after which training\ + \ will be stopped." + info: null + default: + - 45 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--early_stopping_min_delta" + description: "Minimum change in the monitored quantity to qualify as an improvement,\ + \ i.e. an absolute change of less than min_delta, will count as no improvement." + info: null + default: + - 0.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset, defaults to (20000 / number\ + \ of cells) * 400 or 400; whichever is smallest." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--reduce_lr_on_plateau" + description: "Whether to monitor validation loss and reduce learning rate when\ + \ validation set `lr_scheduler_metric` plateaus." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_factor" + description: "Factor to reduce learning rate." + info: null + default: + - 0.6 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--lr_patience" + description: "Number of epochs with no improvement after which learning rate\ + \ will be reduced." 
+ info: null + default: + - 30.0 + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Data validition" + arguments: + - type: "integer" + name: "--n_obs_min_count" + description: "Minimum number of cells threshold ensuring that every obs_batch\ + \ category has sufficient observations (cells) for model training." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_var_min_count" + description: "Minimum number of genes threshold ensuring that every var_input\ + \ filter has sufficient observations (genes) for model training." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../../utils/subset_vars.py" + description: "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "../../utils/subset_vars.py" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "nvcr.io/nvidia/pytorch:23.06-py3" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libpng-dev" + - "libjpeg-dev" + interactive: false + - type: "docker" + run: + - "pip install \"jax[cuda]\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n" + 
- type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + - type: "python" + user: false + packages: + - "numba~=0.57.1" + - "scvi-tools~=1.0.0" + upgrade: false + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midcpu" + - "midmem" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scvi" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scvi/scvi" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/integrate/scvi/main.nf b/target/nextflow/integrate/scvi/main.nf new file mode 100644 index 
00000000000..9393402ffe5 --- /dev/null +++ b/target/nextflow/integrate/scvi/main.nf @@ -0,0 +1,3174 @@ +// scvi 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Malte D. Luecken (author) +// * Dries Schaumont (maintainer) +// * Matthias Beyens (contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "scvi", + "namespace" : "integrate", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Malte D. 
Luecken", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "malte.luecken@helmholtz-muenchen.de", + "github" : "LuckyMD", + "orcid" : "0000-0001-7464-7921", + "linkedin" : "malte-l%C3%BCcken-b8b21049", + "twitter" : "MDLuecken" + }, + "organizations" : [ + { + "name" : "Helmholtz Munich", + "href" : "https://www.helmholtz-munich.de", + "role" : "Group Leader" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + }, + { + "name" : "Matthias Beyens", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "MatthiasBeyens", + "orcid" : "0000-0003-3304-0706", + "email" : "matthias.beyens@gmail.com", + "linkedin" : "mbeyens" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_layer", + "description" : "Input layer to use. 
If None, X is used", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_batch", + "description" : "Column name discriminating between your batches.", + "default" : [ + "sample_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_input", + "description" : ".var column containing highly variable genes. By default, do not subset genes.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_labels", + "description" : "Key in adata.obs for label information. Categories will automatically be \nconverted into integer categories and saved to adata.obs['_scvi_labels'].\nIf None, assigns the same label to all the data.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_size_factor", + "description" : "Key in adata.obs for size factor information. Instead of using library size as a size factor,\nthe provided size factor column will be used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_categorical_covariate", + "description" : "Keys in adata.obs that correspond to categorical data. 
These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space).\nThus, these should not be used for biologically-relevant factors that you do _not_ want to correct for.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_continuous_covariate", + "description" : "Keys in adata.obs that correspond to continuous data. These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space). Thus, these should not be\nused for biologically-relevant factors that you do _not_ want to correct for.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output_model", + "description" : "Folder where the state of the trained model will be saved to.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_output", + "description" : "In which .obsm slot to 
store the resulting integrated embedding.", + "default" : [ + "X_scvi_integrated" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "SCVI options", + "arguments" : [ + { + "type" : "integer", + "name" : "--n_hidden_nodes", + "description" : "Number of nodes per hidden layer.", + "default" : [ + 128 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_dimensions_latent_space", + "description" : "Dimensionality of the latent space.", + "default" : [ + 30 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_hidden_layers", + "description" : "Number of hidden layers used for encoder and decoder neural-networks.", + "default" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--dropout_rate", + "description" : "Dropout rate for the neural networks.", + "default" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--dispersion", + "description" : "Set the behavior for the dispersion for negative binomial distributions:\n- gene: dispersion parameter of negative binomial is constant per gene across cells\n- gene-batch: dispersion can differ between different batches\n- gene-label: dispersion can differ between different labels\n- gene-cell: dispersion can differ for every gene in every cell\n", + "default" : [ + "gene" + ], + "required" : false, + "choices" : [ + "gene", + "gene-batch", + "gene-label", + "gene-cell" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + 
"name" : "--gene_likelihood", + "description" : "Model used to generate the expression data from a count-based likelihood distribution.\n- nb: Negative binomial distribution\n- zinb: Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n", + "default" : [ + "nb" + ], + "required" : false, + "choices" : [ + "nb", + "zinb", + "poisson" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Variational auto-encoder model options", + "arguments" : [ + { + "type" : "string", + "name" : "--use_layer_normalization", + "description" : "Neural networks for which to enable layer normalization. \n", + "default" : [ + "both" + ], + "required" : false, + "choices" : [ + "encoder", + "decoder", + "none", + "both" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--use_batch_normalization", + "description" : "Neural networks for which to enable batch normalization. \n", + "default" : [ + "none" + ], + "required" : false, + "choices" : [ + "encoder", + "decoder", + "none", + "both" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_false", + "name" : "--encode_covariates", + "description" : "Whether to concatenate covariates to expression in encoder", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--deeply_inject_covariates", + "description" : "Whether to concatenate covariates into output of hidden layers in encoder/decoder. \nThis option only applies when n_layers > 1. 
The covariates are concatenated to\nthe input of subsequent hidden layers.\n", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--use_observed_lib_size", + "description" : "Use observed library size for RNA as scaling factor in mean of conditional distribution.\n", + "direction" : "input", + "dest" : "par" + } + ] + }, + { + "name" : "Early stopping arguments", + "arguments" : [ + { + "type" : "boolean", + "name" : "--early_stopping", + "description" : "Whether to perform early stopping with respect to the validation set.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--early_stopping_monitor", + "description" : "Metric logged during validation set epoch.", + "default" : [ + "elbo_validation" + ], + "required" : false, + "choices" : [ + "elbo_validation", + "reconstruction_loss_validation", + "kl_local_validation" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--early_stopping_patience", + "description" : "Number of validation epochs with no improvement after which training will be stopped.", + "default" : [ + 45 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--early_stopping_min_delta", + "description" : "Minimum change in the monitored quantity to qualify as an improvement, i.e. 
an absolute change of less than min_delta, will count as no improvement.", + "default" : [ + 0.0 + ], + "required" : false, + "min" : 0.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Learning parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--max_epochs", + "description" : "Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--reduce_lr_on_plateau", + "description" : "Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--lr_factor", + "description" : "Factor to reduce learning rate.", + "default" : [ + 0.6 + ], + "required" : false, + "min" : 0.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--lr_patience", + "description" : "Number of epochs with no improvement after which learning rate will be reduced.", + "default" : [ + 30.0 + ], + "required" : false, + "min" : 0.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Data validition", + "arguments" : [ + { + "type" : "integer", + "name" : "--n_obs_min_count", + "description" : "Minimum number of cells threshold ensuring that every obs_batch category has sufficient observations (cells) for model training.", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_var_min_count", + "description" : 
"Minimum number of genes threshold ensuring that every var_input filter has sufficient observations (genes) for model training.", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" + }, + { + "type" : "file", + "path" : "../../utils/subset_vars.py", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" + } + ], + "description" : "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" + }, + { + "type" : "file", + "path" : "../../utils/subset_vars.py", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "nvcr.io/nvidia/pytorch:23.06-py3", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libpng-dev", + "libjpeg-dev" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "pip install 
\\"jax[cuda]\\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n" + ] + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "numba~=0.57.1", + "scvi-tools~=1.0.0" + ], + "upgrade" : false + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midcpu", + "midmem", + "gpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/scvi/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/scvi", + "viash_version" : 
"0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from scanpy._utils import check_nonnegative_integers +import mudata +import scvi + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_labels': $( if [ ! -z ${VIASH_PAR_OBS_LABELS+x} ]; then echo "r'${VIASH_PAR_OBS_LABELS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_size_factor': $( if [ ! -z ${VIASH_PAR_OBS_SIZE_FACTOR+x} ]; then echo "r'${VIASH_PAR_OBS_SIZE_FACTOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_categorical_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CATEGORICAL_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CATEGORICAL_COVARIATE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'obs_continuous_covariate': $( if [ ! -z ${VIASH_PAR_OBS_CONTINUOUS_COVARIATE+x} ]; then echo "r'${VIASH_PAR_OBS_CONTINUOUS_COVARIATE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_model': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_MODEL+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MODEL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'n_hidden_nodes': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_NODES+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_NODES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_dimensions_latent_space': $( if [ ! -z ${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE+x} ]; then echo "int(r'${VIASH_PAR_N_DIMENSIONS_LATENT_SPACE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_hidden_layers': $( if [ ! -z ${VIASH_PAR_N_HIDDEN_LAYERS+x} ]; then echo "int(r'${VIASH_PAR_N_HIDDEN_LAYERS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'dropout_rate': $( if [ ! -z ${VIASH_PAR_DROPOUT_RATE+x} ]; then echo "float(r'${VIASH_PAR_DROPOUT_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'dispersion': $( if [ ! -z ${VIASH_PAR_DISPERSION+x} ]; then echo "r'${VIASH_PAR_DISPERSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'gene_likelihood': $( if [ ! -z ${VIASH_PAR_GENE_LIKELIHOOD+x} ]; then echo "r'${VIASH_PAR_GENE_LIKELIHOOD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'use_layer_normalization': $( if [ ! -z ${VIASH_PAR_USE_LAYER_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_LAYER_NORMALIZATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'use_batch_normalization': $( if [ ! -z ${VIASH_PAR_USE_BATCH_NORMALIZATION+x} ]; then echo "r'${VIASH_PAR_USE_BATCH_NORMALIZATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'encode_covariates': $( if [ ! -z ${VIASH_PAR_ENCODE_COVARIATES+x} ]; then echo "r'${VIASH_PAR_ENCODE_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'deeply_inject_covariates': $( if [ ! 
-z ${VIASH_PAR_DEEPLY_INJECT_COVARIATES+x} ]; then echo "r'${VIASH_PAR_DEEPLY_INJECT_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'use_observed_lib_size': $( if [ ! -z ${VIASH_PAR_USE_OBSERVED_LIB_SIZE+x} ]; then echo "r'${VIASH_PAR_USE_OBSERVED_LIB_SIZE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'early_stopping': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'early_stopping_monitor': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MONITOR+x} ]; then echo "r'${VIASH_PAR_EARLY_STOPPING_MONITOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'early_stopping_patience': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_PATIENCE+x} ]; then echo "int(r'${VIASH_PAR_EARLY_STOPPING_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'early_stopping_min_delta': $( if [ ! -z ${VIASH_PAR_EARLY_STOPPING_MIN_DELTA+x} ]; then echo "float(r'${VIASH_PAR_EARLY_STOPPING_MIN_DELTA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'reduce_lr_on_plateau': $( if [ ! -z ${VIASH_PAR_REDUCE_LR_ON_PLATEAU+x} ]; then echo "r'${VIASH_PAR_REDUCE_LR_ON_PLATEAU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'lr_factor': $( if [ ! -z ${VIASH_PAR_LR_FACTOR+x} ]; then echo "float(r'${VIASH_PAR_LR_FACTOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'lr_patience': $( if [ ! -z ${VIASH_PAR_LR_PATIENCE+x} ]; then echo "float(r'${VIASH_PAR_LR_PATIENCE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_obs_min_count': $( if [ ! -z ${VIASH_PAR_N_OBS_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_OBS_MIN_COUNT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_var_min_count': $( if [ ! 
-z ${VIASH_PAR_N_VAR_MIN_COUNT+x} ]; then echo "int(r'${VIASH_PAR_N_VAR_MIN_COUNT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +import sys +sys.path.append(meta['resources_dir']) + +# START TEMPORARY WORKAROUND subset_vars +# reason: resources aren't available when using Nextflow fusion +# from subset_vars import subset_vars +def subset_vars(adata, subset_col): + return adata[:, adata.var[subset_col]].copy() + +# END TEMPORARY WORKAROUND subset_vars + +#TODO: optionally, move to qa +# https://github.com/openpipelines-bio/openpipeline/issues/435 +def check_validity_anndata(adata, layer, obs_batch, + n_obs_min_count, n_var_min_count): + assert check_nonnegative_integers( + adata.layers[layer] if layer else adata.X + ), f"Make sure input adata contains raw_counts" + + assert len(set(adata.var_names)) == len( + adata.var_names + ), f"Dataset contains multiple genes with same gene name." + + # Ensure every obs_batch category has sufficient observations + assert min(adata.obs[[obs_batch]].value_counts()) > n_obs_min_count, \\\\ + f"Anndata has fewer than {n_obs_min_count} cells." + + assert adata.n_vars > n_var_min_count, \\\\ + f"Anndata has fewer than {n_var_min_count} genes." 
+ + + +def main(): + mdata = mudata.read(par["input"].strip()) + adata = mdata.mod[par['modality']] + + if par['var_input']: + # Subset to HVG + adata_subset = subset_vars(adata, subset_col=par["var_input"]).copy() + else: + adata_subset = adata.copy() + + check_validity_anndata( + adata_subset, par['input_layer'], par['obs_batch'], + par["n_obs_min_count"], par["n_var_min_count"] + ) + # Set up the data + scvi.model.SCVI.setup_anndata( + adata_subset, + batch_key=par['obs_batch'], + layer=par['input_layer'], + labels_key=par['obs_labels'], + size_factor_key=par['obs_size_factor'], + categorical_covariate_keys=par['obs_categorical_covariate'], + continuous_covariate_keys=par['obs_continuous_covariate'], + ) + + # Set up the model + vae_uns = scvi.model.SCVI( + adata_subset, + n_hidden=par["n_hidden_nodes"], + n_latent=par["n_dimensions_latent_space"], + n_layers=par["n_hidden_layers"], + dropout_rate=par["dropout_rate"], + dispersion=par["dispersion"], + gene_likelihood=par["gene_likelihood"], + use_layer_norm=par["use_layer_normalization"], + use_batch_norm=par["use_batch_normalization"], + encode_covariates=par["encode_covariates"], # Default (True) is for better scArches performance -> maybe don't use this always? + deeply_inject_covariates=par["deeply_inject_covariates"], # Default (False) for better scArches performance -> maybe don't use this always? 
+ use_observed_lib_size=par["use_observed_lib_size"], # When size_factors are not passed + ) + + plan_kwargs = { + "reduce_lr_on_plateau": par['reduce_lr_on_plateau'], + "lr_patience": par['lr_patience'], + "lr_factor": par['lr_factor'], + } + + + # Train the model + vae_uns.train( + max_epochs=par['max_epochs'], + early_stopping=par['early_stopping'], + early_stopping_monitor=par['early_stopping_monitor'], + early_stopping_patience=par['early_stopping_patience'], + early_stopping_min_delta=par['early_stopping_min_delta'], + plan_kwargs=plan_kwargs, + check_val_every_n_epoch=1, + accelerator="auto", + ) + # Note: train_size=1.0 should give better results, but then can't do early_stopping on validation set + + # Get the latent output + adata.obsm[par['obsm_output']] = vae_uns.get_latent_representation() + + mdata.mod[par['modality']] = adata + mdata.write_h5mu(par['output'].strip(), compression=par["output_compression"]) + if par["output_model"]: + vae_uns.save(par["output_model"], overwrite=True) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/integrate_scvi", + "tag" : "0.12.0" + }, + "label" : [ + "midcpu", + "midmem", + "gpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. 
the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets; the first element of each set is its id.
 */
private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
  def ppIds = parameterSets.collect{it[0]}
  // unique(false) returns a de-duplicated copy. The mutating unique() would strip the
  // duplicates from ppIds before the failure message is rendered.
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

/**
 * Resolve the file paths in the parameters relative to given path
 *
 * @param paramList A Map containing parameters to process.
 *                  This function assumes that files are still of type String.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * @param relativeTo path of a file to resolve the parameters values to.
 *
 * @return A map of parameters where the location of the input file parameters has been
 *         resolved relative to the provided path. Values that are not Strings, and
 *         parameters that are not input files according to the config, are returned as is.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  paramList.collectEntries { parName, parValue ->
    // declared locally so that it does not end up in the shared script binding
    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
    // NOTE(review): only an explicit direction == "input" is resolved here, whereas the type
    // casting code treats a missing direction as "input" -- confirm this is intended.
    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
      if (parValue instanceof Collection) {
        parValue = parValue.collect({path ->
          path !instanceof String ? path : file(getChild(relativeTo, path))
        })
      } else {
        // a value which is not a String (e.g. already a file object) is kept untouched
        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
      }
    }
    [parName, parValue]
  }
}
/**
 * Parse the 'param_list' argument based on settings defined in a viash config
 * and return the parameter sets it describes.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of [id, parameters] pairs that were parsed from the 'param_list' argument value.
 */
private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
  // first try to guess the format (if not set in params)
  def paramListFormat = _guessParamListFormat(params)

  // get the correct parser function for the detected params_list format;
  // every parser returns [ location of the param_list file (or null), parsed content ]
  def paramListParsers = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert paramListParsers.containsKey(paramListFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
    "'yaml', 'yaml_blob', 'asis' or 'none'"
  def paramListParser = paramListParsers.get(paramListFormat)

  // fetch multi param inputs
  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
  // If the value was indeed a file, paramListFile contains the location of that file (used later).
  def paramListFile = paramListOut[0]
  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file

  // data checks
  assert paramSets instanceof List: "--param_list should contain a list of maps"
  for (value in paramSets) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
  paramSets = paramSets.collect({ paramValues ->
    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
  })
  // Split parameters with 'multiple: true'
  paramSets = paramSets.collect({ id, paramValues ->
    def splitParamValues = _splitParams(paramValues, config)
    [id, splitParamValues]
  })

  // The paths of input files inside a param_list file may have been specified relatively to the
  // location of the param_list file. These paths must be made absolute.
  if (paramListFile){
    paramSets = paramSets.collect({ id, paramValues ->
      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
      [id, relativeParamValues]
    })
  }

  return paramSets
}

/**
 * Cast parameters to the correct type as defined in the Viash config
 *
 * @param parValues A Map of input arguments.
 * @param config A Map of the Viash configuration.
 *
 * @return The input arguments that have been cast to the type from the viash config.
 */
private Map _castParamTypes(Map parValues, Map config) {
  // Cast the input to the correct type according to viash config
  def castParValues = parValues.collectEntries({ parName, parValue ->
    // Both variables are declared with 'def': without it they are written to the script
    // binding, which is shared by every invocation of this function.
    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
    // dont parse parameters like publish_dir ( in which case paramSettings = null)
    def parType = paramSettings ? paramSettings.get("type", null) : null
    // NOTE(review): a scalar value of a parameter which is not in the config (e.g. publish_dir)
    // is wrapped in a list here and never unwrapped below -- confirm that callers expect this.
    if (parValue !instanceof Collection) {
      parValue = [parValue]
    }
    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
      parValue = parValue.collect{ path ->
        if (path !instanceof String) {
          path
        } else {
          file(path)
        }
      }
    } else if (parType == "integer") {
      parValue = parValue.collect{it as Integer}
    } else if (parType == "double") {
      parValue = parValue.collect{it as Double}
    } else if (parType == "boolean" ||
               parType == "boolean_true" ||
               parType == "boolean_false") {
      parValue = parValue.collect{it as Boolean}
    }

    // simplify list to value if need be
    if (paramSettings && !paramSettings.multiple) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  })
  return castParValues
}

/**
 * Apply the argument settings specified in a Viash config to a single parameter set.
 *    - Split the parameter values according to their separator if
 *      the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *
 * @param paramValues A Map of parameter to be processed. All parameters must
 *                    also be specified in the Viash config ('publishDir' and
 *                    'publish_dir' are accepted as well).
 * @param config A Map of the Viash configuration. This Map can be generated from
 *               the config file using the readConfig() function.
 * @return The input parameters that have been processed.
 */
Map applyConfigToOneParameterSet(Map paramValues, Map config){
  def splitParamValues = _splitParams(paramValues, config)
  def castParamValues = _castParamTypes(splitParamValues, config)

  // Check if any unexpected arguments were passed
  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
  castParamValues.each({parName, parValue ->
    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
  })
  return castParamValues
}

/**
 * Apply the argument settings specified in a Viash config to a list of parameter sets.
 *    - Split the parameter values according to their separator if
 *      the parameter accepts multiple values
 *    - Cast the parameters to their correct types.
 *    - Assertions:
 *        ~ Check if any unknown parameters are found
 *        ~ Check if the ID of the parameter set is unique across all sets.
 *
 * @param parameterSets A list of parameter sets. Element 0 of each set is the id,
 *                      element 1 the parameter map; further elements are passed through as is.
 * @param config A Map of the Viash configuration.
 *
 * @return The input parameters that have been processed.
 */
List applyConfig(List parameterSets, Map config){
  def processedparameterSets = parameterSets.collect({ parameterSet ->
    def id = parameterSet[0]
    def paramValues = parameterSet[1]
    def passthrough = parameterSet.drop(2)
    def processedSet = applyConfigToOneParameterSet(paramValues, config)
    [id, processedSet] + passthrough
  })

  _checkUniqueIds(processedparameterSets)
  return processedparameterSets
}
/**
 * Parse nextflow parameters based on settings defined in a viash config.
 * Return a list of parameter sets, each parameter set corresponding to
 * an event in a nextflow channel. The output from this function can be used
 * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
 * events.
 *
 * This function performs:
 *   - A filtering of the params which can be found in the config file.
 *   - Process the params_list argument which allows a user to initialise
 *     a Vdsl3 channel with multiple parameter sets. Possible formats are
 *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
 *     which correspond to the different arguments of this pipeline. A json or a yaml
 *     file should be a list of maps, each of which has keys corresponding to the
 *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
 *     When passing a csv, json or yaml, relative path names are relativized to the
 *     location of the parameter file.
 *   - Combine the parameter sets into a vdsl3 Channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of parameter sets with the first element of each set being
 *         the event ID and the second element containing a map of the parsed parameters.
 */
private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
  /* parse regular parameters (not in param_list)  */
  /*************************************************/
  def globalParams = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  def globalID = params.get("id", null)
  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)

  /* process params_list arguments */
  /*********************************/
  def paramSets = _parseParamListArguments(params, config)
  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)

  /* combine arguments into channel */
  /**********************************/
  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
    def id = paramSet[0]
    def parValues = paramSet[1]
    id = [id, globalID].find({it != null}) // first non-null element

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      id = id ? id : "stub" + index
    }
    assert id != null: "Each parameter set should have at least an ID."

    // Add regular parameters together with parameters passed with 'param_list'
    // (values from 'param_list' take precedence)
    def combinedArgsValues = globalParamsValues + parValues

    // Remove parameters which are null, if the default is also null.
    // findAll is used so that the result does not depend on how collectEntries treats a
    // closure that returns null. The settings are looked up with a null-safe,
    // non-mutating get(), because parameters such as 'publish_dir' have no entry in the
    // config and Groovy's two-argument Map.get() would write the default into the config.
    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
      if (paramValue != null) {
        return true
      }
      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
      parameterSettings?.get("default") != null
    }
    [id, combinedArgsValues]
  }

  // Check if ids (first element of each list) is unique
  _checkUniqueIds(processedParams)
  return processedParams
}

/**
 * Parse nextflow parameters based on settings defined in a viash config
 * and return a nextflow channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A nextflow Channel with events. Events are formatted as a tuple that
 *         first contains the ID of the event and as second element holds a parameter map.
 */
def channelFromParams(Map params, Map config) {
  // local variable: an undeclared assignment would leak into the script binding
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}
/**
 * Apply a Viash config to a list of Vdsl3 formatted parameter sets.
 *
 * Steps:
 *   - collect the defaults declared in the config and run them through
 *     applyConfigToOneParameterSet,
 *   - run the incoming parameter sets through applyConfig,
 *   - lay the incoming values over the defaults (incoming values win),
 *   - assert that the ids of the resulting sets are unique.
 *
 * @param params A list of parameter sets as tuples: element 0 is the unique id of
 *               the set, element 1 the parameters; any further elements are
 *               handed to the output untouched.
 * @param config A Map of the Viash configuration, e.g. as produced by readConfig().
 *
 * @return A list of processed parameter sets as tuples.
 */
private List _preprocessInputsList(List params, Map config) {
  // defaults from the config, keyed by argument name
  def configDefaults = [:]
  config.functionality.allArguments.each { argument ->
    if (argument.containsKey("default")) {
      configDefaults[argument.plainName] = argument.default
    }
  }

  // defaults and inputs both go through the same config-driven processing
  def processedDefaults = applyConfigToOneParameterSet(configDefaults, config)
  def processedInputs = applyConfig(params, config)

  // defaults first, so that explicitly provided values override them
  def mergedSets = processedInputs.collect { tuple ->
    def withDefaults = processedDefaults + tuple[1]
    [tuple[0], withDefaults] + tuple.drop(2)
  }

  _checkUniqueIds(mergedSets)
  return mergedSets
}
/**
 * Generate a nextflow Workflow that allows processing a channel of
 * Vdsl3 formatted events and apply a Viash config to them:
 *    - Gather default parameters from the Viash config and make
 *      sure that they are correctly formatted (see applyConfig method).
 *    - Format the input parameters (also using the applyConfig method).
 *    - Apply the default parameter to the input parameters.
 *    - Do some assertions:
 *        ~ Check if the event IDs in the channel are unique.
 *
 * The events in the channel are formatted as tuples, with the
 * first element of the tuples being a unique id of the parameter set,
 * and the second element containing the parameters themselves.
 * Optional extra elements of the tuples will be passed to the output as is.
 *
 * @param args A map that must contain a 'config' key that points
 *             to a parsed config (see readConfig()). Optionally, a
 *             'key' key can be provided which can be used to create a unique
 *             name for the workflow process.
 *
 * @return A workflow that allows processing a channel of Vdsl3 formatted events
 *         and apply a Viash config to them.
 */
def preprocessInputs(Map args) {
  // NOTE(review): wfKey and config are assigned without 'def' and therefore live in the
  // script binding. Left unchanged because the workflow body below reads 'config'; confirm
  // whether a local declaration is visible to the workflow definition before changing it.
  wfKey = args.key != null ? args.key : "preprocessInputs"
  config = args.config
  workflow preprocessInputsInstance {
    take:
    input_ch

    main:
    assert config instanceof Map :
      "Error in preprocessInputs: config must be a map. " +
      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"

    // all events are gathered into one list first, since the unique-id check
    // needs to see every parameter set at once
    output_ch = input_ch
      | toSortedList
      | map { paramList -> _preprocessInputsList(paramList, config) }
      | flatMap
    emit:
    output_ch
  }

  return preprocessInputsInstance.cloneWithName(wfKey)
}

/**
 * Assert that a map only contains expected keys and contains all required keys.
 *
 * @param map          The object to check; must be a Map.
 * @param expectedKeys Collection of keys which are allowed to occur in the map.
 * @param requiredKeys Collection of keys which must occur in the map.
 * @param mapName      Name of the map, used in the assertion messages (may be empty or null).
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // the message must refer to the closure parameter; there is no variable 'key' in this scope
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
// TODO: unit test processAuto
/**
 * Validate the 'auto' settings of a module.
 *
 * Entries with a null value are dropped first. Every expected setting must then be
 * present and must be a Boolean.
 *
 * @param auto A Map with the auto settings.
 *
 * @return A Map restricted to the keys "simplifyInput", "simplifyOutput",
 *         "transcript" and "publish".
 */
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // declared locally: an undeclared assignment would leak into the script binding
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key) : "Expected auto setting '$key' to be defined"
    assert auto[key] instanceof Boolean : "Expected auto setting '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
  }

  return auto.subMap(expectedKeys)
}

/**
 * Merge the provided process arguments with the defaults of this module and validate them.
 *
 * The function reads the script-level values thisDefaultProcessArgs, thisConfig,
 * params and workflow. It
 *   - resolves and validates 'key' (a closure is applied to the module name),
 *   - validates 'directives' (through processDirectives) and 'auto' (through processAuto),
 *   - adds publishDir entries when auto.publish or auto.transcript is enabled and a
 *     matching directory parameter was provided,
 *   - removes the publishDir, cpus, memory and label directives in a stub run,
 *   - checks that map, mapId, mapData, mapPassthrough and filter are closures when set,
 *   - converts 'fromState' and 'toState' given as a List or Map into closures.
 *
 * @param args A Map of process arguments which override the defaults.
 *
 * @return The processed arguments as a Map.
 */
def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${processArgs['directives'].getClass()}"
  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])

  // check whether auto exists and apply defaults
  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])

  // auto define publish, if so desired
  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
    //   " Example: params.publish_dir = \"./output/\""
    def publishDir =
      params.containsKey("publish_dir") ? params.publish_dir :
      params.containsKey("publishDir") ? params.publishDir :
      null

    if (publishDir != null) {
      processArgs.directives.publishDir = [[
        path: publishDir,
        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
        mode: "copy"
      ]]
    }
  }

  // auto define transcript, if so desired
  if (processArgs.auto.transcript == true) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
    //   " Example: params.transcripts_dir = \"./transcripts/\""
    def transcriptsDir =
      params.containsKey("transcripts_dir") ? params.transcripts_dir :
      params.containsKey("transcriptsDir") ? params.transcriptsDir :
      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
      null
    if (transcriptsDir != null) {
      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
      // the escaped \${...} parts are kept as literal text in the directive value
      def transcriptsPublishDir = [
        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
        mode: "copy"
      ]
      // append to the publishDir entries which are already defined, if any
      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
    }
  }

  // if this is a stubrun, remove certain directives?
  if (workflow.stubRun) {
    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
  }

  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
    if (processArgs.containsKey(nam) && processArgs[nam]) {
      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
    }
  }

  // check fromState
  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
  def fromState = processArgs["fromState"]
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  if (fromState) {
    // if fromState is a List, convert to map
    if (fromState instanceof List) {
      // check whether fromstate is a list[string]
      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
      fromState = fromState.collectEntries{[it, it]}
    }

    // if fromState is a map, convert to closure
    if (fromState instanceof Map) {
      // check whether fromstate is a map[string, string]
      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
      def fromStateMap = fromState.clone()
      // turn the map into a closure to be used later on
      fromState = { it ->
        def state = it[1]
        assert state instanceof Map : "Error in module '$key': the state is not a Map"
        def data = fromStateMap.collectEntries{newkey, origkey ->
          // check whether all values of fromState are in state
          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
          [newkey, state[origkey]]
        }
        data
      }
    }

    processArgs["fromState"] = fromState
  }

  // check toState
  def toState = processArgs["toState"]

  // by default, the second element of the tuple is used as the new state
  if (toState == null) {
    toState = { tup -> tup[1] }
  }

  // toState should be a closure, map[string, string], or list[string]
  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"

  // if toState is a List, convert to map
  if (toState instanceof List) {
    // check whether toState is a list[string]
    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
    toState = toState.collectEntries{[it, it]}
  }

  // if toState is a map, convert to closure
  if (toState instanceof Map) {
    // check whether toState is a map[string, string]
    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
    def toStateMap = toState.clone()
    // turn the map into a closure to be used later on
    toState = { it ->
      def output = it[1]
      def state = it[2]
      assert output instanceof Map : "Error in module '$key': the output is not a Map"
      assert state instanceof Map : "Error in module '$key': the state is not a Map"
      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
        // check whether all values of toState are in output
        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
        [newkey, output[origkey]]
      }
      // entries selected from the output are laid over the existing state
      state + extraEntries
    }
  }

  processArgs["toState"] = toState

  // return output
  return processArgs
}
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build a single message string: additional arguments to log.warn are format parameters, not extra text + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // 'def' keeps 'out' local to this closure instead of the shared script binding + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // parenthesised because '!' binds tighter than 'instanceof' in Groovy + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/integrate/scvi/nextflow.config b/target/nextflow/integrate/scvi/nextflow.config new file mode 100644 index 00000000000..d1499cd34c3 --- 
/dev/null +++ b/target/nextflow/integrate/scvi/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'scvi' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA' + author = 'Malte D. Luecken, Dries Schaumont, Matthias Beyens' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB 
} + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/integrate/scvi/nextflow_params.yaml b/target/nextflow/integrate/scvi/nextflow_params.yaml new file mode 100644 index 00000000000..a62879b7b90 --- /dev/null +++ b/target/nextflow/integrate/scvi/nextflow_params.yaml @@ -0,0 +1,51 @@ +# Inputs +input: # please fill in - example: "path/to/file" +modality: "rna" +# input_layer: "foo" +obs_batch: "sample_id" +# var_input: "foo" +# obs_labels: "foo" +# obs_size_factor: "foo" +# obs_categorical_covariate: ["foo"] +# obs_continuous_covariate: ["foo"] + +# Outputs +# output: "$id.$key.output.output" +# output_model: "$id.$key.output_model.output_model" +# output_compression: "gzip" +obsm_output: "X_scvi_integrated" + +# SCVI options +n_hidden_nodes: 128 +n_dimensions_latent_space: 30 +n_hidden_layers: 2 +dropout_rate: 0.1 +dispersion: "gene" +gene_likelihood: "nb" + +# Variational auto-encoder model options +use_layer_normalization: "both" +use_batch_normalization: "none" +encode_covariates: true +deeply_inject_covariates: false +use_observed_lib_size: false + +# Early stopping arguments +# early_stopping: true +early_stopping_monitor: "elbo_validation" +early_stopping_patience: 45 
+early_stopping_min_delta: 0.0 + +# Learning parameters +# max_epochs: 123 +reduce_lr_on_plateau: true +lr_factor: 0.6 +lr_patience: 30 + +# Data validation +n_obs_min_count: 0 +n_var_min_count: 0 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/integrate/scvi/nextflow_schema.json b/target/nextflow/integrate/scvi/nextflow_schema.json new file mode 100644 index 00000000000..c360d430245 --- /dev/null +++ b/target/nextflow/integrate/scvi/nextflow_schema.json @@ -0,0 +1,520 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "scvi", +"description": "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file", + "help_text": "Type: `file`, required. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "input_layer": { + "type": + "string", + "description": "Type: `string`. Input layer to use", + "help_text": "Type: `string`. Input layer to use. If None, X is used" + + } + + + , + "obs_batch": { + "type": + "string", + "description": "Type: `string`, default: `sample_id`. Column name discriminating between your batches", + "help_text": "Type: `string`, default: `sample_id`. Column name discriminating between your batches." + , + "default": "sample_id" + } + + + , + "var_input": { + "type": + "string", + "description": "Type: `string`. ", + "help_text": "Type: `string`. .var column containing highly variable genes. By default, do not subset genes." 
+ + } + + + , + "obs_labels": { + "type": + "string", + "description": "Type: `string`. Key in adata", + "help_text": "Type: `string`. Key in adata.obs for label information. Categories will automatically be \nconverted into integer categories and saved to adata.obs[\u0027_scvi_labels\u0027].\nIf None, assigns the same label to all the data.\n" + + } + + + , + "obs_size_factor": { + "type": + "string", + "description": "Type: `string`. Key in adata", + "help_text": "Type: `string`. Key in adata.obs for size factor information. Instead of using library size as a size factor,\nthe provided size factor column will be used as offset in the mean of the likelihood.\nAssumed to be on linear scale.\n" + + } + + + , + "obs_categorical_covariate": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata.obs that correspond to categorical data. These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space).\nThus, these should not be used for biologically-relevant factors that you do _not_ want to correct for.\n" + + } + + + , + "obs_continuous_covariate": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Keys in adata.obs that correspond to continuous data. These covariates can be added in\naddition to the batch covariate and are also treated as nuisance factors\n(i.e., the model tries to minimize their effects on the latent space). 
Thus, these should not be\nused for biologically-relevant factors that you do _not_ want to correct for.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file." + , + "default": "$id.$key.output.output" + } + + + , + "output_model": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_model.output_model`. Folder where the state of the trained model will be saved to", + "help_text": "Type: `file`, default: `$id.$key.output_model.output_model`. Folder where the state of the trained model will be saved to." + , + "default": "$id.$key.output_model.output_model" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "obsm_output": { + "type": + "string", + "description": "Type: `string`, default: `X_scvi_integrated`. In which ", + "help_text": "Type: `string`, default: `X_scvi_integrated`. In which .obsm slot to store the resulting integrated embedding." + , + "default": "X_scvi_integrated" + } + + +} +}, + + + "scvi options" : { + "title": "SCVI options", + "type": "object", + "description": "No description", + "properties": { + + + "n_hidden_nodes": { + "type": + "integer", + "description": "Type: `integer`, default: `128`. Number of nodes per hidden layer", + "help_text": "Type: `integer`, default: `128`. Number of nodes per hidden layer." 
+ , + "default": "128" + } + + + , + "n_dimensions_latent_space": { + "type": + "integer", + "description": "Type: `integer`, default: `30`. Dimensionality of the latent space", + "help_text": "Type: `integer`, default: `30`. Dimensionality of the latent space." + , + "default": "30" + } + + + , + "n_hidden_layers": { + "type": + "integer", + "description": "Type: `integer`, default: `2`. Number of hidden layers used for encoder and decoder neural-networks", + "help_text": "Type: `integer`, default: `2`. Number of hidden layers used for encoder and decoder neural-networks." + , + "default": "2" + } + + + , + "dropout_rate": { + "type": + "number", + "description": "Type: `double`, default: `0.1`. Dropout rate for the neural networks", + "help_text": "Type: `double`, default: `0.1`. Dropout rate for the neural networks." + , + "default": "0.1" + } + + + , + "dispersion": { + "type": + "string", + "description": "Type: `string`, default: `gene`, choices: ``gene`, `gene-batch`, `gene-label`, `gene-cell``. Set the behavior for the dispersion for negative binomial distributions:\n- gene: dispersion parameter of negative binomial is constant per gene across cells\n- gene-batch: dispersion can differ between different batches\n- gene-label: dispersion can differ between different labels\n- gene-cell: dispersion can differ for every gene in every cell\n", + "help_text": "Type: `string`, default: `gene`, choices: ``gene`, `gene-batch`, `gene-label`, `gene-cell``. 
Set the behavior for the dispersion for negative binomial distributions:\n- gene: dispersion parameter of negative binomial is constant per gene across cells\n- gene-batch: dispersion can differ between different batches\n- gene-label: dispersion can differ between different labels\n- gene-cell: dispersion can differ for every gene in every cell\n", + "enum": ["gene", "gene-batch", "gene-label", "gene-cell"] + + , + "default": "gene" + } + + + , + "gene_likelihood": { + "type": + "string", + "description": "Type: `string`, default: `nb`, choices: ``nb`, `zinb`, `poisson``. Model used to generate the expression data from a count-based likelihood distribution", + "help_text": "Type: `string`, default: `nb`, choices: ``nb`, `zinb`, `poisson``. Model used to generate the expression data from a count-based likelihood distribution.\n- nb: Negative binomial distribution\n- zinb: Zero-inflated negative binomial distribution\n- poisson: Poisson distribution\n", + "enum": ["nb", "zinb", "poisson"] + + , + "default": "nb" + } + + +} +}, + + + "variational auto-encoder model options" : { + "title": "Variational auto-encoder model options", + "type": "object", + "description": "No description", + "properties": { + + + "use_layer_normalization": { + "type": + "string", + "description": "Type: `string`, default: `both`, choices: ``encoder`, `decoder`, `none`, `both``. Neural networks for which to enable layer normalization", + "help_text": "Type: `string`, default: `both`, choices: ``encoder`, `decoder`, `none`, `both``. Neural networks for which to enable layer normalization. \n", + "enum": ["encoder", "decoder", "none", "both"] + + , + "default": "both" + } + + + , + "use_batch_normalization": { + "type": + "string", + "description": "Type: `string`, default: `none`, choices: ``encoder`, `decoder`, `none`, `both``. Neural networks for which to enable batch normalization", + "help_text": "Type: `string`, default: `none`, choices: ``encoder`, `decoder`, `none`, `both``. 
Neural networks for which to enable batch normalization. \n", + "enum": ["encoder", "decoder", "none", "both"] + + , + "default": "none" + } + + + , + "encode_covariates": { + "type": + "boolean", + "description": "Type: `boolean_false`, default: `true`. Whether to concatenate covariates to expression in encoder", + "help_text": "Type: `boolean_false`, default: `true`. Whether to concatenate covariates to expression in encoder" + , + "default": "True" + } + + + , + "deeply_inject_covariates": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Whether to concatenate covariates into output of hidden layers in encoder/decoder", + "help_text": "Type: `boolean_true`, default: `false`. Whether to concatenate covariates into output of hidden layers in encoder/decoder. \nThis option only applies when n_layers \u003e 1. The covariates are concatenated to\nthe input of subsequent hidden layers.\n" + , + "default": "False" + } + + + , + "use_observed_lib_size": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use observed library size for RNA as scaling factor in mean of conditional distribution", + "help_text": "Type: `boolean_true`, default: `false`. Use observed library size for RNA as scaling factor in mean of conditional distribution.\n" + , + "default": "False" + } + + +} +}, + + + "early stopping arguments" : { + "title": "Early stopping arguments", + "type": "object", + "description": "No description", + "properties": { + + + "early_stopping": { + "type": + "boolean", + "description": "Type: `boolean`. Whether to perform early stopping with respect to the validation set", + "help_text": "Type: `boolean`. Whether to perform early stopping with respect to the validation set." + + } + + + , + "early_stopping_monitor": { + "type": + "string", + "description": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. 
Metric logged during validation set epoch", + "help_text": "Type: `string`, default: `elbo_validation`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. Metric logged during validation set epoch.", + "enum": ["elbo_validation", "reconstruction_loss_validation", "kl_local_validation"] + + , + "default": "elbo_validation" + } + + + , + "early_stopping_patience": { + "type": + "integer", + "description": "Type: `integer`, default: `45`. Number of validation epochs with no improvement after which training will be stopped", + "help_text": "Type: `integer`, default: `45`. Number of validation epochs with no improvement after which training will be stopped." + , + "default": "45" + } + + + , + "early_stopping_min_delta": { + "type": + "number", + "description": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i", + "help_text": "Type: `double`, default: `0.0`. Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement." + , + "default": "0.0" + } + + +} +}, + + + "learning parameters" : { + "title": "Learning parameters", + "type": "object", + "description": "No description", + "properties": { + + + "max_epochs": { + "type": + "integer", + "description": "Type: `integer`. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest", + "help_text": "Type: `integer`. Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest." + + } + + + , + "reduce_lr_on_plateau": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus", + "help_text": "Type: `boolean`, default: `true`. 
Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus." + , + "default": "True" + } + + + , + "lr_factor": { + "type": + "number", + "description": "Type: `double`, default: `0.6`. Factor to reduce learning rate", + "help_text": "Type: `double`, default: `0.6`. Factor to reduce learning rate." + , + "default": "0.6" + } + + + , + "lr_patience": { + "type": + "number", + "description": "Type: `double`, default: `30`. Number of epochs with no improvement after which learning rate will be reduced", + "help_text": "Type: `double`, default: `30`. Number of epochs with no improvement after which learning rate will be reduced." + , + "default": "30" + } + + +} +}, + + + "data validition" : { + "title": "Data validition", + "type": "object", + "description": "No description", + "properties": { + + + "n_obs_min_count": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. Minimum number of cells threshold ensuring that every obs_batch category has sufficient observations (cells) for model training", + "help_text": "Type: `integer`, default: `0`. Minimum number of cells threshold ensuring that every obs_batch category has sufficient observations (cells) for model training." + , + "default": "0" + } + + + , + "n_var_min_count": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. Minimum number of genes threshold ensuring that every var_input filter has sufficient observations (genes) for model training", + "help_text": "Type: `integer`, default: `0`. Minimum number of genes threshold ensuring that every var_input filter has sufficient observations (genes) for model training." + , + "default": "0" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/scvi options" + }, + + { + "$ref": "#/definitions/variational auto-encoder model options" + }, + + { + "$ref": "#/definitions/early stopping arguments" + }, + + { + "$ref": "#/definitions/learning parameters" + }, + + { + "$ref": "#/definitions/data validition" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/integrate/scvi/subset_vars.py b/target/nextflow/integrate/scvi/subset_vars.py new file mode 100644 index 00000000000..10011c8fcca --- /dev/null +++ b/target/nextflow/integrate/scvi/subset_vars.py @@ -0,0 +1,16 @@ +def subset_vars(adata, subset_col): + """Subset highly variable genes from AnnData object + + Parameters + ---------- + adata : AnnData + Annotated data object + subset_col : str + Name of the boolean column in `adata.var` that contains the information if features should be used or not + + Returns + ------- + AnnData + Copy of `adata` with subsetted features + """ + return adata[:, adata.var[subset_col]].copy() diff --git a/target/nextflow/integrate/totalvi/.config.vsh.yaml b/target/nextflow/integrate/totalvi/.config.vsh.yaml new file mode 100644 index 00000000000..b5c8d3205e9 --- /dev/null +++ b/target/nextflow/integrate/totalvi/.config.vsh.yaml @@ -0,0 +1,348 @@ +functionality: + name: "totalvi" + namespace: "integrate" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Inputs" + arguments: + - type: 
"file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file with query data to integrate with reference." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + description: "Input h5mu file with reference data to train the TOTALVI model." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--force_retrain" + alternatives: + - "-f" + description: "If true, retrain the model and save it to reference_model_path" + info: null + direction: "input" + dest: "par" + - type: "string" + name: "--query_modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--query_proteins_modality" + description: "Name of the modality in the input (query) h5mu file containing\ + \ protein data" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_proteins_modality" + description: "Name of the modality containing proteins in the reference" + info: null + default: + - "prot" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. If None, X is used" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: "Column name discriminating between your batches." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_input" + description: ".var column containing highly variable genes. By default, do not\ + \ subset genes." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_output" + description: "In which .obsm slot to store the resulting integrated embedding." + info: null + default: + - "X_integrated_totalvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_normalized_rna_output" + description: "In which .obsm slot to store the normalized RNA from TOTALVI." + info: null + default: + - "X_totalvi_normalized_rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_normalized_protein_output" + description: "In which .obsm slot to store the normalized protein data from\ + \ TOTALVI." + info: null + default: + - "X_totalvi_normalized_protein" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference_model_path" + description: "Directory with the reference model. 
If not exists, trained model\ + \ will be saved there" + info: null + default: + - "totalvi_model_reference" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--query_model_path" + description: "Directory, where the query model will be saved" + info: null + default: + - "totalvi_model_query" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--max_epochs" + description: "Number of passes through the dataset" + info: null + default: + - 400 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_query_epochs" + description: "Number of passes through the dataset, when fine-tuning model for\ + \ query" + info: null + default: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--weight_decay" + description: "Weight decay, when fine-tuning model for query" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9" + target_organization: "openpipelines-bio" 
+ target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "torchmetrics~=0.11.0" + - "scvi-tools~=1.0.3" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/totalvi" + executable: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/totalvi/totalvi" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/integrate/totalvi/main.nf b/target/nextflow/integrate/totalvi/main.nf new file mode 100644 index 00000000000..0ac4451bc7a --- /dev/null +++ b/target/nextflow/integrate/totalvi/main.nf @@ -0,0 +1,2923 @@ +// totalvi 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Vladimir Shitov + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "totalvi", + "namespace" : "integrate", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Vladimir Shitov", + "info" : { + "role" : "Contributor", + "links" : { + "email" : "vladimir.shitov@helmholtz-muenchen.de", + "github" : "vladimirshitov", + "orcid" : "0000-0002-1960-8812", + "linkedin" : "vladimir-shitov-9a659513b" + }, + "organizations" : [ + { + "name" : "Helmholtz Munich", + "href" : "https://www.helmholtz-munich.de", + "role" : "PhD Candidate" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file with query data to integrate with reference.", 
+ "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "alternatives" : [ + "-r" + ], + "description" : "Input h5mu file with reference data to train the TOTALVI model.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--force_retrain", + "alternatives" : [ + "-f" + ], + "description" : "If true, retrain the model and save it to reference_model_path", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--query_modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--query_proteins_modality", + "description" : "Name of the modality in the input (query) h5mu file containing protein data", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_proteins_modality", + "description" : "Name of the modality containing proteins in the reference", + "default" : [ + "prot" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_layer", + "description" : "Input layer to use. 
If None, X is used", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_batch", + "description" : "Column name discriminating between your batches.", + "default" : [ + "sample_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_input", + "description" : ".var column containing highly variable genes. By default, do not subset genes.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_output", + "description" : "In which .obsm slot to store the resulting integrated embedding.", + "default" : [ + "X_integrated_totalvi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_normalized_rna_output", + "description" : "In which .obsm slot to store the normalized RNA from TOTALVI.", + "default" : [ + "X_totalvi_normalized_rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_normalized_protein_output", + "description" : "In which .obsm slot to store the normalized protein data from TOTALVI.", + "default" : [ + "X_totalvi_normalized_protein" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : 
"--reference_model_path", + "description" : "Directory with the reference model. If not exists, trained model will be saved there", + "default" : [ + "totalvi_model_reference" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--query_model_path", + "description" : "Directory, where the query model will be saved", + "default" : [ + "totalvi_model_query" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Learning parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--max_epochs", + "description" : "Number of passes through the dataset", + "default" : [ + 400 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--max_query_epochs", + "description" : "Number of passes through the dataset, when fine-tuning model for query", + "default" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--weight_decay", + "description" : "Weight decay, when fine-tuning model for query", + "default" : [ + 0.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Performs mapping to the reference by totalvi model: 
https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libopenblas-dev", + "liblapack-dev", + "gfortran" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "torchmetrics~=0.11.0", + "scvi-tools~=1.0.3" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + 
"mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + }, + { + "type" : "native", + "id" : "native" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/integrate/totalvi/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/integrate/totalvi", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from typing import Tuple + +import os +import sys +import mudata +from anndata import AnnData # For type hints +from mudata import MuData # For type hints +import numpy as np +import scvi +from scipy.sparse import issparse + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'force_retrain': $( if [ ! 
-z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'query_modality': $( if [ ! -z ${VIASH_PAR_QUERY_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'query_proteins_modality': $( if [ ! -z ${VIASH_PAR_QUERY_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_QUERY_PROTEINS_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_proteins_modality': $( if [ ! -z ${VIASH_PAR_REFERENCE_PROTEINS_MODALITY+x} ]; then echo "r'${VIASH_PAR_REFERENCE_PROTEINS_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_normalized_rna_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_RNA_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_normalized_protein_output': $( if [ ! -z ${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_NORMALIZED_PROTEIN_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_model_path': $( if [ ! 
-z ${VIASH_PAR_REFERENCE_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_REFERENCE_MODEL_PATH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'query_model_path': $( if [ ! -z ${VIASH_PAR_QUERY_MODEL_PATH+x} ]; then echo "r'${VIASH_PAR_QUERY_MODEL_PATH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'max_epochs': $( if [ ! -z ${VIASH_PAR_MAX_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_query_epochs': $( if [ ! -z ${VIASH_PAR_MAX_QUERY_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MAX_QUERY_EPOCHS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'weight_decay': $( if [ ! -z ${VIASH_PAR_WEIGHT_DECAY+x} ]; then echo "float(r'${VIASH_PAR_WEIGHT_DECAY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
# START TEMPORARY WORKAROUND setup_logger
# reason: resources aren't available when using Nextflow fusion
# from setup_logger import setup_logger
def setup_logger():
    """Configure the root logger to write INFO-level records to stdout.

    Returns:
        The root logger, with a stdout stream handler attached.
    """
    import logging
    from sys import stdout

    # Build the handler first, then hook it onto the root logger.
    stream_handler = logging.StreamHandler(stdout)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    )

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    root_logger.addHandler(stream_handler)
    return root_logger
# END TEMPORARY WORKAROUND setup_logger
logger = setup_logger()


def align_proteins_names(adata_reference: AnnData, mdata_query: MuData, adata_query: AnnData,
                         reference_proteins_key: str, query_proteins_key: str) -> AnnData:
    """Store the query proteins under the same .obsm key as the reference uses.

    Args:
        adata_reference: reference data; must hold proteins in ``.obsm[reference_proteins_key]``.
        mdata_query: query MuData, only consulted to see whether a protein modality exists.
        adata_query: query data that is modified in place.
        reference_proteins_key: ``.obsm`` key used by the reference.
        query_proteins_key: name of the query protein modality (may be empty or None).

    Returns:
        ``adata_query``, with ``.obsm[reference_proteins_key]`` populated. When the query
        has no protein modality the slot is filled with zeros, one column per reference
        protein.
    """
    reference_proteins = adata_reference.obsm[reference_proteins_key]

    query_has_proteins = bool(query_proteins_key) and query_proteins_key in mdata_query.mod
    if query_has_proteins:
        # Expose the query proteins under the key the reference model expects
        adata_query.obsm[reference_proteins_key] = adata_query.obsm[query_proteins_key]
    else:
        # No protein measurements for the query: pad with an all-zero matrix
        padding_shape = (adata_query.n_obs, reference_proteins.shape[1])
        adata_query.obsm[reference_proteins_key] = np.zeros(padding_shape)

    return adata_query
def extract_proteins_to_anndata(mdata: MuData, rna_modality_key, protein_modality_key, input_layer, hvg_var_key=None) -> AnnData:
    """Convert a MuData object into the single AnnData layout that TOTALVI requires.

    TOTALVI requires data to be stored in AnnData format with protein counts in an
    .obsm slot. This function performs the conversion.

    Args:
        mdata: MuData holding the RNA modality and, optionally, a protein modality.
        rna_modality_key: name of the RNA modality in ``mdata.mod``.
        protein_modality_key: name of the protein modality; skipped when it is not
            present in ``mdata.mod``.
        input_layer: slot of the protein modality to read; ``None`` means ``.X``.
        hvg_var_key: optional boolean ``.var`` column used to subset the genes.

    Returns:
        A copy of the RNA modality. When the protein modality exists, its values are
        stored densely in ``.obsm[protein_modality_key]``.
    """
    adata: AnnData = mdata.mod[rna_modality_key].copy()

    if hvg_var_key:
        selected_genes = adata.var_names[adata.var[hvg_var_key]]
        adata = adata[:, selected_genes].copy()

    if protein_modality_key in mdata.mod:
        # Put the proteins modality into .obsm slot
        proteins_reference_adata = mdata.mod[protein_modality_key].copy()

        if input_layer is None:
            proteins = proteins_reference_adata.X
        else:
            # NOTE(review): the argument is called "input_layer" but it is looked up in
            # .obsm rather than .layers -- confirm which slot callers actually populate.
            proteins = proteins_reference_adata.obsm[input_layer]

        if issparse(proteins):
            proteins = proteins.toarray()

        adata.obsm[protein_modality_key] = proteins

    return adata


def build_reference_model(adata_reference: AnnData, max_train_epochs: int = 400) -> scvi.model.TOTALVI:
    """Train a TOTALVI model on the reference and save it to par["reference_model_path"].

    Args:
        adata_reference: reference data, already registered with ``setup_anndata``.
        max_train_epochs: value passed to ``train``.

    Returns:
        The trained reference model.
    """
    vae_reference = scvi.model.TOTALVI(adata_reference, use_layer_norm="both", use_batch_norm="none")
    vae_reference.train(max_train_epochs)

    # Retraining is requested precisely when the target directory may already exist
    # (force_retrain, or a directory without model.pt); without overwrite the save fails.
    vae_reference.save(par["reference_model_path"], overwrite=True)

    return vae_reference


def is_retraining_model() -> bool:
    """Decide whether the reference model should be trained.

    Returns:
        True when par["reference_model_path"] is not a directory containing "model.pt",
        or when par["force_retrain"] is truthy.
    """
    model_dir = par["reference_model_path"]
    trained_model_exists = os.path.isdir(model_dir) and ("model.pt" in os.listdir(model_dir))
    # bool(): force_retrain may be None, and the declared return type is bool
    return bool(par["force_retrain"]) or not trained_model_exists
def map_query_to_reference(mdata_reference: MuData, mdata_query: MuData, adata_query: AnnData) -> Tuple[scvi.model.TOTALVI, AnnData]:
    """Build model on the provided reference if necessary, and map query to the reference.

    Args:
        mdata_reference: reference MuData.
        mdata_query: query MuData (used to detect whether a protein modality exists).
        adata_query: query AnnData produced by ``extract_proteins_to_anndata``.

    Returns:
        The query model after training, and the query AnnData as prepared for that model.
    """
    adata_reference: AnnData = extract_proteins_to_anndata(mdata_reference,
                                                           rna_modality_key=par["reference_modality"],
                                                           protein_modality_key=par["reference_proteins_modality"],
                                                           input_layer=par["input_layer"],
                                                           hvg_var_key=par["var_input"])

    scvi.model.TOTALVI.setup_anndata(
        adata_reference,
        batch_key=par["obs_batch"],
        protein_expression_obsm_key=par["reference_proteins_modality"]
    )

    # Train a fresh reference model, or reuse the one stored on disk
    if is_retraining_model():
        vae_reference = build_reference_model(adata_reference, max_train_epochs=par["max_epochs"])
    else:
        vae_reference = scvi.model.TOTALVI.load(dir_path=par["reference_model_path"], adata=adata_reference)

    adata_query = align_proteins_names(adata_reference, mdata_query, adata_query,
                                       reference_proteins_key=par["reference_proteins_modality"],
                                       query_proteins_key=par["query_proteins_modality"])

    # Reorder genes and pad missing genes with 0s
    scvi.model.TOTALVI.prepare_query_anndata(adata_query, vae_reference)

    # Train the model for query
    vae_query = scvi.model.TOTALVI.load_query_data(
        adata_query,
        vae_reference
    )
    vae_query.train(par["max_query_epochs"], plan_kwargs=dict(weight_decay=par["weight_decay"]))

    return vae_query, adata_query


def main():
    """Map the query onto the reference with TOTALVI and write the results.

    Reads par["input"] and par["reference"], stores the latent representation and the
    normalized expression in .obsm slots of the query modalities, writes the query to
    par["output"] and saves the query model to par["query_model_path"].

    Raises:
        ValueError: if the reference path does not end in ".h5mu".
    """
    # Validate the same (stripped) path that is read below, and do so before any I/O,
    # so a path with stray surrounding whitespace is not rejected as a wrong format.
    reference_path = par["reference"].strip()
    if not reference_path.endswith(".h5mu"):
        raise ValueError("Incorrect format of reference, please provide a .h5mu file")

    mdata_query = mudata.read(par["input"].strip())
    adata_query = extract_proteins_to_anndata(mdata_query,
                                              rna_modality_key=par["query_modality"],
                                              protein_modality_key=par["query_proteins_modality"],
                                              input_layer=par["input_layer"],
                                              hvg_var_key=par["var_input"])

    logger.info("Reading reference")
    mdata_reference = mudata.read(reference_path)

    logger.info("Mapping query to the reference")
    vae_query, adata_query = map_query_to_reference(mdata_reference, mdata_query, adata_query)

    # NOTE(review): adata_query is a working copy that is not written below; confirm
    # whether this marker was meant to be stored on mdata_query instead.
    adata_query.uns["integration_method"] = "totalvi"

    logger.info("Getting the latent representation of query")
    mdata_query.mod[par["query_modality"]].obsm[par["obsm_output"]] = vae_query.get_latent_representation()

    norm_rna, norm_protein = vae_query.get_normalized_expression()
    mdata_query.mod[par["query_modality"]].obsm[par["obsm_normalized_rna_output"]] = norm_rna.to_numpy()

    # Normalized proteins are only stored when the query has a protein modality
    if par["query_proteins_modality"] in mdata_query.mod:
        mdata_query.mod[par["query_proteins_modality"]].obsm[par["obsm_normalized_protein_output"]] = norm_protein.to_numpy()

    logger.info("Updating mdata")
    mdata_query.update()

    logger.info("Saving updated query data")
    mdata_query.write_h5mu(par["output"].strip())

    logger.info("Saving query model")
    vae_query.save(par["query_model_path"], overwrite=True)
// Default settings for this module's process. Every entry set to `null` below is an
// optional hook that is left unset by default.
thisDefaultProcessArgs = [
  // key to be used to trace the process and determine output names
  key: thisConfig.functionality.name,
  // fixed arguments to be passed to script
  args: [:],
  // default directives (parsed from the JSON text below)
  directives: jsonSlurper.parseText('''{
  "container" : {
    "registry" : "ghcr.io",
    "image" : "openpipelines-bio/integrate_totalvi",
    "tag" : "0.12.0"
  },
  "label" : [
    "highmem",
    "highcpu"
  ],
  "tag" : "$id"
}'''),
  // auto settings (parsed from the JSON text below)
  auto: jsonSlurper.parseText('''{
  "simplifyInput" : true,
  "simplifyOutput" : false,
  "transcript" : false,
  "publish" : false
}'''),

  // Apply a map over the incoming tuple
  // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }`
  map: null,

  // Apply a map over the ID element of a tuple (i.e. the first element)
  // Example: `{ id -> id + "_foo" }`
  mapId: null,

  // Apply a map over the data element of a tuple (i.e. the second element)
  // Example: `{ data -> [ input: data.output ] }`
  mapData: null,

  // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements)
  // Example: `{ pt -> pt.drop(1) }`
  mapPassthrough: null,

  // Filter the channel
  // Example: `{ tup -> tup[0] == "foo" }`
  filter: null,

  // Rename keys in the data field of the tuple (i.e. the second element)
  // Will likely be deprecated in favour of `fromState`.
  // Example: `[ "new_key": "old_key" ]`
  renameKeys: null,

  // Fetch data from the state and pass it to the module without altering the current state.
  //
  // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function.
  //
  // - If it is `null`, the state will be passed to the module as is.
  // - If it is a `List[String]`, the data will be the values of the state at the given keys.
  // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map.
  // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data.
  //
  // Example: `{ id, state -> [input: state.fastq_file] }`
  // Default: `null`
  fromState: null,

  // Determine how the state should be updated after the module has been run.
  //
  // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function.
  //
  // - If it is `null`, the state will be replaced with the output of the module.
  // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys.
  // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map.
  // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state.
  //
  // Example: `{ id, output, state -> state + [counts: state.output] }`
  // Default: `{ id, output, state -> output }`
  toState: null,

  // Whether or not to print debug messages
  // Default: `false`
  debug: false
]
// param helpers //

/**
 * Check whether a pipeline parameter was provided.
 *
 * @param name Name of the parameter (without leading dashes).
 * @return true when `params` contains the key and its value is not the empty string.
 */
def paramExists(name) {
  return params.containsKey(name) && params[name] != ""
}

/**
 * Abort the run with an error message when a parameter is missing.
 *
 * @param name Name of the parameter (without leading dashes).
 * @param description Text appended to the error message.
 */
def assertParamExists(name, description) {
  if (!paramExists(name)) {
    exit 1, "ERROR: Please provide a --${name} parameter ${description}"
  }
}

// helper functions for reading params from file //

/**
 * Resolve `child` against the directory that contains `parent`.
 *
 * @param parent Path of the file that mentions `child`.
 * @param child A path or URI. Values containing "://" and absolute paths are returned unchanged.
 * @return `child` itself, or the absolute location of `parent` with its last path
 *   component replaced by `child`.
 */
def getChild(parent, child) {
  if (child.contains("://") || Paths.get(child).isAbsolute()) {
    child
  } else {
    def parentAbsolute = Paths.get(parent).toAbsolutePath().toString()
    parentAbsolute.replaceAll('/[^/]*$', "/") + child
  }
}

/**
 * Read a csv file into a list of maps, one map per data row.
 *
 * Lines starting with "#" are skipped. The first remaining line is the header.
 * Empty fields are left out of the row's map, and surrounding double quotes are removed.
 *
 * @param file_path A Path, or anything `file()` accepts.
 * @return A list of maps from column name to field value.
 */
def readCsv(file_path) {
  def output = []
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  // split on commas that are followed by an even number of double quotes
  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = Pattern.compile('''"(.*)"''')
  // the matcher is a declared local, so nothing leaks into the script binding
  def unquote = { field ->
    def m = removeQuote.matcher(field)
    m.find() ? m.replaceFirst('$1') : field
  }

  def br = Files.newBufferedReader(inputFile)
  try {
    def row = -1
    def header = null
    def line
    while ((line = br.readLine()) != null) {
      row++
      if (line.startsWith("#")) {
        continue
      }
      def fields = splitRegex.split(line, -1)
      if (header == null) {
        // first non-comment line
        header = fields.collect{field -> unquote(field)}
        continue
      }
      def data = fields.collect{field -> field == "" ? null : unquote(field)}
      assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

      def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
      output.add(dataMap)
    }
    assert header != null: "CSV file should contain a header"
  } finally {
    // always release the file handle, also when an assertion fails
    br.close()
  }

  output
}
/** Parse a JSON document given as a string. */
def readJsonBlob(str) {
  def jsonSlurper = new JsonSlurper()
  jsonSlurper.parseText(str)
}

/** Parse a JSON file; `file_path` is a Path or anything `file()` accepts. */
def readJson(file_path) {
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
  def jsonSlurper = new JsonSlurper()
  jsonSlurper.parse(inputFile)
}

/** Parse a YAML document given as a string. */
def readYamlBlob(str) {
  def yamlSlurper = new Yaml()
  yamlSlurper.load(str)
}

/** Parse a YAML file; `file_path` is a Path or anything `file()` accepts. */
def readYaml(file_path) {
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
  def yamlSlurper = new Yaml()
  yamlSlurper.load(inputFile)
}

// helper functions for reading a viash config in groovy //

/**
 * Fill in the defaults of one argument definition (modifies `arg` in place).
 *
 * based on how Functionality.scala is implemented
 *
 * @param arg Map describing the argument; must at least contain `name`.
 * @return The same map, with `multiple`, `required`, `direction`, `multiple_sep` and
 *   `plainName` set, plus type-specific defaults.
 */
def processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":"
  // name without its leading dashes
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  if (arg.type == "file" && arg.direction == "output") {
    // output files get a templated default name; the extension is taken from the
    // declared default or, failing that, the example
    def mult = arg.multiple ? "_*" : ""
    def extSearch = ""
    if (arg.default != null) {
      extSearch = arg.default
    } else if (arg.example != null) {
      extSearch = arg.example
    }
    if (extSearch instanceof List) {
      extSearch = extSearch[0]
    }
    def extSearchResult = extSearch.find("\\.[^\\.]+\$")
    def ext = extSearchResult != null ? extSearchResult : ""
    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
  }

  if (!arg.multiple) {
    // single-valued arguments: unwrap list-valued default/example
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}

/**
 * Make sure the given arguments are represented in the argument group called `name`.
 *
 * based on how Functionality.scala is implemented
 *
 * @param argumentGroups The configured argument groups.
 * @param name Name of the group the arguments belong to.
 * @param arguments Arguments (maps with a `plainName`) that should be part of the group.
 * @return An empty list, or a one-element list holding the existing, extended or newly
 *   created group. The groups in `argumentGroups` are never modified.
 */
def processArgumentGroup(argumentGroups, name, arguments) {
  def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet()

  // Check if 'arguments' is in 'argumentGroups'.
  def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))}

  // Check whether an argument group of 'name' exists.
  def existing = argumentGroups.find{gr -> name == gr.name}

  // if there are no arguments missing from the argument group, just return the existing group (if any)
  if (argumentsNotInGroup.isEmpty()) {
    return existing == null ? [] : [existing]

  // if there are missing arguments and there is an existing group, add the missing arguments to it
  } else if (existing != null) {
    def newEx = existing.clone()
    // clone() is shallow: build a new list instead of appending to the list that is
    // shared with the configured group
    newEx.arguments = existing.arguments + argumentsNotInGroup.findAll{it !instanceof String}
    return [newEx]

  // else create a new group
  } else {
    def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}]
    return [newEx]
  }
}

/**
 * Fill in the defaults of a viash config and derive `allArguments` / `allArgumentGroups`.
 *
 * based on how Functionality.scala is implemented
 *
 * @param config Parsed viash config (modified in place).
 * @return The same config.
 */
def processConfig(config) {
  // TODO: assert .functionality etc.
  if (config.functionality.inputs) {
    System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.")
  }
  if (config.functionality.outputs) {
    System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.")
  }

  // set defaults for inputs
  config.functionality.inputs =
    (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg ->
      arg.type = arg.type != null ? arg.type : "file"
      arg.direction = "input"
      processArgument(arg)
    }
  // set defaults for outputs
  config.functionality.outputs =
    (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg ->
      arg.type = arg.type != null ? arg.type : "file"
      arg.direction = "output"
      processArgument(arg)
    }
  // set defaults for arguments
  config.functionality.arguments =
    (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg ->
      processArgument(arg)
    }
  // set defaults for argument_group arguments
  // (string entries are references by name; only their leading dashes are removed)
  config.functionality.argument_groups =
    (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp ->
      grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg ->
        arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg)
      }
      grp
    }

  // create combined arguments list
  config.functionality.allArguments =
    config.functionality.inputs +
    config.functionality.outputs +
    config.functionality.arguments +
    config.functionality.argument_groups.collectMany{ group ->
      group.arguments.findAll{ it !instanceof String }
    }

  // add missing argument groups (based on Functionality::allArgumentGroups())
  def argGroups = config.functionality.argument_groups
  def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs)
  def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs)
  def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments)
  def groupsFiltered = argGroups.findAll{gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))}
  config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered

  config
}
/**
 * Read and post-process a viash config.
 *
 * @param file Location of the config; when null, `config.vsh.yaml` in the project
 *   directory is used.
 * @return The processed config.
 */
def readConfig(file) {
  def configLocation = file != null ? file : "$projectDir/config.vsh.yaml"
  return processConfig(readYaml(configLocation))
}

/**
 * Recursively merge two maps.
 *
 * Where both sides hold a map the maps are merged, where both hold a collection the
 * right-hand values are appended, and in every other case the right-hand value wins.
 *
 * @param lhs Base map (not modified).
 * @param rhs Map whose entries are merged into a copy of `lhs`.
 * @return The merged copy.
 */
def mergeMap(Map lhs, Map rhs) {
  def merged = lhs.clone()
  rhs.each { key, value ->
    def current = merged[key]
    if (current instanceof Map && value instanceof Map) {
      merged[key] = mergeMap(current, value)
    } else if (current instanceof Collection && value instanceof Collection) {
      merged[key] = current + value
    } else {
      merged[key] = value
    }
  }
  return merged
}

/**
 * Add the Nextflow-specific arguments (`--publish_dir`, `--param_list`) to a config.
 *
 * @param config A viash config.
 * @return The processed result of merging the extra argument group into `config`.
 */
def addGlobalParams(config) {
  def publishDirArgument = [
    'name': '--publish_dir',
    'required': true,
    'type': 'string',
    'description': 'Path to an output directory.',
    'example': 'output/',
    'multiple': false
  ]

  def paramListArgument = [
    'name': '--param_list',
    'required': false,
    'type': 'string',
    'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
        |
        |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
        |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
        |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
        |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
        |
        |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
    'example': 'my_params.yaml',
    'multiple': false,
    'hidden': true
  ]

  def nextflowArgumentGroup = [
    "name": "Nextflow input-output arguments",
    "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
    "arguments" : [publishDirArgument, paramListArgument]
  ]

  def localConfig = [
    "functionality" : [
      "argument_groups": [nextflowArgumentGroup]
    ]
  ]

  return processConfig(mergeMap(config, localConfig))
}
// helper functions for generating help //

/**
 * Greedily wrap the words of `str` into lines of at most `maxLength` characters.
 *
 * based on io.viash.helpers.Format.wordWrap
 *
 * @param str Text to wrap; it is split on single whitespace characters.
 * @param maxLength Maximum line length. A single longer word still gets its own line.
 * @return The wrapped lines.
 */
def formatWordWrap(str, maxLength) {
  def lines = []
  def line = null
  str.split("\\s").each { word ->
    if (line == null) {
      // the first word starts the first line, without a leading separator
      line = word
    } else if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  if (line != null) {
    lines.add(line)
  }
  return lines
}

/**
 * Wrap every newline-separated paragraph of `str` separately.
 *
 * based on Format.paragraphWrap
 *
 * @param str Text to wrap.
 * @param maxLength Maximum line length.
 * @return The wrapped lines of all paragraphs, in order.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    outLines.addAll(formatWordWrap(par, maxLength))
  }
  return outLines
}

/**
 * Render the help text of one argument.
 *
 * @param param A processed argument definition (see processArgument).
 * @return The argument name followed by its properties and wrapped description.
 */
def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  // flags that are listed next to the type when they are set
  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      example = param.example.toString()
    }
  }
  def min = param.min?.toString()
  def max = param.max?.toString()

  // Literal replace(): in a regex replacement string the backslash is itself an escape
  // character, which would drop it from the output.
  def escapeChoice = { choice ->
    def s1 = choice.replace("\n", "\\n")
    def s2 = s1.replace("\"", "\\\"")
    // quote the choice when it contains a comma or needed escaping
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    // show newlines inside a value as the two characters "\n" to keep it on one line
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}
/**
 * Render the complete help text of a component.
 *
 * Based on Helper.generateHelp() in Helper.scala
 *
 * @param config A processed viash config; `functionality.allArgumentGroups` and
 *   `functionality.allArguments` must be present.
 * @return Name and version, description, usage and one section per argument group.
 */
def generateHelp(config) {
  def fun = config.functionality

  // PART 1: NAME AND VERSION
  def versionSuffix = ""
  if (fun.version != null) {
    versionSuffix = " " + fun.version
  }
  def nameStr = fun.name + versionSuffix

  // PART 2: DESCRIPTION
  def descrStr = ""
  if (fun.description != null) {
    descrStr = "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
  }

  // PART 3: Usage
  def usageStr = ""
  if (fun.usage != null) {
    usageStr = "\n\nUsage:\n" + fun.usage.trim()
  }

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name

    def descriptionStr = ""
    if (argGroup.description != null) {
      descriptionStr = "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    }

    // entries given by name are looked up in allArguments; unknown names are dropped
    def resolved = argGroup.arguments.collect{arg ->
      if (arg instanceof String) {
        return fun.allArguments.find{it.plainName == arg}
      }
      return arg
    }
    def argumentStrs = resolved
      .findAll{it != null}
      .collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" + descriptionStr + argumentStrs.join("\n")
  }

  // FINAL: combine
  return nameStr + descrStr + usageStr + argGroupStrs.join("")
}
/**
 * Print the help text and stop the run when the `help` parameter was provided.
 *
 * @param config A viash config; the Nextflow-specific arguments are added before rendering.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Determine in which form `param_list` was provided.
 *
 * @param params The pipeline parameters.
 * @return "none" when `param_list` is absent or null, "asis" when it is not a String,
 *   "csv" / "json" / "yaml" based on the file extension, and "yaml_blob" otherwise.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// Script-wide flag shared by paramsToList, paramsToChannel and viashChannel so that the
// deprecation warning is printed only once.
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: turn the pipeline parameters into a list of argument sets.
 *
 * @param params The pipeline parameters (optionally containing `param_list`).
 * @param config A processed viash config.
 * @return One map of argument values per parameter set; each must have a unique `id`.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // each reader returns [ file the parameter sets came from (or null), parameter sets ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params (later maps take precedence: defaults < params < param_list entry)
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // NOTE(review): parData is assigned without `def`, so it is not a local of this closure.
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          // string paths become files; when the sets came from a file, paths are resolved relative to it
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): this closure receives map entries, so the null check looks like it never filters anything -- confirm.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // NOTE(review): unique() presumably de-duplicates ppIds in place, in which case the message below no longer lists the duplicates -- confirm.
  assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}
/**
 * Deprecated: create a channel with one item per argument set.
 *
 * @param params The pipeline parameters.
 * @param config A processed viash config.
 * @return A channel created from the result of paramsToList().
 */
def paramsToChannel(params, config) {
  def alreadyWarned = viashChannelDeprecationWarningPrinted
  // set the flag before delegating, so paramsToList() does not warn a second time
  viashChannelDeprecationWarningPrinted = true
  if (!alreadyWarned) {
    def warning = "Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs."
    System.err.println(warning)
  }
  return Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: create a channel of `[id, argument set]` tuples.
 *
 * @param params The pipeline parameters.
 * @param config A processed viash config.
 * @return The channel of paramsToChannel(), with every item paired with its id.
 */
def viashChannel(params, config) {
  def alreadyWarned = viashChannelDeprecationWarningPrinted
  viashChannelDeprecationWarningPrinted = true
  if (!alreadyWarned) {
    def warning = "Warning: viashChannel has deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs."
    System.err.println(warning)
  }
  def argumentSets = paramsToChannel(params, config)
  return argumentSets.map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the values of arguments that accept multiple values
 *         have been split into a list using their separator. Values of parameters that
 *         are not described in the config are returned unchanged.
 */
Map _splitParams(Map parValues, Map config){
  return parValues.collectEntries { parName, parValue ->
    def settings = config.functionality.allArguments.find({it.plainName == parName})

    // if argument is not found, do not alter
    if (!settings) {
      return [parName, parValue]
    }

    def value = parValue
    if (settings.multiple) {
      // Normalise whatever was passed into a flat list of values
      if (value instanceof Collection) {
        value = value.collect{it instanceof String ? it.split(settings.multiple_sep) : it }
      } else if (value instanceof String) {
        value = value.split(settings.multiple_sep)
      } else if (value == null) {
        value = []
      } else {
        value = [ value ]
      }
      value = value.flatten()
    } else if (value instanceof Collection) {
      // Multiple values are only allowed for arguments that accept them.
      // Quietly simplify lists of length 1.
      assert value.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${value.size()}"
      value = value[0]
    }
    [parName, value]
  }
}
/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets; the id is the first element of each set.
 */
private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
  def ppIds = parameterSets.collect{it[0]}
  // Compare against a set rather than calling unique(), which would strip the
  // duplicates from the very list that is shown in the error message.
  assert ppIds.size() == ppIds.toSet().size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

/**
 * Resolve the file paths in the parameters relative to given path
 *
 * @param paramList A Map containing parameters to process.
 *                  Values that are not of type String are left untouched.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * @param relativeTo path of a file to resolve the parameters values to.
 *
 * @return A map of parameters where the String values of input file arguments have been
 *         resolved relative to the provided path. All other values are returned unchanged.
 */
private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
  paramList.collectEntries { parName, parValue ->
    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
      // only String values still need resolving
      def resolve = { value ->
        value !instanceof String ? value : file(getChild(relativeTo, value))
      }
      if (parValue instanceof Collection) {
        parValue = parValue.collect(resolve)
      } else {
        parValue = resolve(parValue)
      }
    }
    [parName, parValue]
  }
}
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/integrate/totalvi/nextflow.config b/target/nextflow/integrate/totalvi/nextflow.config new file mode 100644 index 00000000000..459d6dbe239 
--- /dev/null +++ b/target/nextflow/integrate/totalvi/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'totalvi' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI' + author = 'Vladimir Shitov' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } 
+ withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/integrate/totalvi/nextflow_params.yaml b/target/nextflow/integrate/totalvi/nextflow_params.yaml new file mode 100644 index 00000000000..5cff159b5a1 --- /dev/null +++ b/target/nextflow/integrate/totalvi/nextflow_params.yaml @@ -0,0 +1,28 @@ +# Inputs +input: # please fill in - example: "path/to/file" +reference: # please fill in - example: "path/to/file" +force_retrain: false +query_modality: "rna" +# query_proteins_modality: "foo" +reference_modality: "rna" +reference_proteins_modality: "prot" +# input_layer: "foo" +obs_batch: "sample_id" +# var_input: "foo" + +# Outputs +# output: "$id.$key.output.output" +obsm_output: "X_integrated_totalvi" +obsm_normalized_rna_output: "X_totalvi_normalized_rna" +obsm_normalized_protein_output: "X_totalvi_normalized_protein" +# reference_model_path: "$id.$key.reference_model_path.reference_model_path" +# query_model_path: "$id.$key.query_model_path.query_model_path" + +# Learning parameters +max_epochs: 400 +max_query_epochs: 200 +weight_decay: 0.0 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: 
"my_params.yaml" diff --git a/target/nextflow/integrate/totalvi/nextflow_schema.json b/target/nextflow/integrate/totalvi/nextflow_schema.json new file mode 100644 index 00000000000..a4150f4d8e5 --- /dev/null +++ b/target/nextflow/integrate/totalvi/nextflow_schema.json @@ -0,0 +1,292 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "totalvi", +"description": "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file with query data to integrate with reference", + "help_text": "Type: `file`, required. Input h5mu file with query data to integrate with reference." + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file with reference data to train the TOTALVI model", + "help_text": "Type: `file`, required. Input h5mu file with reference data to train the TOTALVI model." + + } + + + , + "force_retrain": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If true, retrain the model and save it to reference_model_path", + "help_text": "Type: `boolean_true`, default: `false`. If true, retrain the model and save it to reference_model_path" + , + "default": "False" + } + + + , + "query_modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "query_proteins_modality": { + "type": + "string", + "description": "Type: `string`. Name of the modality in the input (query) h5mu file containing protein data", + "help_text": "Type: `string`. 
Name of the modality in the input (query) h5mu file containing protein data" + + } + + + , + "reference_modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "reference_proteins_modality": { + "type": + "string", + "description": "Type: `string`, default: `prot`. Name of the modality containing proteins in the reference", + "help_text": "Type: `string`, default: `prot`. Name of the modality containing proteins in the reference" + , + "default": "prot" + } + + + , + "input_layer": { + "type": + "string", + "description": "Type: `string`. Input layer to use", + "help_text": "Type: `string`. Input layer to use. If None, X is used" + + } + + + , + "obs_batch": { + "type": + "string", + "description": "Type: `string`, default: `sample_id`. Column name discriminating between your batches", + "help_text": "Type: `string`, default: `sample_id`. Column name discriminating between your batches." + , + "default": "sample_id" + } + + + , + "var_input": { + "type": + "string", + "description": "Type: `string`. ", + "help_text": "Type: `string`. .var column containing highly variable genes. By default, do not subset genes." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file." + , + "default": "$id.$key.output.output" + } + + + , + "obsm_output": { + "type": + "string", + "description": "Type: `string`, default: `X_integrated_totalvi`. In which ", + "help_text": "Type: `string`, default: `X_integrated_totalvi`. In which .obsm slot to store the resulting integrated embedding." 
+ , + "default": "X_integrated_totalvi" + } + + + , + "obsm_normalized_rna_output": { + "type": + "string", + "description": "Type: `string`, default: `X_totalvi_normalized_rna`. In which ", + "help_text": "Type: `string`, default: `X_totalvi_normalized_rna`. In which .obsm slot to store the normalized RNA from TOTALVI." + , + "default": "X_totalvi_normalized_rna" + } + + + , + "obsm_normalized_protein_output": { + "type": + "string", + "description": "Type: `string`, default: `X_totalvi_normalized_protein`. In which ", + "help_text": "Type: `string`, default: `X_totalvi_normalized_protein`. In which .obsm slot to store the normalized protein data from TOTALVI." + , + "default": "X_totalvi_normalized_protein" + } + + + , + "reference_model_path": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reference_model_path.reference_model_path`. Directory with the reference model", + "help_text": "Type: `file`, default: `$id.$key.reference_model_path.reference_model_path`. Directory with the reference model. If not exists, trained model will be saved there" + , + "default": "$id.$key.reference_model_path.reference_model_path" + } + + + , + "query_model_path": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.query_model_path.query_model_path`. Directory, where the query model will be saved", + "help_text": "Type: `file`, default: `$id.$key.query_model_path.query_model_path`. Directory, where the query model will be saved" + , + "default": "$id.$key.query_model_path.query_model_path" + } + + +} +}, + + + "learning parameters" : { + "title": "Learning parameters", + "type": "object", + "description": "No description", + "properties": { + + + "max_epochs": { + "type": + "integer", + "description": "Type: `integer`, default: `400`. Number of passes through the dataset", + "help_text": "Type: `integer`, default: `400`. 
Number of passes through the dataset" + , + "default": "400" + } + + + , + "max_query_epochs": { + "type": + "integer", + "description": "Type: `integer`, default: `200`. Number of passes through the dataset, when fine-tuning model for query", + "help_text": "Type: `integer`, default: `200`. Number of passes through the dataset, when fine-tuning model for query" + , + "default": "200" + } + + + , + "weight_decay": { + "type": + "number", + "description": "Type: `double`, default: `0.0`. Weight decay, when fine-tuning model for query", + "help_text": "Type: `double`, default: `0.0`. Weight decay, when fine-tuning model for query" + , + "default": "0.0" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/learning parameters" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/integrate/totalvi/setup_logger.py b/target/nextflow/integrate/totalvi/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/integrate/totalvi/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/interpret/lianapy/.config.vsh.yaml b/target/nextflow/interpret/lianapy/.config.vsh.yaml new file mode 100644 index 00000000000..e6f8c299aeb --- /dev/null +++ b/target/nextflow/interpret/lianapy/.config.vsh.yaml @@ -0,0 +1,313 @@ +functionality: + name: "lianapy" + namespace: "interpret" + version: "0.12.4" + authors: + - name: "Mauro Saporita" + roles: + - "author" + info: + role: "Contributor" + links: + email: "maurosaporita@gmail.com" + github: "mauro-saporita" + linkedin: "mauro-saporita-930b06a5" + organizations: + - name: "Ardigen" + href: "https://ardigen.com" + role: "Lead Nextflow Developer" + - name: "Povilas Gibas" + roles: + - "author" + info: + role: "Contributor" + links: + email: "povilasgibas@gmail.com" + github: "PoGibas" + linkedin: "povilas-gibas" + organizations: + - name: "Ardigen" + href: "https://ardigen.com" + role: "Bioinformatician" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu 
file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + default: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + description: "Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--groupby" + description: "The key of the observations grouping to consider." + info: null + default: + - "bulk_labels" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--resource_name" + description: "Name of the resource to be loaded and use for ligand-receptor inference." + info: null + default: + - "consensus" + required: false + choices: + - "baccin2019" + - "cellcall" + - "cellchatdb" + - "cellinker" + - "cellphonedb" + - "celltalkdb" + - "connectomedb2020" + - "consensus" + - "embrace" + - "guide2pharma" + - "hpmr" + - "icellnet" + - "italk" + - "kirouac2010" + - "lrdb" + - "mouseconsensus" + - "ramilowski2015" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--gene_symbol" + description: "Column name in var DataFrame in which gene symbol are stored." 
+ info: null + default: + - "gene_symbol" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--expr_prop" + description: "Minimum expression proportion for the ligands/receptors (and their\ + \ subunits) in the corresponding cell identities. Set to '0', to return unfiltered\ + \ results." + info: null + default: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells" + description: "Minimum cells per cell identity ('groupby') to be considered for\ + \ downstream analysis." + info: null + default: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--aggregate_method" + description: "Method aggregation approach, one of ['mean', 'rra'], where 'mean'\ + \ represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde et\ + \ al., 2014) of the interactions." + info: null + default: + - "rra" + required: false + choices: + - "mean" + - "rra" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--return_all_lrs" + description: "Bool whether to return all LRs, or only those that surpass the 'expr_prop'\ + \ threshold. Those interactions that do not pass the 'expr_prop' threshold will\ + \ be assigned to the *worst* score of the ones that do. 'False' by default." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_perms" + description: "Number of permutations for the permutation test. Note that this\ + \ is relevant only for permutation-based methods - e.g. 
'CellPhoneDB'" + info: null + default: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Performs LIANA integration as described in https://github.com/saezlab/liana-py" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "liana~=0.1.9" + - "numpy~=1.24.3" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb:
"memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/interpret/lianapy" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/interpret/lianapy/lianapy" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/interpret/lianapy/main.nf b/target/nextflow/interpret/lianapy/main.nf new file mode 100644 index 00000000000..e3005106b51 --- /dev/null +++ b/target/nextflow/interpret/lianapy/main.nf @@ -0,0 +1,2757 @@ +// lianapy 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Mauro Saporita (author) +// * Povilas Gibas (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "lianapy", + "namespace" : "interpret", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Mauro Saporita", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "email" : "maurosaporita@gmail.com", + "github" : "mauro-saporita", + "linkedin" : "mauro-saporita-930b06a5" + }, + "organizations" : [ + { + "name" : "Ardigen", + "href" : "https://ardigen.com", + "role" : "Lead Nextflow Developer" + } + ] + } + }, + { + "name" : "Povilas Gibas", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "email" : "povilasgibas@gmail.com", + "github" : "PoGibas", + "linkedin" : "povilas-gibas" + }, + "organizations" : [ + { + "name" : "Ardigen", + "href" : "https://ardigen.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "default" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" 
: "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer", + "description" : "Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--groupby", + "description" : "The key of the observations grouping to consider.", + "default" : [ + "bulk_labels" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--resource_name", + "description" : "Name of the resource to be loaded and use for ligand-receptor inference.", + "default" : [ + "consensus" + ], + "required" : false, + "choices" : [ + "baccin2019", + "cellcall", + "cellchatdb", + "cellinker", + "cellphonedb", + "celltalkdb", + "connectomedb2020", + "consensus", + "embrace", + "guide2pharma", + "hpmr", + "icellnet", + "italk", + "kirouac2010", + "lrdb", + "mouseconsensus", + "ramilowski2015" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--gene_symbol", + "description" : "Column name in var DataFrame in which gene symbol are stored.", + "default" : [ + "gene_symbol" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--expr_prop", + "description" : "Minimum expression proportion for the ligands/receptors (and their subunits) in the corresponding cell identities. 
Set to '0', to return unfiltered results.", + "default" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_cells", + "description" : "Minimum cells per cell identity ('groupby') to be considered for downstream analysis.", + "default" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--aggregate_method", + "description" : "Method aggregation approach, one of ['mean', 'rra'], where 'mean' represents the mean rank, while 'rra' is the RobustRankAggregate (Kolde et al., 2014) of the interactions.", + "default" : [ + "rra" + ], + "required" : false, + "choices" : [ + "mean", + "rra" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--return_all_lrs", + "description" : "Bool whether to return all LRs, or only those that surpass the 'expr_prop' threshold. Those interactions that do not pass the 'expr_prop' threshold will be assigned to the *worst* score of the ones that do. 'False' by default.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_perms", + "description" : "Number of permutations for the permutation test. Note that this is relevant only for permutation-based methods - e.g. 
'CellPhoneDB'", + "default" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/" + } + ], + "description" : "Performs LIANA integration as described in https://github.com/saezlab/liana-py", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "git" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "liana~=0.1.9", + "numpy~=1.24.3" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "lowcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, +
"publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/interpret/lianapy/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/interpret/lianapy", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import liana +import mudata +# TODO: Remove when grouping labels exist +# For sign/PCA/ +import numpy as np + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'groupby': $( if [ ! -z ${VIASH_PAR_GROUPBY+x} ]; then echo "r'${VIASH_PAR_GROUPBY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resource_name': $( if [ ! -z ${VIASH_PAR_RESOURCE_NAME+x} ]; then echo "r'${VIASH_PAR_RESOURCE_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'gene_symbol': $( if [ ! -z ${VIASH_PAR_GENE_SYMBOL+x} ]; then echo "r'${VIASH_PAR_GENE_SYMBOL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'expr_prop': $( if [ ! -z ${VIASH_PAR_EXPR_PROP+x} ]; then echo "float(r'${VIASH_PAR_EXPR_PROP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'aggregate_method': $( if [ ! -z ${VIASH_PAR_AGGREGATE_METHOD+x} ]; then echo "r'${VIASH_PAR_AGGREGATE_METHOD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'return_all_lrs': $( if [ ! -z ${VIASH_PAR_RETURN_ALL_LRS+x} ]; then echo "r'${VIASH_PAR_RETURN_ALL_LRS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'n_perms': $( if [ ! -z ${VIASH_PAR_N_PERMS+x} ]; then echo "int(r'${VIASH_PAR_N_PERMS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + + +def main(): + + # Get input data + mdata = mudata.read(par['input'].strip()) + mod = mdata.mod[par['modality']] + + # Add dummy grouping labels when they do not exist + if par['groupby'] not in mod.obs: + foo = mod.obsm.to_df().iloc[:, 0] + mod.obs[par['groupby']] = np.sign(foo).astype('category') + + # Solve gene labels + orig_gene_label = mod.var.index + mod.var_names = mod.var[par['gene_symbol']].astype(str) + mod.var_names_make_unique() + + liana.mt.rank_aggregate( + adata = mod, + groupby = par['groupby'], + resource_name = par["resource_name"], + expr_prop = par["expr_prop"], + min_cells = par["min_cells"], + aggregate_method = par["aggregate_method"], + return_all_lrs = par["return_all_lrs"], + layer = par["layer"], + n_perms = par["n_perms"], + verbose = True, + inplace = True, + use_raw = False + ) + + # Return original gene labels + mod.var_names = orig_gene_label + + # TODO: make sure compression is needed + mdata.write_h5mu(par['output'].strip(), compression=par['output_compression']) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/interpret_lianapy", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "lowcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the 
ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) compares against a de-duplicated copy; the no-argument unique()
+  // strips the duplicates from ppIds itself, so the message below would no longer
+  // show the offending ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares with type "file" and direction "input"
+ * are touched; all other entries are returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String;
+ *                  values that are not Strings are passed through as they are.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of leaking it into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single value that is not a String is returned untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the parameter sets it holds.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // every parser returns a pair [location of the param_list file or null, parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list (unless it already is a Collection), cast
+ * element by element, and unwrapped again when the argument does not accept multiple values.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to the closure instead of leaking them into the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // plain lookup: get(key, default) would add the key to the config map when it is missing
+    def parType = paramSettings?.type
+    // NOTE(review): values of parameters unknown to the config are wrapped here but never
+    // unwrapped below, so they leave this function as a list - confirm this is intended.
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      // NOTE(review): 'as Boolean' presumably follows Groovy truth, in which case a
+      // non-empty String such as "false" casts to true - confirm for String inputs.
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config; 'publishDir' and
+ *                    'publish_dir' are accepted as well.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+      assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of each set is its id,
+ *                      the second element the Map of parameter values; any further
+ *                      elements are appended to the result unchanged.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the param_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the value from the parameter set wins
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll only filters, so no entry has to be produced for a dropped parameter.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // single-argument get() only reads: get(key, default) would write 'default: null'
+      // into the config. '?.' covers publish_dir / publishDir, which have no settings.
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_list are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the default value of every argument that declares one
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def defaultArgs = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // The defaults go through the same processing as the values provided by the caller
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Overlay every processed input set on the defaults (input values win on a key clash)
+  // and append whatever followed the id and the parameter map unchanged
+  def parsedArgs = applyConfig(params, config).collect { tuple ->
+    [tuple[0], parsedDefaultValues + tuple[1]] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(parsedArgs)
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Entries whose value is null are dropped first; each expected key must then
+ * be present and hold a Boolean (enforced with assert).
+ *
+ * @param auto map of auto settings
+ * @return a map containing only the expected keys
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // declared with 'def' so the list stays local instead of being written to the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}"
+  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])
+
+  // check whether auto exists and apply defaults
+  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
+  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
+  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])
+
+  // auto define publish, if so desired
+  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
+    //   " Example: params.publish_dir = \"./output/\""
+    def publishDir =
+      params.containsKey("publish_dir") ? params.publish_dir :
+      params.containsKey("publishDir") ? params.publishDir :
+      null
+
+    if (publishDir != null) {
+      processArgs.directives.publishDir = [[
+        path: publishDir,
+        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // keep any publishDir entries that are already set; start from an empty list otherwise
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // build one message with '+': after a comma the remaining text is handed to the
+    // logger as a separate argument and does not become part of the message
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // declared before it is assigned so the closure can call itself for nested lists/maps
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.' + par.plainName + '}'
+      }
+    }
+    .join()
+
+  // TODO: move this functionality somewhere else?
+  if (processArgs.auto.transcript) {
+    outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}'
+  } else {
+    outputPaths = outputPaths + ', path{[".exitcode"]}'
+  }
+
+  // create dirs for output files (based on BashWrapper.createParentFiles)
+  def createParentStr = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" && it.create_parent }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
+    }
+    .join("\n")
+
+  // construct inputFileExports
+  def inputFileExports = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
+    .collect { par ->
+      // 'def' keeps this per-argument value local to the closure
+      def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")"
+      "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
+    }
+
+  // NOTE: if using docker, use /tmp instead of tmpDir!
+  def tmpDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?:
+    System.getenv('VIASH_TMPDIR') ?:
+    System.getenv('VIASH_TEMPDIR') ?:
+    System.getenv('VIASH_TMP') ?:
+    System.getenv('TEMP') ?:
+    System.getenv('TMPDIR') ?:
+    System.getenv('TEMPDIR') ?:
+    System.getenv('TMP') ?:
+    '/tmp'
+  ).toAbsolutePath()
+
+  // construct stub
+  def stub = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then
+    | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 ))
+    | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 ))
+    | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 ))
+    | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 ))
+    | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 ))
+    |fi
+    |
+    |# meta synonyms
+    |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR"
+    |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR"
+    |
+    |# create output dirs if need be
+    |function mkdir_parent {
+    | for file in "\\\$@"; do
+    | mkdir -p "\\\$(dirname "\\\$file")"
+    | done
+    |}
+    |$createParentStr
+    |
+    |# argument exports${inputFileExports.join()}
+    |\$parInject
+    |
+    |# process script
+    |${escapedScript}
+    |\"\"\"
+    |}
+    |""".stripMargin()
+
+  // TODO: print on debug
+  // if (processArgs.debug == true) {
+  //   println("######################\n$procStr\n######################")
+  // }
+
+  // create runtime process
+  def ownerParams = new ScriptBinding.ParamsMap()
+  def binding = new ScriptBinding().setParams(ownerParams)
+  def module = new IncludeDef.Module(name: procKey)
+  def scriptParser = new ScriptParser(session)
+    .setModule(true)
+    .setBinding(binding)
+  scriptParser.scriptPath = ScriptMeta.current().getScriptPath()
+  def moduleScript = scriptParser.runScript(procStr)
+    .getScript()
+
+  // register module in meta
+  meta.addModule(moduleScript, module.name, module.alias)
+
+  // retrieve and return process from meta
+  return meta.getProcess(procKey)
+}
+
+/**
+ * Optional debug step for a channel pipeline.
+ *
+ * When processArgs.debug is truthy, calls `view` with a message made of the
+ * process key, debugKey and the tuple; otherwise calls `map` with an identity
+ * closure so the items pass through unchanged.
+ *
+ * NOTE(review): `view` and `map` are called without an explicit channel here;
+ * presumably this only works when the function is used on the right-hand side
+ * of a pipe (`| debug(...)`) -- confirm against the Nextflow documentation.
+ *
+ * @param processArgs processed module arguments; reads `debug` and `key`
+ * @param debugKey label inserted in the message to identify the pipeline stage
+ */
+def debug(processArgs, debugKey) {
+  if (processArgs.debug) {
+    view { "process '${processArgs.key}' $debugKey tuple: $it" }
+  } else {
+    map { it }
+  }
+}
+
+def workflowFactory(Map args) {
+  def processArgs = processProcessArgs(args)
+  def key = processArgs["key"]
+  def meta = ScriptMeta.current()
+
+  def workflowKey = key
+
+  def processObj = null
+
+  workflow workflowInstance {
+
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" }
+          .collect { par ->
+            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
+            def inputFiles = []
+            if (val == null) {
+              inputFiles = []
+            } else if (val instanceof List) {
+              inputFiles = val
+            } else if (val instanceof Path) {
+              inputFiles = [ val ]
+            } else {
+              inputFiles = []
+            }
+            if (!workflow.stubRun) {
+              // throw error when an input file doesn't exist
+              inputFiles.each{ file ->
+                assert file.exists() :
+                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
+                  " Required input file does not exist.\n" +
+                  " Path: '$file'.\n" +
+                  " Expected input file to exist"
+              }
+            }
+            inputFiles
+          }
+
+        // remove input files
+        def argsExclInputFiles = thisConfig.functionality.allArguments
+          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
+          .collectEntries { par ->
+            def parName = par.plainName
+            def val = combinedArgs[parName]
+            if (par.multiple && val instanceof Collection) {
+              val = val.join(par.multiple_sep)
+            }
+            if (par.direction == "output" && par.type == "file") {
+              val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
+            }
+            [parName, val]
+          }
+
+        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
+      }
+      | processObj
+      | map { output ->
+        def outputFiles = thisConfig.functionality.allArguments
+          .findAll { it.type == "file" && it.direction == "output" }
+          .indexed()
+          .collectEntries{ index, par ->
+            // 'def' keeps the value local to this closure call; an undeclared name
+            // would be shared between items that are processed concurrently
+            def out = output[index + 1]
+            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
+            // parentheses are required: '!out instanceof List' is parsed as
+            // '(!out) instanceof List', which is never true
+            if (!(out instanceof List) || out.size() <= 1) {
+              if (par.multiple) {
+                out = []
+              } else {
+                assert !par.required :
+                  "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
+                  " Required output file is missing"
+                out = null
+              }
+            } else if (out.size() == 2 && !par.multiple) {
+              out = out[1]
+            } else {
+              out = out.drop(1)
+            }
+            [ par.plainName, out ]
+          }
+
+        // drop null outputs
+
outputFiles.removeAll{it.value == null}
+
+        // when exactly one output remains, pass the bare value instead of a one-entry map
+        if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) {
+          outputFiles = outputFiles.values()[0]
+        }
+
+        [ output[0], outputFiles ]
+      }
+
+    // join the output [id, output] with the previous state [id, state, ...]
+    out1_ = out0_.join(mid2_, failOnDuplicate: true)
+      // input tuple format: [id, output, prev_state, ...]
+      // output tuple format: [id, new_state, ...]
+      | map{
+        def new_state = processArgs["toState"](it)
+        [it[0], new_state] + it.drop(3)
+      }
+      | debug(processArgs, "output")
+
+
+    emit:
+    out1_
+  }
+
+  // give the workflow the module key as its name
+  def wf = workflowInstance.cloneWithName(workflowKey)
+
+  // add factory function
+  wf.metaClass.run = { runArgs ->
+    workflowFactory(runArgs)
+  }
+  // add config to module for later introspection
+  wf.metaClass.config = thisConfig
+
+  return wf
+}
+
+// initialise default workflow
+myWfInstance = workflowFactory([:])
+
+// add workflow to environment
+ScriptMeta.current().addDefinition(myWfInstance)
+
+// anonymous workflow for running this module as a standalone
+workflow {
+  // NOTE(review): mergedConfig refers to the same object as thisConfig, so the
+  // add(0, idArg) below also changes thisConfig.functionality.arguments -- confirm this is intended
+  def mergedConfig = thisConfig
+  def mergedParams = [:] + params
+
+  // add id argument if it's not already in the config
+  if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) {
+    def idArg = [
+      'name': '--id',
+      'required': false,
+      'type': 'string',
+      'description': 'A unique id for every entry.',
+      'multiple': false
+    ]
+    mergedConfig.functionality.arguments.add(0, idArg)
+    mergedConfig = processConfig(mergedConfig)
+  }
+  // fall back to the id "run" when no id parameter was passed
+  if (!mergedParams.containsKey("id")) {
+    mergedParams.id = "run"
+  }
+
+  helpMessage(mergedConfig)
+
+  // build the input channel from the parameters and run the module with auto-publishing enabled
+  channelFromParams(mergedParams, mergedConfig)
+    | preprocessInputs("config": mergedConfig)
+    | view { "input: $it" }
+    | myWfInstance.run(
+      auto: [ publish: true ]
+    )
+    | view { "output: $it" }
+}
\ No newline at end of file
diff --git a/target/nextflow/interpret/lianapy/nextflow.config b/target/nextflow/interpret/lianapy/nextflow.config
new file mode 100644
index 00000000000..f3ce24b560c
--- /dev/null +++ b/target/nextflow/interpret/lianapy/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'lianapy' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs LIANA integration based as described in https://github.com/saezlab/liana-py' + author = 'Mauro Saporita, Povilas Gibas' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { 
memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/interpret/lianapy/nextflow_params.yaml b/target/nextflow/interpret/lianapy/nextflow_params.yaml new file mode 100644 index 00000000000..5d90f8fa00b --- /dev/null +++ b/target/nextflow/interpret/lianapy/nextflow_params.yaml @@ -0,0 +1,18 @@ +# Arguments +input: # please fill in - example: "path/to/file" +# output: "$id.$key.output.output" +output_compression: "gzip" +modality: "rna" +# layer: "foo" +groupby: "bulk_labels" +resource_name: "consensus" +gene_symbol: "gene_symbol" +expr_prop: 0.1 +min_cells: 5 +aggregate_method: "rra" +return_all_lrs: false +n_perms: 100 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/interpret/lianapy/nextflow_schema.json b/target/nextflow/interpret/lianapy/nextflow_schema.json new file mode 100644 index 00000000000..b258b7deb01 --- /dev/null +++ b/target/nextflow/interpret/lianapy/nextflow_schema.json @@ -0,0 +1,207 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "lianapy", +"description": "Performs LIANA integration based as described in https://github.com/saezlab/liana-py", +"type": "object", 
+"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file", + "help_text": "Type: `file`, required. Input h5mu file" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output h5mu file." + , + "default": "$id.$key.output.output" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, default: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + , + "default": "gzip" + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "layer": { + "type": + "string", + "description": "Type: `string`. Layer in anndata", + "help_text": "Type: `string`. Layer in anndata.AnnData.layers to use. If None, use mudata.mod[modality].X." + + } + + + , + "groupby": { + "type": + "string", + "description": "Type: `string`, default: `bulk_labels`. The key of the observations grouping to consider", + "help_text": "Type: `string`, default: `bulk_labels`. The key of the observations grouping to consider." + , + "default": "bulk_labels" + } + + + , + "resource_name": { + "type": + "string", + "description": "Type: `string`, default: `consensus`, choices: ``baccin2019`, `cellcall`, `cellchatdb`, `cellinker`, `cellphonedb`, `celltalkdb`, `connectomedb2020`, `consensus`, `embrace`, `guide2pharma`, `hpmr`, `icellnet`, `italk`, `kirouac2010`, `lrdb`, `mouseconsensus`, `ramilowski2015``. 
Name of the resource to be loaded and use for ligand-receptor inference", + "help_text": "Type: `string`, default: `consensus`, choices: ``baccin2019`, `cellcall`, `cellchatdb`, `cellinker`, `cellphonedb`, `celltalkdb`, `connectomedb2020`, `consensus`, `embrace`, `guide2pharma`, `hpmr`, `icellnet`, `italk`, `kirouac2010`, `lrdb`, `mouseconsensus`, `ramilowski2015``. Name of the resource to be loaded and use for ligand-receptor inference.", + "enum": ["baccin2019", "cellcall", "cellchatdb", "cellinker", "cellphonedb", "celltalkdb", "connectomedb2020", "consensus", "embrace", "guide2pharma", "hpmr", "icellnet", "italk", "kirouac2010", "lrdb", "mouseconsensus", "ramilowski2015"] + + , + "default": "consensus" + } + + + , + "gene_symbol": { + "type": + "string", + "description": "Type: `string`, default: `gene_symbol`. Column name in var DataFrame in which gene symbol are stored", + "help_text": "Type: `string`, default: `gene_symbol`. Column name in var DataFrame in which gene symbol are stored." + , + "default": "gene_symbol" + } + + + , + "expr_prop": { + "type": + "number", + "description": "Type: `double`, default: `0.1`. Minimum expression proportion for the ligands/receptors (and their subunits) in the corresponding cell identities", + "help_text": "Type: `double`, default: `0.1`. Minimum expression proportion for the ligands/receptors (and their subunits) in the corresponding cell identities. Set to \u00270\u0027, to return unfiltered results." + , + "default": "0.1" + } + + + , + "min_cells": { + "type": + "integer", + "description": "Type: `integer`, default: `5`. Minimum cells per cell identity (\u0027groupby\u0027) to be considered for downstream analysis", + "help_text": "Type: `integer`, default: `5`. Minimum cells per cell identity (\u0027groupby\u0027) to be considered for downstream analysis." + , + "default": "5" + } + + + , + "aggregate_method": { + "type": + "string", + "description": "Type: `string`, default: `rra`, choices: ``mean`, `rra``. 
Method aggregation approach, one of [\u0027mean\u0027, \u0027rra\u0027], where \u0027mean\u0027 represents the mean rank, while \u0027rra\u0027 is the RobustRankAggregate (Kolde et al", + "help_text": "Type: `string`, default: `rra`, choices: ``mean`, `rra``. Method aggregation approach, one of [\u0027mean\u0027, \u0027rra\u0027], where \u0027mean\u0027 represents the mean rank, while \u0027rra\u0027 is the RobustRankAggregate (Kolde et al., 2014) of the interactions.", + "enum": ["mean", "rra"] + + , + "default": "rra" + } + + + , + "return_all_lrs": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Bool whether to return all LRs, or only those that surpass the \u0027expr_prop\u0027 threshold", + "help_text": "Type: `boolean`, default: `false`. Bool whether to return all LRs, or only those that surpass the \u0027expr_prop\u0027 threshold. Those interactions that do not pass the \u0027expr_prop\u0027 threshold will be assigned to the *worst* score of the ones that do. \u0027False\u0027 by default." + , + "default": "False" + } + + + , + "n_perms": { + "type": + "integer", + "description": "Type: `integer`, default: `100`. Number of permutations for the permutation test", + "help_text": "Type: `integer`, default: `100`. Number of permutations for the permutation test. Note that this is relevant only for permutation-based methods - e.g. \u0027CellPhoneDB" + , + "default": "100" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
+ + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/labels_transfer/knn/.config.vsh.yaml b/target/nextflow/labels_transfer/knn/.config.vsh.yaml new file mode 100644 index 00000000000..f1868c4828e --- /dev/null +++ b/target/nextflow/labels_transfer/knn/.config.vsh.yaml @@ -0,0 +1,379 @@ +functionality: + name: "knn" + namespace: "labels_transfer" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + roles: + - "author" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Input dataset (query) arguments" + arguments: + - type: "file" + name: "--input" + description: "The query data to transfer the labels to. Should be a .h5mu file." + info: + label: "Query" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + slots: + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's inference,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + required: false + description: "The embedding to use for the classifier's inference.\ + \ Override using the `--input_obsm_features` argument. If not\ + \ provided, the `.X` slot will be used instead.\nMake sure that\ + \ embedding was obtained in the same way as the reference embedding\ + \ (e.g. 
by the same model or preprocessing).\n" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to use." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's inference.\ + \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ + \ was obtained in the same way as the reference embedding (e.g. by the same\ + \ model or preprocessing).\n" + info: null + example: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference dataset arguments" + arguments: + - type: "file" + name: "--reference" + description: "The reference data to train classifiers on." + info: + label: "Reference" + file_format: + type: "h5ad" + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's training,\ + \ if `--reference_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + description: "The embedding to use for the classifier's training. Override\ + \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ + \ was obtained in the same way as the query embedding (e.g. by the same\ + \ model or preprocessing).\n" + required: true + obs: + - type: "string" + name: "targets" + multiple: true + example: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + description: "The target labels to transfer. Override using the `--reference_obs_targets`\ + \ argument."
+ required: true + example: + - "https://zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ + Make sure that embedding was obtained in the same way as the query embedding\ + \ (e.g. by the same model or preprocessing).\n" + info: null + default: + - "X_integrated_scanvi" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_targets" + description: "The `.obs` key of the target labels to transfer." + info: null + default: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The query data in .h5mu format with predicted labels transferred\ + \ from the reference." + info: + label: "Output data" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + obs: + - type: "string" + name: "predictions" + description: "The predicted labels. Override using the `--output_obs_predictions`\ + \ argument." + required: true + - type: "double" + name: "uncertainty" + description: "The uncertainty of the predicted labels. Override using\ + \ the `--output_obs_uncertainty` argument." + required: false + obsm: + - type: "double" + name: "X_integrated_scanvi" + description: "The embedding used for the classifier's inference.
Could\ + \ have any name, specified by `input_obsm_features` argument." + required: false + uns: + - type: "string" + name: "parameters" + example: "labels_transfer" + description: "Additional information about the parameters used for\ + \ the label transfer." + required: true + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_predictions" + description: "In which `.obs` slots to store the predicted information.\nIf\ + \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_uncertainty" + description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ + If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_uns_parameters" + description: "The `.uns` key to store additional information about the parameters\ + \ used for the label transfer."
+ info: null + default: + - "labels_transfer" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "integer" + name: "--n_neighbors" + alternatives: + - "-k" + description: "Number of nearest neighbors to use for classification" + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../utils/helper.py" + - type: "file" + path: "../../utils/setup_logger.py" + description: "Performs label transfer from reference to query using KNN classifier" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: + method_id: "KNN_pynndescent" + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "pynndescent~=0.5.8" + - "numba~=0.56.4" + - "numpy~=1.23.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + 
directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/knn" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/knn/knn" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/labels_transfer/knn/helper.py b/target/nextflow/labels_transfer/knn/helper.py new file mode 100644 index 00000000000..a90bf59efdb --- /dev/null +++ b/target/nextflow/labels_transfer/knn/helper.py @@ -0,0 +1,32 @@ +def check_arguments(par): + # check output .obs predictions + if not par["output_obs_predictions"]: + par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] + assert len(par["output_obs_predictions"]) == 
len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" + + # check output .obs uncertainty + if not par["output_obs_uncertainty"]: + par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] + assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" + + return par + +def get_reference_features(adata_reference, par, logger): + if par["reference_obsm_features"] is None: + logger.info("Using .X of reference data") + train_data = adata_reference.X + else: + logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") + train_data = adata_reference.obsm[par["reference_obsm_features"]] + + return train_data + +def get_query_features(adata, par, logger): + if par["input_obsm_features"] is None: + logger.info("Using .X of query data") + query_data = adata.X + else: + logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") + query_data = adata.obsm[par["input_obsm_features"]] + + return query_data \ No newline at end of file diff --git a/target/nextflow/labels_transfer/knn/main.nf b/target/nextflow/labels_transfer/knn/main.nf new file mode 100644 index 00000000000..1cdb4932d0e --- /dev/null +++ b/target/nextflow/labels_transfer/knn/main.nf @@ -0,0 +1,2921 @@ +// knn 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Vladimir Shitov (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "knn", + "namespace" : "labels_transfer", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Vladimir Shitov", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "email" : "vladimir.shitov@helmholtz-muenchen.de", + "github" : "vladimirshitov", + "orcid" : "0000-0002-1960-8812", + "linkedin" : "vladimir-shitov-9a659513b" + }, + "organizations" : [ + { + "name" : "Helmholtz Munich", + "href" : "https://www.helmholtz-munich.de", + "role" : "PhD Candidate" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input dataset (query) arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "The query data to transfer the labels to. Should be a .h5mu file.", + "info" : { + "label" : "Query", + "file_format" : { + "type" : "h5mu", + "mod" : { + "rna" : { + "description" : "Modality in AnnData format containing RNA data.", + "required" : true, + "slots" : { + "X" : { + "type" : "double", + "name" : "features", + "required" : false, + "description" : "The expression data to use for the classifier's inference, if `--input_obsm_features` argument is not provided.\n" + }, + "obsm" : [ + { + "type" : "double", + "name" : "features", + "example" : "X_integrated_scanvi", + "required" : false, + "description" : "The embedding to use for the classifier's inference. Override using the `--input_obsm_features` argument. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. 
by the same model or preprocessing).\n" + } + ] + } + } + } + } + }, + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "Which modality to use.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_obsm_features", + "description" : "The `.obsm` key of the embedding to use for the classifier's inference. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. by the same model or preprocessing).\n", + "example" : [ + "X_integrated_scanvi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Reference dataset arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--reference", + "description" : "The reference data to train classifiers on.", + "info" : { + "label" : "Reference", + "file_format" : { + "type" : "h5ad", + "X" : { + "type" : "double", + "name" : "features", + "required" : false, + "description" : "The expression data to use for the classifier's training, if `--input_obsm_features` argument is not provided.\n" + }, + "obsm" : [ + { + "type" : "double", + "name" : "features", + "example" : "X_integrated_scanvi", + "description" : "The embedding to use for the classifier's training. Override using the `--reference_obsm_features` argument.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. 
by the same model or preprocessing).\n", + "required" : true + } + ], + "obs" : [ + { + "type" : "string", + "name" : "targets", + "multiple" : true, + "example" : [ + "ann_level_1", + "ann_level_2", + "ann_level_3", + "ann_level_4", + "ann_level_5", + "ann_finest_level" + ], + "description" : "The target labels to transfer. Override using the `--reference_obs_targets` argument.", + "required" : true + } + ] + } + }, + "example" : [ + "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_obsm_features", + "description" : "The `.obsm` key of the embedding to use for the classifier's training.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. by the same model or preprocessing).\n", + "default" : [ + "X_integrated_scanvi" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_obs_targets", + "description" : "The `.obs` key of the target labels to tranfer.", + "default" : [ + "ann_level_1", + "ann_level_2", + "ann_level_3", + "ann_level_4", + "ann_level_5", + "ann_finest_level" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ",", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "The query data in .h5mu format with predicted labels transfered from the reference.", + "info" : { + "label" : "Output data", + "file_format" : { + "type" : "h5mu", + "mod" : { + "rna" : { + "description" : "Modality in AnnData format containing RNA data.", + "required" : true, + "obs" : [ + { + "type" : "string", + "name" : "predictions", + "description" : "The predicted labels. 
Override using the `--output_obs_predictions` argument.", + "required" : true + }, + { + "type" : "double", + "name" : "uncertainty", + "description" : "The uncertainty of the predicted labels. Override using the `--output_obs_uncertainty` argument.", + "required" : false + } + ], + "obsm" : [ + { + "type" : "double", + "name" : "X_integrated_scanvi", + "description" : "The embedding used for the classifier's inference. Could have any name, specified by `input_obsm_features` argument.\\"", + "required" : false + } + ], + "uns" : [ + { + "type" : "string", + "name" : "parameters", + "example" : "labels_tranfer", + "description" : "Additional information about the parameters used for the label transfer.", + "required" : true + } + ] + } + } + } + }, + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_obs_predictions", + "description" : "In which `.obs` slots to store the predicted information.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_pred\\"` suffix.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_obs_uncertainty", + "description" : "In which `.obs` slots to store the uncertainty of the predictions.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_uncertainty\\"` suffix.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_uns_parameters", + "description" : "The `.uns` key to store additional information about the parameters used for the label transfer.", + "default" : [ + 
"labels_transfer" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Learning parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--n_neighbors", + "alternatives" : [ + "-k" + ], + "description" : "Number of nearest neighbors to use for classification", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" + }, + { + "type" : "file", + "path" : "../utils/helper.py", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" + }, + { + "type" : "file", + "path" : "../../utils/setup_logger.py", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" + } + ], + "description" : "Performs label transfer from reference to query using KNN classifier", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/" + }, + { + "type" : "file", + "path" : "resources_test/annotation_test_data/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "info" : { + "method_id" : "KNN_pynndescent" + }, + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" 
: "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "git" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + }, + { + "type" : "apt", + "packages" : [ + "libopenblas-dev", + "liblapack-dev", + "gfortran" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "scanpy~=1.9.5", + "pynndescent~=0.5.8", + "numba~=0.56.4", + "numpy~=1.23.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" 
+ } + }, + "debug" : false, + "container" : "docker" + }, + { + "type" : "native", + "id" : "native" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/knn/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/knn", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import warnings + +import mudata +import numpy as np +import scanpy as sc +from scipy.sparse import issparse +import pynndescent +import numba + + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_obsm_features': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_obs_predictions': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'n_neighbors': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
def distances_to_affinities(distances):
    """Convert neighbour distances into row-normalised affinities.

    Each row of the 2-D array is passed through a Gaussian-like kernel
    whose bandwidth is derived from that row's standard deviation, and
    is then scaled so that it sums to 1.

    Args:
        distances: 2-D array, one row per observation and one column per
            neighbour (presumably the distances returned by the NN index
            query; confirm against the caller).

    Returns:
        Array of the same shape as the input, with every row summing to 1.
    """
    stds = np.std(distances, axis=1)
    # A row whose distances are all equal (e.g. a single neighbour) has a
    # standard deviation of 0. Dividing by it gives an infinite bandwidth,
    # which collapses to uniform weights below. That result is the desired
    # one, so only the spurious divide-by-zero warning is silenced here.
    with np.errstate(divide="ignore"):
        stds = (2.0 / stds) ** 2
    stds = stds.reshape(-1, 1)
    distances_tilda = np.exp(-np.true_divide(distances, stds))

    return distances_tilda / np.sum(distances_tilda, axis=1, keepdims=True)
mdata.mod[par['modality']] = adata + mdata.update() + mdata.write_h5mu(par['output'].strip()) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/labels_transfer_knn", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
+ // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
/**
 * Read a CSV file into a list of maps, one map per data row.
 *
 * Lines starting with '#' are skipped. The first remaining line is used as
 * the header. Fields are split on commas that are not inside double quotes,
 * and one pair of surrounding double quotes is stripped from a field. Empty
 * data fields are dropped from the resulting row map.
 *
 * @param file_path a Path, or any value accepted by Nextflow's file().
 *
 * @return a List of Maps from header name to field value.
 */
def readCsv(file_path) {
  def output = []
  def inputFile = file_path !instanceof Path ? file(file_path) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  // split on commas followed by an even number of double quotes,
  // i.e. commas which are not part of a quoted field
  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = Pattern.compile('''"(.*)"''')

  // strip the double quotes from a field, if there are any.
  // the matcher is a local variable so it no longer leaks into the script binding
  def unquote = { field ->
    def m = removeQuote.matcher(field)
    m.find() ? m.replaceFirst('$1') : field
  }

  def br = Files.newBufferedReader(inputFile)
  try {
    def row = -1
    def header = null
    def line
    while ((line = br.readLine()) != null) {
      row++
      if (line.startsWith("#")) {
        continue
      }
      def fields = splitRegex.split(line, -1)

      // first non-comment line is the header
      if (header == null) {
        header = fields.collect{ unquote(it) }
        continue
      }

      // empty fields become null and are removed from the row map below
      def data = fields.collect{ field -> field == "" ? null : unquote(field) }
      assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

      def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
      output.add(dataMap)
    }
    assert header != null: "CSV file should contain a header"
  } finally {
    // always release the file handle, also when an assertion fails
    br.close()
  }

  output
}
// based on how Functionality.scala is implemented
/**
 * Build the argument group called 'name', containing those 'arguments'
 * which are not yet referenced by name in any of the 'argumentGroups'.
 *
 * @param argumentGroups list of argument group maps (with 'name' and 'arguments').
 * @param name name of the group to look up or create.
 * @param arguments list of processed argument maps (each having a 'plainName').
 *
 * @return an empty list, or a list containing a single argument group.
 */
def processArgumentGroup(argumentGroups, name, arguments) {
  def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet()

  // arguments which are not referenced by name in any argument group
  def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))}
  def missingArguments = argumentsNotInGroup.findAll{it !instanceof String}

  // the argument group of 'name', if it exists
  def existing = argumentGroups.find{gr -> name == gr.name}

  // if there are no arguments missing from the argument group, just return the existing group (if any)
  if (argumentsNotInGroup.isEmpty()) {
    return existing == null ? [] : [existing]
  }

  // if there are missing arguments and there is an existing group, add the missing arguments to a copy of it
  if (existing != null) {
    def newEx = existing.clone()
    // clone() is shallow: create a new list instead of appending to the
    // list which is still shared with the existing group
    newEx.arguments = existing.arguments + missingArguments
    return [newEx]
  }

  // else create a new group
  return [[name: name, arguments: missingArguments]]
}
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
/**
 * Recursively merge two maps.
 *
 * Works on a (shallow) clone of 'lhs'. For every entry of 'rhs':
 * two maps are merged recursively, two collections are concatenated,
 * and in every other case the value of 'rhs' replaces the one of 'lhs'.
 *
 * @param lhs the base map.
 * @param rhs the map whose entries are merged into the base.
 *
 * @return the merged map.
 */
def mergeMap(Map lhs, Map rhs) {
  def merged = lhs.clone()
  rhs.each { key, incoming ->
    def current = merged[key]
    if (current instanceof Map && incoming instanceof Map) {
      merged[key] = mergeMap(current, incoming)
    } else if (current instanceof Collection && incoming instanceof Collection) {
      merged[key] = current + incoming
    } else {
      merged[key] = incoming
    }
  }
  return merged
}
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
// based on Format.paragraphWrap
/**
 * Wrap a text to a maximum line length, paragraph by paragraph.
 *
 * The text is split on newlines and each paragraph is split on single
 * whitespace characters. Words are joined with one space for as long as
 * the line does not become longer than 'maxLength'.
 *
 * @param str the text to wrap.
 * @param maxLength the maximum length of a line.
 *
 * @return a List of wrapped lines.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    // a paragraph consisting of only whitespace is split into zero words;
    // emit an empty line for it instead of failing on an empty list
    def line = words.isEmpty() ? "" : words[0]

    // visit the remaining words front to back
    words.drop(1).each{word ->
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    outLines.add(line)
  }
  return outLines
}
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
/**
 * Guess the format of the 'param_list' parameter.
 *
 * @param params the parameter map.
 *
 * @return "none" when 'param_list' is absent or null, "asis" when it is
 *   not a String, "csv", "json" or "yaml" depending on the file extension,
 *   and "yaml_blob" for any other String.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    return "none"
  }

  def candidate = params.param_list

  // anything which is not a String is used as is
  if (candidate !instanceof String) {
    return "asis"
  }
  if (candidate.endsWith(".csv")) {
    return "csv"
  }
  if (candidate.endsWith(".json") || candidate.endsWith(".jsn")) {
    return "json"
  }
  if (candidate.endsWith(".yaml") || candidate.endsWith(".yml")) {
    return "yaml"
  }
  // a String without a known extension is treated as a yaml blob
  return "yaml_blob"
}
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
/**
 * Split the values of arguments that accept multiple values using their separator.
 *
 * Parameters which are not described in the config are returned unaltered.
 * For arguments which do not accept multiple values, a collection of
 * length 1 is simplified to its only element.
 *
 * @param parValues A Map containing the parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator.
 */
Map _splitParams(Map parValues, Map config){
  return parValues.collectEntries { parName, parValue ->
    def settings = config.functionality.allArguments.find{ arg -> arg.plainName == parName }

    // if argument is not found, do not alter
    if (!settings) {
      return [parName, parValue]
    }

    def value = parValue
    if (settings.multiple) {
      // the argument accepts multiple values: split every String on the separator
      def separator = settings.multiple_sep
      if (value instanceof Collection) {
        value = value.collect{ item -> item instanceof String ? item.split(separator) : item }
      } else if (value instanceof String) {
        value = value.split(separator)
      } else if (value == null) {
        value = []
      } else {
        value = [ value ]
      }
      value = value.flatten()
    } else if (value instanceof Collection) {
      // multiple values are only allowed for arguments that accept them.
      // Quietly simplify lists of length 1.
      assert value.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${value.size()}"
      value = value[0]
    }
    [parName, value]
  }
}
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) works on a copy. The default unique() removes the duplicates
+  // from ppIds itself, so the message below would no longer show the clashing ids.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *   This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *   resolved relatively to the provided path. Values that are not Strings, and
+ *   parameters that are not input files, are returned untouched.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is passed through as is
+        // (this branch used to refer to 'path', which only exists in the closure above)
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // every parser returns [location of the param_list file or null, parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these per-parameter lookups local to the closure; without it they
+    // were written to the script binding and shared between all invocations
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so single and multiple values can be cast the same way
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // only Strings are turned into files, other values are kept as they are
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+      parType == "boolean_true" ||
+      parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // (values of parameters without settings, e.g. publish_dir, stay wrapped in a list)
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        "  Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 is the id, element 1 the
+ *                      parameter map, any further elements are passed through untouched.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to to initialise
+ *     a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a clash the value from 'param_list' wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    //  - findAll keeps the entries for which the closure is true, so the filter does
+    //    not depend on how collectEntries treats a closure that returns null.
+    //  - the settings are looked up null-safely: parameters such as publish_dir
+    //    are accepted by applyConfig but have no entry in the config.
+    //  - the one-argument get() only reads; get(key, defaultValue) would also
+    //    insert 'default: null' into the argument settings of the config.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the default value of every argument that declares one
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def defaultArgs = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // The defaults get the same splitting and casting as values supplied by the user
+  def defaults = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Split, cast and check the incoming parameter sets
+  def inputSets = applyConfig(params, config)
+
+  // Lay the input values over the defaults: element 0 is the id, element 1 the
+  // parameter map, anything after that is handed on unchanged
+  def merged = inputSets.collect { tuple ->
+    [tuple[0], defaults + tuple[1]] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events into one list so the ids can be checked across events,
+    // then emit the processed events one by one again
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only holds expected keys and holds all required keys.
+ *
+ * @param map The value to check; it must be a Map.
+ * @param expectedKeys The keys which are allowed to occur in the map.
+ * @param requiredKeys The keys which have to occur in the map.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; 'key' is not defined inside this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // the warning must be ONE string: extra log.warn arguments are format parameters and get dropped + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr; valueToStr = { val -> // declared before assignment so the closure can call itself for nested lists/maps + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // local to this closure; an undeclared name would land in the shared binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!' binds tighter than 'instanceof', so a lone Path counts as "no real output" + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/labels_transfer/knn/nextflow.config b/target/nextflow/labels_transfer/knn/nextflow.config new file mode 100644 index 
00000000000..59895a9eecd --- /dev/null +++ b/target/nextflow/labels_transfer/knn/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'knn' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs label transfer from reference to query using KNN classifier' + author = 'Vladimir Shitov' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory 
= 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/labels_transfer/knn/nextflow_params.yaml b/target/nextflow/labels_transfer/knn/nextflow_params.yaml new file mode 100644 index 00000000000..672161899c3 --- /dev/null +++ b/target/nextflow/labels_transfer/knn/nextflow_params.yaml @@ -0,0 +1,6 @@ +# Learning parameters +n_neighbors: # please fill in - example: 123 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/labels_transfer/knn/nextflow_schema.json b/target/nextflow/labels_transfer/knn/nextflow_schema.json new file mode 100644 index 00000000000..2e268c2a852 --- /dev/null +++ b/target/nextflow/labels_transfer/knn/nextflow_schema.json @@ -0,0 +1,70 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "knn", +"description": "Performs label transfer from reference to query using KNN classifier", +"type": "object", +"definitions": { + + + + "learning parameters" : { + "title": "Learning parameters", + "type": "object", + "description": "No description", + "properties": { + + + "n_neighbors": { + "type": + "integer", + "description": "Type: `integer`, required. 
Number of nearest neighbors to use for classification", + "help_text": "Type: `integer`, required. Number of nearest neighbors to use for classification" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/learning parameters" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/labels_transfer/knn/setup_logger.py b/target/nextflow/labels_transfer/knn/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/labels_transfer/knn/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/labels_transfer/xgboost/.config.vsh.yaml b/target/nextflow/labels_transfer/xgboost/.config.vsh.yaml new file mode 100644 index 00000000000..f1a68d5a4e8 --- /dev/null +++ b/target/nextflow/labels_transfer/xgboost/.config.vsh.yaml @@ -0,0 +1,594 @@ +functionality: + name: "xgboost" + namespace: "labels_transfer" + version: "0.12.4" + authors: + - name: "Vladimir Shitov" + roles: + - "author" + info: + role: "Contributor" + links: + email: "vladimir.shitov@helmholtz-muenchen.de" + github: "vladimirshitov" + orcid: "0000-0002-1960-8812" + linkedin: "vladimir-shitov-9a659513b" + organizations: + - name: "Helmholtz Munich" + href: "https://www.helmholtz-munich.de" + role: "PhD Candidate" + argument_groups: + - name: "Input dataset (query) arguments" + 
arguments: + - type: "file" + name: "--input" + description: "The query data to transfer the labels to. Should be a .h5mu file." + info: + label: "Query" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + slots: + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's inference,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + required: false + description: "The embedding to use for the classifier's inference.\ + \ Override using the `--input_obsm_features` argument. If not\ + \ provided, the `.X` slot will be used instead.\nMake sure that\ + \ embedding was obtained in the same way as the reference embedding\ + \ (e.g. by the same model or preprocessing).\n" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to use." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's inference.\ + \ If not provided, the `.X` slot will be used instead.\nMake sure that embedding\ + \ was obtained in the same way as the reference embedding (e.g. by the same\ + \ model or preprocessing).\n" + info: null + example: + - "X_integrated_scanvi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Reference dataset arguments" + arguments: + - type: "file" + name: "--reference" + description: "The reference data to train classifiers on." 
+ info: + label: "Reference" + file_format: + type: "h5ad" + X: + type: "double" + name: "features" + required: false + description: "The expression data to use for the classifier's training,\ + \ if `--input_obsm_features` argument is not provided.\n" + obsm: + - type: "double" + name: "features" + example: "X_integrated_scanvi" + description: "The embedding to use for the classifier's training. Override\ + \ using the `--reference_obsm_features` argument.\nMake sure that embedding\ + \ was obtained in the same way as the query embedding (e.g. by the same\ + \ model or preprocessing).\n" + required: true + obs: + - type: "string" + name: "targets" + multiple: true + example: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + description: "The target labels to transfer. Override using the `--reference_obs_targets`\ + \ argument." + required: true + example: + - "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obsm_features" + description: "The `.obsm` key of the embedding to use for the classifier's training.\n\ + Make sure that embedding was obtained in the same way as the query embedding\ + \ (e.g. by the same model or preprocessing).\n" + info: null + default: + - "X_integrated_scanvi" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--reference_obs_targets" + description: "The `.obs` key of the target labels to transfer." 
+ info: null + default: + - "ann_level_1" + - "ann_level_2" + - "ann_level_3" + - "ann_level_4" + - "ann_level_5" + - "ann_finest_level" + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The query data in .h5mu format with predicted labels transferred\ + \ from the reference." + info: + label: "Output data" + file_format: + type: "h5mu" + mod: + rna: + description: "Modality in AnnData format containing RNA data." + required: true + obs: + - type: "string" + name: "predictions" + description: "The predicted labels. Override using the `--output_obs_predictions`\ + \ argument." + required: true + - type: "double" + name: "uncertainty" + description: "The uncertainty of the predicted labels. Override using\ + \ the `--output_obs_uncertainty` argument." + required: false + obsm: + - type: "double" + name: "X_integrated_scanvi" + description: "The embedding used for the classifier's inference. Could\ + \ have any name, specified by `input_obsm_features` argument." + required: false + uns: + - type: "string" + name: "parameters" + example: "labels_transfer" + description: "Additional information about the parameters used for\ + \ the label transfer." 
+ required: true + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_predictions" + description: "In which `.obs` slots to store the predicted information.\nIf\ + \ provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_pred\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_obs_uncertainty" + description: "In which `.obs` slots to store the uncertainty of the predictions.\n\ + If provided, must have the same length as `--reference_obs_targets`.\nIf empty,\ + \ will default to the `reference_obs_targets` combined with the `\"_uncertainty\"\ + ` suffix.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_uns_parameters" + description: "The `.uns` key to store additional information about the parameters\ + \ used for the label transfer." + info: null + default: + - "labels_transfer" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Execution arguments" + arguments: + - type: "boolean_true" + name: "--force_retrain" + alternatives: + - "-f" + description: "Retrain models on the reference even if model_output directory\ + \ already has trained classifiers. WARNING! It will rewrite existing classifiers\ + \ for targets in the model_output directory!" + info: null + direction: "input" + dest: "par" + - type: "boolean" + name: "--use_gpu" + description: "Use GPU during models training and inference (recommended)." 
+ info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--verbosity" + alternatives: + - "-v" + description: "The verbosity level for evaluation of the classifier from the\ + \ range [0,2]" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--model_output" + description: "Output directory for model" + info: null + default: + - "model" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Learning parameters" + arguments: + - type: "double" + name: "--learning_rate" + alternatives: + - "--eta" + description: "Step size shrinkage used in update to prevents overfitting. Range:\ + \ [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_split_loss" + alternatives: + - "--gamma" + description: "Minimum loss reduction required to make a further partition on\ + \ a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--max_depth" + alternatives: + - "-d" + description: "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_child_weight" + description: "Minimum sum of instance weight (hessian) needed in a child. 
See\ + \ https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_delta_step" + description: "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--subsample" + description: "Subsample ratio of the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sampling_method" + description: "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - "uniform" + required: false + choices: + - "uniform" + - "gradient_based" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bytree" + description: "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bylevel" + description: "Subsample ratio of columns for each level. Range (0, 1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--colsample_bynode" + description: "Subsample ratio of columns for each node (split). Range (0, 1].\ + \ See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--reg_lambda" + alternatives: + - "--lambda" + description: "L2 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--reg_alpha" + alternatives: + - "--alpha" + description: "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--scale_pos_weight" + description: "Control the balance of positive and negative weights, useful for\ + \ unbalanced classes. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster\ + \ for the reference" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "../utils/helper.py" + - type: "file" + path: "src/utils/setup_logger.py" + description: "Performs label transfer from reference to query using XGBoost classifier" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/annotation_test_data/" + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/" + info: + method_id: "XGBClassifier" + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + - type: "apt" + packages: + - "libopenblas-dev" + - "liblapack-dev" + - "gfortran" + interactive: false + - type: "python" + user: false + packages: + - "scanpy~=1.9.5" + - "xgboost~=1.7.1" + - "scikit-learn~=1.1.1" + - "numpy~=1.23.5" + - "pandas~=1.4.4" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + 
labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/xgboost" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/xgboost/xgboost" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/labels_transfer/xgboost/helper.py b/target/nextflow/labels_transfer/xgboost/helper.py new file mode 100644 index 00000000000..a90bf59efdb --- /dev/null +++ b/target/nextflow/labels_transfer/xgboost/helper.py @@ -0,0 +1,32 @@ +def check_arguments(par): + # check output .obs predictions + if not par["output_obs_predictions"]: + par["output_obs_predictions"] = [ t + "_pred" for t in par["reference_obs_targets"]] + assert len(par["output_obs_predictions"]) == len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}" + + # check output 
.obs uncertainty + if not par["output_obs_uncertainty"]: + par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]] + assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}" + + return par + +def get_reference_features(adata_reference, par, logger): + if par["reference_obsm_features"] is None: + logger.info("Using .X of reference data") + train_data = adata_reference.X + else: + logger.info(f"Using .obsm[{par['reference_obsm_features']}] of reference data") + train_data = adata_reference.obsm[par["reference_obsm_features"]] + + return train_data + +def get_query_features(adata, par, logger): + if par["input_obsm_features"] is None: + logger.info("Using .X of query data") + query_data = adata.X + else: + logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data") + query_data = adata.obsm[par["input_obsm_features"]] + + return query_data \ No newline at end of file diff --git a/target/nextflow/labels_transfer/xgboost/main.nf b/target/nextflow/labels_transfer/xgboost/main.nf new file mode 100644 index 00000000000..f2a13f82c74 --- /dev/null +++ b/target/nextflow/labels_transfer/xgboost/main.nf @@ -0,0 +1,3422 @@ +// xgboost 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Vladimir Shitov (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "xgboost", + "namespace" : "labels_transfer", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Vladimir Shitov", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "email" : "vladimir.shitov@helmholtz-muenchen.de", + "github" : "vladimirshitov", + "orcid" : "0000-0002-1960-8812", + "linkedin" : "vladimir-shitov-9a659513b" + }, + "organizations" : [ + { + "name" : "Helmholtz Munich", + "href" : "https://www.helmholtz-munich.de", + "role" : "PhD Candidate" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input dataset (query) arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "The query data to transfer the labels to. Should be a .h5mu file.", + "info" : { + "label" : "Query", + "file_format" : { + "type" : "h5mu", + "mod" : { + "rna" : { + "description" : "Modality in AnnData format containing RNA data.", + "required" : true, + "slots" : { + "X" : { + "type" : "double", + "name" : "features", + "required" : false, + "description" : "The expression data to use for the classifier's inference, if `--input_obsm_features` argument is not provided.\n" + }, + "obsm" : [ + { + "type" : "double", + "name" : "features", + "example" : "X_integrated_scanvi", + "required" : false, + "description" : "The embedding to use for the classifier's inference. Override using the `--input_obsm_features` argument. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. 
by the same model or preprocessing).\n" + } + ] + } + } + } + } + }, + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "Which modality to use.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_obsm_features", + "description" : "The `.obsm` key of the embedding to use for the classifier's inference. If not provided, the `.X` slot will be used instead.\nMake sure that embedding was obtained in the same way as the reference embedding (e.g. by the same model or preprocessing).\n", + "example" : [ + "X_integrated_scanvi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Reference dataset arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--reference", + "description" : "The reference data to train classifiers on.", + "info" : { + "label" : "Reference", + "file_format" : { + "type" : "h5ad", + "X" : { + "type" : "double", + "name" : "features", + "required" : false, + "description" : "The expression data to use for the classifier's training, if `--input_obsm_features` argument is not provided.\n" + }, + "obsm" : [ + { + "type" : "double", + "name" : "features", + "example" : "X_integrated_scanvi", + "description" : "The embedding to use for the classifier's training. Override using the `--reference_obsm_features` argument.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. 
by the same model or preprocessing).\n", + "required" : true + } + ], + "obs" : [ + { + "type" : "string", + "name" : "targets", + "multiple" : true, + "example" : [ + "ann_level_1", + "ann_level_2", + "ann_level_3", + "ann_level_4", + "ann_level_5", + "ann_finest_level" + ], + "description" : "The target labels to transfer. Override using the `--reference_obs_targets` argument.", + "required" : true + } + ] + } + }, + "example" : [ + "https:/zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_obsm_features", + "description" : "The `.obsm` key of the embedding to use for the classifier's training.\nMake sure that embedding was obtained in the same way as the query embedding (e.g. by the same model or preprocessing).\n", + "default" : [ + "X_integrated_scanvi" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--reference_obs_targets", + "description" : "The `.obs` key of the target labels to tranfer.", + "default" : [ + "ann_level_1", + "ann_level_2", + "ann_level_3", + "ann_level_4", + "ann_level_5", + "ann_finest_level" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ",", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "The query data in .h5mu format with predicted labels transfered from the reference.", + "info" : { + "label" : "Output data", + "file_format" : { + "type" : "h5mu", + "mod" : { + "rna" : { + "description" : "Modality in AnnData format containing RNA data.", + "required" : true, + "obs" : [ + { + "type" : "string", + "name" : "predictions", + "description" : "The predicted labels. 
Override using the `--output_obs_predictions` argument.", + "required" : true + }, + { + "type" : "double", + "name" : "uncertainty", + "description" : "The uncertainty of the predicted labels. Override using the `--output_obs_uncertainty` argument.", + "required" : false + } + ], + "obsm" : [ + { + "type" : "double", + "name" : "X_integrated_scanvi", + "description" : "The embedding used for the classifier's inference. Could have any name, specified by `input_obsm_features` argument.\\"", + "required" : false + } + ], + "uns" : [ + { + "type" : "string", + "name" : "parameters", + "example" : "labels_tranfer", + "description" : "Additional information about the parameters used for the label transfer.", + "required" : true + } + ] + } + } + } + }, + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_obs_predictions", + "description" : "In which `.obs` slots to store the predicted information.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_pred\\"` suffix.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_obs_uncertainty", + "description" : "In which `.obs` slots to store the uncertainty of the predictions.\nIf provided, must have the same length as `--reference_obs_targets`.\nIf empty, will default to the `reference_obs_targets` combined with the `\\"_uncertainty\\"` suffix.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_uns_parameters", + "description" : "The `.uns` key to store additional information about the parameters used for the label transfer.", + "default" : [ + 
"labels_transfer" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Execution arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--force_retrain", + "alternatives" : [ + "-f" + ], + "description" : "Retrain models on the reference even if model_output directory already has trained classifiers. WARNING! It will rewrite existing classifiers for targets in the model_output directory!", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--use_gpu", + "description" : "Use GPU during models training and inference (recommended).", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--verbosity", + "alternatives" : [ + "-v" + ], + "description" : "The verbosity level for evaluation of the classifier from the range [0,2]", + "default" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--model_output", + "description" : "Output directory for model", + "default" : [ + "model" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Learning parameters", + "arguments" : [ + { + "type" : "double", + "name" : "--learning_rate", + "alternatives" : [ + "--eta" + ], + "description" : "Step size shrinkage used in update to prevents overfitting. Range: [0,1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 0.3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--min_split_loss", + "alternatives" : [ + "--gamma" + ], + "description" : "Minimum loss reduction required to make a further partition on a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 0.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--max_depth", + "alternatives" : [ + "-d" + ], + "description" : "Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 6 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_child_weight", + "description" : "Minimum sum of instance weight (hessian) needed in a child. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--max_delta_step", + "description" : "Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 0.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--subsample", + "description" : "Subsample ratio of the training instances. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sampling_method", + "description" : "The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + "uniform" + ], + "required" : false, + "choices" : [ + "uniform", + "gradient_based" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--colsample_bytree", + "description" : "Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--colsample_bylevel", + "description" : "Subsample ratio of columns for each level. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--colsample_bynode", + "description" : "Subsample ratio of columns for each node (split). Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--reg_lambda", + "alternatives" : [ + "--lambda" + ], + "description" : "L2 regularization term on weights. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--reg_alpha", + "alternatives" : [ + "--alpha" + ], + "description" : "L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 0.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--scale_pos_weight", + "description" : "Control the balance of positive and negative weights, useful for unbalanced classes. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/" + }, + { + "type" : "file", + "path" : "../utils/helper.py", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Performs label transfer from reference to query using XGBoost classifier", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/" + }, + { + "type" : "file", + "path" : "resources_test/annotation_test_data/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, 
+ { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "info" : { + "method_id" : "XGBClassifier" + }, + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "git" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + }, + { + "type" : "apt", + "packages" : [ + "libopenblas-dev", + "liblapack-dev", + "gfortran" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "scanpy~=1.9.5", + "xgboost~=1.7.1", + "scikit-learn~=1.1.1", + "numpy~=1.23.5", + "pandas~=1.4.4" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu", + "gpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + 
"mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + }, + { + "type" : "native", + "id" : "native" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/labels_transfer/xgboost/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/labels_transfer/xgboost", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import json +import os +from typing import Optional +import yaml +from pathlib import Path + +import mudata +import numpy as np +import scanpy as sc +import pandas as pd +import xgboost as xgb +from sklearn.model_selection import train_test_split +from sklearn.metrics import classification_report +from sklearn.preprocessing import LabelEncoder + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_obsm_features': $( if [ ! 
-z ${VIASH_PAR_INPUT_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_obsm_features': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBSM_FEATURES+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBSM_FEATURES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_obs_targets': $( if [ ! -z ${VIASH_PAR_REFERENCE_OBS_TARGETS+x} ]; then echo "r'${VIASH_PAR_REFERENCE_OBS_TARGETS//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_obs_predictions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_PREDICTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_PREDICTIONS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output_obs_uncertainty': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBS_UNCERTAINTY//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output_uns_parameters': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_PARAMETERS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_PARAMETERS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'force_retrain': $( if [ ! -z ${VIASH_PAR_FORCE_RETRAIN+x} ]; then echo "r'${VIASH_PAR_FORCE_RETRAIN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'use_gpu': $( if [ ! -z ${VIASH_PAR_USE_GPU+x} ]; then echo "r'${VIASH_PAR_USE_GPU//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'verbosity': $( if [ ! -z ${VIASH_PAR_VERBOSITY+x} ]; then echo "int(r'${VIASH_PAR_VERBOSITY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'model_output': $( if [ ! -z ${VIASH_PAR_MODEL_OUTPUT+x} ]; then echo "r'${VIASH_PAR_MODEL_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'learning_rate': $( if [ ! 
-z ${VIASH_PAR_LEARNING_RATE+x} ]; then echo "float(r'${VIASH_PAR_LEARNING_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_split_loss': $( if [ ! -z ${VIASH_PAR_MIN_SPLIT_LOSS+x} ]; then echo "float(r'${VIASH_PAR_MIN_SPLIT_LOSS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_depth': $( if [ ! -z ${VIASH_PAR_MAX_DEPTH+x} ]; then echo "int(r'${VIASH_PAR_MAX_DEPTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_child_weight': $( if [ ! -z ${VIASH_PAR_MIN_CHILD_WEIGHT+x} ]; then echo "int(r'${VIASH_PAR_MIN_CHILD_WEIGHT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'max_delta_step': $( if [ ! -z ${VIASH_PAR_MAX_DELTA_STEP+x} ]; then echo "float(r'${VIASH_PAR_MAX_DELTA_STEP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sampling_method': $( if [ ! -z ${VIASH_PAR_SAMPLING_METHOD+x} ]; then echo "r'${VIASH_PAR_SAMPLING_METHOD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'colsample_bytree': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYTREE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYTREE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'colsample_bylevel': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYLEVEL+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYLEVEL//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'colsample_bynode': $( if [ ! -z ${VIASH_PAR_COLSAMPLE_BYNODE+x} ]; then echo "float(r'${VIASH_PAR_COLSAMPLE_BYNODE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'reg_lambda': $( if [ ! -z ${VIASH_PAR_REG_LAMBDA+x} ]; then echo "float(r'${VIASH_PAR_REG_LAMBDA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'reg_alpha': $( if [ ! -z ${VIASH_PAR_REG_ALPHA+x} ]; then echo "float(r'${VIASH_PAR_REG_ALPHA//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scale_pos_weight': $( if [ ! 
-z ${VIASH_PAR_SCALE_POS_WEIGHT+x} ]; then echo "float(r'${VIASH_PAR_SCALE_POS_WEIGHT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +from helper import check_arguments, get_reference_features, get_query_features +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +# read config arguments +config = yaml.safe_load(Path(meta["config"]).read_text()) + +# look for training params for method +argument_groups = { grp["name"]: grp["arguments"] for grp in config["functionality"]["argument_groups"] } +training_arg_names = [ arg["name"].replace("--", "") for arg in argument_groups["Learning parameters"] ] +training_params = { arg_name: par[arg_name] for arg_name in training_arg_names } + +def encode_labels(y): + labels_encoder = LabelEncoder() + labels_encoder.fit(y) + + return labels_encoder.transform(y), labels_encoder + + +def get_model_eval(xgb_model, X_test, y_test, labels_encoder): + preds = xgb_model.predict(X_test) + + cr = classification_report(labels_encoder.inverse_transform(y_test), + labels_encoder.inverse_transform(preds), + output_dict=True) + cr_df = pd.DataFrame(cr).transpose() + + return cr_df + + +def train_test_split_adata(adata, labels): + train_data = pd.DataFrame(data=adata.X, index=adata.obs_names) + + X_train, X_test, y_train, y_test = train_test_split( + train_data, labels, test_size=0.2, random_state=42, stratify=labels) + + return X_train, X_test, y_train, y_test + + +def 
train_xgb_model(X_train, y_train, gpu=True) -> xgb.XGBClassifier: + n_classes = len(np.unique(y_train)) + objective = "binary:logistic" if n_classes == 2 else "multi:softprob" + + tree_method = "gpu_hist" if gpu else "hist" + xgbc = xgb.XGBClassifier(tree_method=tree_method, objective=objective, **training_params) + xgbc.fit(X_train, y_train) + + return xgbc + + +def build_classifier(X, y, labels_encoder, label_key, eval_verbosity: Optional[int] = 1, gpu=True) -> xgb.XGBClassifier: + # Adata prep + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) + #Note: Do we need a new train-test split for each classifier? + + # Model training + xgb_model = train_xgb_model(X_train, y_train, gpu=gpu) + + # Model eval + if eval_verbosity != 0: + cr_df = get_model_eval(xgb_model, X_test, y_test, labels_encoder) + + if eval_verbosity == 2: + print(cr_df) + + else: + overall_accuracy = cr_df["support"]["accuracy"] + low_prec_key = cr_df.precision.idxmin() + low_prec_val = cr_df.precision.min() + low_rec_key = cr_df.recall.idxmin() + low_rec_val = cr_df.recall.min() + low_f1_key = cr_df["f1-score"].idxmin() + low_f1_val = cr_df["f1-score"].min() + + print("") + print(f"Summary stats for {label_key} model:") + print(f"Overall accuracy: {overall_accuracy}") + print(f"Min. precision: {low_prec_key}: {low_prec_val}") + print(f"Min. Recall: {low_rec_key}: {low_rec_val}") + print(f"Min. F1-score: {low_f1_key}: {low_f1_val}") + print("") + + return xgb_model + + +def build_ref_classifiers(adata_reference, targets, model_path, + eval_verbosity: Optional[int] = 1, gpu: Optional[bool] = True) -> None: + """ + This function builds xgboost classifiers on a reference embedding for a designated number of + adata_reference.obs columns. Classifier .xgb files and a model_info.json file is written to the \\`model_path\\` + directory. Model evaluation is printed to stdout. 
+ + Inputs: + * \\`adata_reference\\`: The AnnData object that was used to train the reference model + * \\`model_path\\`: The reference model directory where the classifiers will also be stored + * \\`eval_verbosity\\`: The verbosity level for evaluation of the classifier from the range [0;2]. + * \\`gpu\\`: Boolean indicating whether a gpu is available for classifier training + + + Example: + \\`\\`\\` + >>> adata + AnnData object with n_obs x n_vars = 700 x 765 + obs: "ann_finest_level", "ann_level_1" + + >>> os.listdir("/path/to/model") + model_params.pt* + + >>> build_ref_classifiers(adata, "path/to/model", eval_verbosity=1, gpu=True) + >>> os.listdir("/path/to/model") + classifier_ann_finest_level.xgb* model_info.json* + classifier_ann_level_1.xgb* model_params.pt* + \\`\\`\\` + """ + + # Check inputs + if not isinstance(eval_verbosity, int): + raise TypeError("\\`eval_verbosity\\` should be an integer between 0 and 2.") + + if eval_verbosity < 0 or eval_verbosity > 2: + raise ValueError("\\`eval_verbosity\\` should be an integer between 0 and 2.") + + train_data = get_reference_features(adata_reference, par, logger) + + if not os.path.exists(model_path): + os.makedirs(model_path, exist_ok=True) + + # Map from name of classifier to file names + classifiers = dict() + + for label, obs_pred in zip(targets, par["output_obs_predictions"]): + if label not in adata_reference.obs: + raise ValueError(f"{label} is not in the \\`adata\\` object passed!") + + filename = "classifier_" + label + ".xgb" + + labels, labels_encoder = encode_labels(adata_reference.obs[label]) + logger.info(f"Classes: {labels_encoder.classes_}") + + logger.info(f"Building classifier for {label}...") + xgb_model = build_classifier( + X=train_data, + y=labels, + labels_encoder=labels_encoder, + label_key=label, + eval_verbosity=eval_verbosity, + gpu=gpu + ) + + # Save classifier + logger.info("Saving model") + xgb_model.save_model(os.path.join(model_path, filename)) + + # Store classifier info + 
classifiers[label] = { + "filename": filename, + "labels": labels_encoder.classes_.tolist(), + "obs_column": obs_pred, + "model_params": training_params, + } + + # Store model_info.json file + model_info = { + "classifier_info": classifiers + } + + logger.info("Writing model_info to the file") + # Read previous file if it exists + if os.path.exists(model_path + "/model_info.json"): + logger.info("Old model_info file found, updating") + with open(model_path + "/model_info.json", "r") as f: + old_model_info = json.loads(f.read()) + + for key in old_model_info: + if key in model_info: + old_model_info[key].update(model_info[key]) + json_string = json.dumps(old_model_info, indent=4) + + else: + logger.info("Creating a new file") + json_string = json.dumps(model_info, indent=4) + + with open(model_path + "/model_info.json", "w") as f: + f.write(json_string) + + +def project_labels( + query_dataset, + cell_type_classifier_model: xgb.XGBClassifier, + annotation_column_name='label_pred', + uncertainty_column_name='label_uncertainty', + uncertainty_thresh=None # Note: currently not passed to predict function +): + """ + A function that projects predicted labels onto the query dataset, along with uncertainty scores. + Performs in-place update of the adata object, adding columns to the \\`obs\\` DataFrame. 
+ + Input: + * \\`query_dataset\\`: The query \\`AnnData\\` object + * \\`model_file\\`: Path to the classification model file + * \\`prediction_key\\`: Column name in \\`adata.obs\\` where to store the predicted labels + * \\`uncertainty_key\\`: Column name in \\`adata.obs\\` where to store the uncertainty scores + * \\`uncertainty_thresh\\`: The uncertainty threshold above which we call a cell 'Unknown' + + Output: + Nothing is output, the passed anndata is modified inplace + + """ + + if (uncertainty_thresh is not None) and (uncertainty_thresh < 0 or uncertainty_thresh > 1): + raise ValueError(f'\\`uncertainty_thresh\\` must be \\`None\\` or between 0 and 1.') + + query_data = get_query_features(query_dataset, par, logger) + + # Predict labels and probabilities + query_dataset.obs[annotation_column_name] = cell_type_classifier_model.predict(query_data) + + logger.info("Predicting probabilities") + probs = cell_type_classifier_model.predict_proba(query_data) + + # Format probabilities + df_probs = pd.DataFrame(probs, columns=cell_type_classifier_model.classes_, index=query_dataset.obs_names) + query_dataset.obs[uncertainty_column_name] = 1 - df_probs.max(1) + + # Note: this is here in case we want to propose a set of values for the user to accept to seed the + # manual curation of predicted labels + if uncertainty_thresh is not None: + logger.info("Marking uncertain predictions") + query_dataset.obs[annotation_column_name + "_filtered"] = [ + val if query_dataset.obs[uncertainty_column_name][i] < uncertainty_thresh + else "Unknown" for i, val in enumerate(query_dataset.obs[annotation_column_name])] + + return query_dataset + + +def predict( + query_dataset, + cell_type_classifier_model_path, + annotation_column_name: str, + prediction_column_name: str, + uncertainty_column_name: str, + models_info, + use_gpu: bool = False +) -> pd.DataFrame: + """ + Returns \\`obs\\` DataFrame with prediction columns appended + """ + + tree_method = "gpu_hist" if use_gpu else 
"hist" + + labels = models_info["classifier_info"][annotation_column_name]["labels"] + + objective = "binary:logistic" if len(labels) == 2 else "multi:softprob" + cell_type_classifier_model = xgb.XGBClassifier(tree_method=tree_method, objective=objective) + + logger.info("Loading model") + cell_type_classifier_model.load_model(fname=cell_type_classifier_model_path) + + logger.info("Predicting labels") + project_labels(query_dataset, + cell_type_classifier_model, + annotation_column_name=prediction_column_name, + uncertainty_column_name=uncertainty_column_name) + + logger.info("Converting labels from numbers to classes") + labels_encoder = LabelEncoder() + labels_encoder.classes_ = np.array(labels) + query_dataset.obs[prediction_column_name] = labels_encoder.inverse_transform(query_dataset.obs[prediction_column_name]) + + return query_dataset + + +def main(par): + logger.info("Checking arguments") + par = check_arguments(par) + + mdata = mudata.read(par["input"].strip()) + adata = mdata.mod[par["modality"]] + + adata_reference = sc.read(par["reference"], backup_url=par["reference"]) + + # If classifiers for targets are in the model_output directory, simply open them and run (unless \\`retrain\\` != True) + # If some classifiers are missing, train and save them first + # Predict and save the query data + + targets_to_train = [] + + for obs_target in par["reference_obs_targets"]: + if not os.path.exists(par["model_output"]) or f"classifier_{obs_target}.xgb" not in os.listdir(par["model_output"]) or par["force_retrain"]: + logger.info(f"Classifier for {obs_target} added to a training schedule") + targets_to_train.append(obs_target) + else: + logger.info(f"Found classifier for {obs_target}, no retraining required") + + build_ref_classifiers(adata_reference, targets_to_train, model_path=par["model_output"], + gpu=par["use_gpu"], eval_verbosity=par["verbosity"]) + + output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {}) + + with open(par["model_output"] + 
"/model_info.json", "r") as f: + models_info = json.loads(f.read()) + + for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]): + logger.info(f"Predicting {obs_target}") + + adata = predict(query_dataset=adata, + cell_type_classifier_model_path=os.path.join(par["model_output"], "classifier_" + obs_target + ".xgb"), + annotation_column_name=obs_target, + prediction_column_name=obs_pred, + uncertainty_column_name=obs_unc, + models_info=models_info, + use_gpu=par["use_gpu"]) + + if obs_target in targets_to_train: + # Save information about the transfer to .uns + output_uns_parameters[obs_target] = { + "method": "XGBClassifier", + **training_params + } + + adata.uns[par["output_uns_parameters"]] = output_uns_parameters + + logger.info("Updating mdata") + mdata.mod[par['modality']] = adata + mdata.update() + + logger.info("Writing output") + mdata.write_h5mu(par['output'].strip()) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/labels_transfer_xgboost", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu", + "gpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. 
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+              'example': 'my_params.yaml',
+              'multiple': false,
+              'hidden': true
+            ],
+          ]
+        ]
+      ]
+    ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+/**
+ * Greedily wrap a single piece of text on whitespace.
+ * Based on io.viash.helpers.Format.wordWrap.
+ *
+ * @param str       The text to wrap.
+ * @param maxLength The target maximum line length. A single word longer than
+ *                  this is placed on a line of its own (it is never split).
+ *
+ * @return A list of lines. The first word starts its line directly, so no
+ *         leading space or empty first line is produced.
+ */
+def formatWordWrap(str, maxLength) {
+  def lines = []
+  // null means "no word seen yet"; this avoids seeding the first line with ""
+  def line = null
+  for (word in str.split("\\s")) {
+    if (line == null) {
+      line = word
+    } else if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  if (line != null) {
+    lines.add(line)
+  }
+  return lines
+}
+
+/**
+ * Wrap a text that may consist of several newline-separated paragraphs.
+ * Based on Format.paragraphWrap.
+ *
+ * @param str       The text to wrap.
+ * @param maxLength The target maximum line length.
+ *
+ * @return A flat list of lines; each paragraph is wrapped independently.
+ */
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    outLines.addAll(formatWordWrap(par, maxLength))
+  }
+  return outLines
+}
+
+/**
+ * Render the help text of one argument: its name, a list of named
+ * properties (type, default, example, choices, min, max) and its description.
+ *
+ * @param param A Map with the argument settings from the Viash config.
+ *
+ * @return The help text for this argument as a String.
+ */
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  // flags that are listed after the type, only when they are truthy
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      example = param.example.toString()
+    }
+  }
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  // quote a choice when it contains a comma or was changed by escaping
+  def escapeChoice = { choice ->
+    def s1 = choice.replaceAll("\\n", "\\\\n")
+    def s2 = s1.replaceAll("\"", """\\\"""")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  // only properties with a truthy value are printed
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+/**
+ * Render the complete help message of a component.
+ * Based on Helper.generateHelp() in Helper.scala.
+ *
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The help message: name and version, description, usage and one
+ *         section per argument group.
+ */
+def generateHelp(config) {
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // group members may be given by name; resolve those to their settings
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+/**
+ * Print the help message and exit when the 'help' parameter is present.
+ *
+ * @param config A Map of the Viash configuration.
+ */
+def helpMessage(config) {
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+/**
+ * Guess the format of the 'param_list' parameter.
+ *
+ * @param params The parameter map.
+ *
+ * @return "none" when param_list is absent or null, "asis" when it is not a
+ *         String, "csv"/"json"/"yaml" based on the file extension, and
+ *         "yaml_blob" for any other String.
+ */
+def _guessParamListFormat(params) {
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+// shared flag so that the deprecation warning below is printed only once
+viashChannelDeprecationWarningPrinted = false
+
+/**
+ * Deprecated: turn the parameters into a list of processed argument sets.
+ * Use channelFromParams in combination with preprocessInputs instead.
+ *
+ * @param params The parameter map, optionally containing 'param_list'.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A list of Maps, one per argument set, each containing an 'id'.
+ */
+def paramsToList(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has been deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  // each reader returns [ path of the param_list file or null, list of maps ]
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
+  def paramList = multiOptionOut[1]
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        // declared locally so the value does not leak into the script binding
+        def parData
+        // split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      // NOTE(review): the closure receives a Map.Entry, which is never null,
+      // so this filter looks like a no-op - confirm whether null *values*
+      // were meant to be dropped here.
+      .findAll{ par -> par != null }
+  }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+/**
+ * Deprecated: like paramsToList, but returns the argument sets as a channel.
+ */
+def paramsToChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has been deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+/**
+ * Deprecated: like paramsToChannel, but emits [id, argument set] tuples.
+ */
+def viashChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has been deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ * their separator.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(parameterSettings.multiple_sep)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ *                      Each set is a list whose first element is the id.
+ */
+private void _checkUniqueIds(List parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy; the plain unique() would
+  // mutate ppIds and hide the duplicates in the message below
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ * This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ * resolved relative to the provided path. Values that are not Strings, and
+ * parameters that are not input files, are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single value: keep non-Strings as they are
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ *         Each parameter set is a two-element list: [id, Map of parameter values].
+ */
+private List _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns [ path of the param_list file or null, parsed content ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the front
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ *         Values of arguments that are not in the config are returned wrapped in a list.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared with 'def' so the lookups stay local to this closure
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ * - Split the parameter values according to their separator if
+ * the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ * ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ * also be specified in the Viash config ('publishDir' and
+ * 'publish_dir' are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ * the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ * the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ * ~ Check if any unknown parameters are found
+ * ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; each set is a list holding the id,
+ * the Map of parameter values and optional extra elements
+ * which are passed through unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to initialise
+ * a Vdsl3 channel with multiple parameter sets. Possible formats are
+ * csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ * which correspond to the different arguments of this pipeline. A json or a yaml
+ * file should be a list of maps, each of which has keys corresponding to the
+ * arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ * When passing a csv, json or yaml, relative path names are relativized to the
+ * location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ * provides a list of arguments that can be split up into multiple events
+ * in the output channel. Possible formats of param_list are: a csv file,
+ * json file, a yaml file or a yaml blob. Each parameter set (event) must
+ * have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A list of parameter sets with the first element of each set being
+ * the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // An explicit filter is used instead of returning null from collectEntries.
+    // The safe navigation covers arguments that are accepted but not part of the
+    // config (e.g. 'publish_dir'), for which no settings are found.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ * provides a list of arguments that can be split up into multiple events
+ * in the output channel. Possible formats of param_list are: a csv file,
+ * json file, a yaml file or a yaml blob. Each parameter set (event) must
+ * have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ * contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ * sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ * ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ * must be a unique id of the parameter set, and the second element
+ * must contain the parameters themselves. Optional extra elements
+ * of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ * the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Gather the defaults declared in the config, keyed by argument name
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      declaredDefaults[argument.plainName] = argument.default
+    }
+  }
+
+  // Defaults go through the same splitting and casting as the inputs
+  def defaults = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Process the inputs, then let every event's own values override the defaults;
+  // anything after the first two tuple elements is passed through untouched
+  def eventsWithDefaults = applyConfig(params, config).collect { event ->
+    def eventId = event[0]
+    def eventValues = event[1]
+    [eventId, defaults + eventValues] + event.drop(2)
+  }
+
+  _checkUniqueIds(eventsWithDefaults)
+  return eventsWithDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ * sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ * ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ * to a parsed config (see readConfig()). Optionally, a
+ * 'key' key can be provided which can be used to create a unique
+ * name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map which only has expected keys and has all required keys.
+ *
+ * @param map          The value to check.
+ * @param expectedKeys The keys that are allowed in the map.
+ * @param requiredKeys The keys that must be present in the map.
+ * @param mapName      Name of the map, used in the error messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing ('key' is not defined in this closure)
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+/**
+ * Validate the Nextflow process directives and normalise a number of them
+ * (e.g. lists are joined into strings, a container map becomes an image name).
+ * Directives with a null value are dropped.
+ *
+ * @param drctv A Map of directive names to values.
+ *
+ * @return The Map of validated and normalised directives.
+ */
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = drctv["container"]
+      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
+      // the registry can be overridden by an environment variable or a parameter
+      def part1 =
+        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
+        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
+        m.registry ? m.registry + "/" :
+        ""
+      def part2 = m.image
+      def part3 = m.tag ? ":" + m.tag : ":latest"
+      drctv["container"] = part1 + part2 + part3
+    }
+  }
+
+  /* DIRECTIVE containerOptions
+    accepted examples:
+    - "--foo bar"
+    - ["--foo bar", "-f b"]
+  */
+  if (drctv.containsKey("containerOptions")) {
+    if (drctv["containerOptions"] instanceof List) {
+      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
+    }
+    assert drctv["containerOptions"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cpus
+    accepted examples:
+    - 1
+    - 10
+  */
+  if (drctv.containsKey("cpus")) {
+    assert drctv["cpus"] instanceof Integer
+  }
+
+  /* DIRECTIVE disk
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("disk")) {
+    assert drctv["disk"] instanceof CharSequence
+    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE echo
+    accepted examples:
+    - true
+    - false
+  */
+  if (drctv.containsKey("echo")) {
+    assert drctv["echo"] instanceof Boolean
+  }
+
+  /* DIRECTIVE errorStrategy
+    accepted examples:
+    - "terminate"
+    - "finish"
+  */
+  if (drctv.containsKey("errorStrategy")) {
+    assert drctv["errorStrategy"] instanceof CharSequence
+    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
+  }
+
+  /* DIRECTIVE executor
+    accepted examples:
+    - "local"
+    - "sge"
+  */
+  if (drctv.containsKey("executor")) {
+    assert drctv["executor"] instanceof CharSequence
+    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
+  }
+
+  /* DIRECTIVE machineType
+    accepted examples:
+    - "n1-highmem-8"
+  */
+  if (drctv.containsKey("machineType")) {
+    assert drctv["machineType"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE maxErrors
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxErrors")) {
+    assert drctv["maxErrors"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxForks
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxForks")) {
+    assert drctv["maxForks"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxRetries
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxRetries")) {
+    assert drctv["maxRetries"] instanceof Integer
+  }
+
+  /* DIRECTIVE memory
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("memory")) {
+    assert drctv["memory"] instanceof CharSequence
+    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE module
+    accepted examples:
+    - "ncbi-blast/2.2.27"
+    - "ncbi-blast/2.2.27:t_coffee/10.0"
+    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
+  */
+  if (drctv.containsKey("module")) {
+    if (drctv["module"] instanceof List) {
+      drctv["module"] = drctv["module"].join(":")
+    }
+    assert drctv["module"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE penv
+    accepted examples:
+    - "smp"
+  */
+  if (drctv.containsKey("penv")) {
+    assert drctv["penv"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE pod
+    accepted examples:
+    - [ label: "key", value: "val" ]
+    - [ annotation: "key", value: "val" ]
+    - [ env: "key", value: "val" ]
+    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
+  */
+  if (drctv.containsKey("pod")) {
+    if (drctv["pod"] instanceof Map) {
+      drctv["pod"] = [ drctv["pod"] ]
+    }
+    assert drctv["pod"] instanceof List
+    drctv["pod"].forEach { pod ->
+      assert pod instanceof Map
+      // TODO: should more checks be added?
+      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
+      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
+    }
+  }
+
+  /* DIRECTIVE publishDir
+    accepted examples:
+    - []
+    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
+    - "/path/to/dir"
+      is transformed to [[ path: "/path/to/dir" ]]
+    - [ path: "/path/to/dir", mode: "cache" ]
+      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
+  */
+  // TODO: should we also look at params["publishDir"]?
+  if (drctv.containsKey("publishDir")) {
+    def pblsh = drctv["publishDir"]
+
+    // check different options
+    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence
+
+    // turn into list if not already so
+    // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work.
+    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]
+
+    // check elements of publishDir
+    pblsh = pblsh.collect{ elem ->
+      // turn into map if not already so
+      elem = elem instanceof CharSequence ? [ path: elem ] : elem
+
+      // check types and keys
+      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
+      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")
+
+      // check elements in map
+      assert elem.containsKey("path")
+      assert elem["path"] instanceof CharSequence
+      if (elem.containsKey("mode")) {
+        assert elem["mode"] instanceof CharSequence
+        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
+      }
+      if (elem.containsKey("overwrite")) {
+        assert elem["overwrite"] instanceof Boolean
+      }
+      if (elem.containsKey("pattern")) {
+        assert elem["pattern"] instanceof CharSequence
+      }
+      if (elem.containsKey("saveAs")) {
+        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
+      }
+      if (elem.containsKey("enabled")) {
+        assert elem["enabled"] instanceof Boolean
+      }
+
+      // return final result
+      elem
+    }
+    // store final directive
+    drctv["publishDir"] = pblsh
+  }
+
+  /* DIRECTIVE queue
+    accepted examples:
+    - "long"
+    - "short,long"
+    - ["short", "long"]
+  */
+  if (drctv.containsKey("queue")) {
+    if (drctv["queue"] instanceof List) {
+      drctv["queue"] = drctv["queue"].join(",")
+    }
+    assert drctv["queue"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE label
+    accepted examples:
+    - "big_mem"
+    - "big_cpu"
+    - ["big_mem", "big_cpu"]
+  */
+  if (drctv.containsKey("label")) {
+    if (drctv["label"] instanceof CharSequence) {
+      drctv["label"] = [ drctv["label"] ]
+    }
+    assert drctv["label"] instanceof List
+    drctv["label"].forEach { label ->
+      assert label instanceof CharSequence
+      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
+      // ^ does not allow closures
+    }
+  }
+
+  /* DIRECTIVE scratch
+    accepted examples:
+    - true
+    - "/path/to/scratch"
+    - '$MY_PATH_TO_SCRATCH'
+    - "ram-disk"
+  */
+  if (drctv.containsKey("scratch")) {
+    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE storeDir
+    accepted examples:
+    - "/path/to/storeDir"
+  */
+  if (drctv.containsKey("storeDir")) {
+    assert drctv["storeDir"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE stageInMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageInMode")) {
+    assert drctv["stageInMode"] instanceof CharSequence
+    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
+  }
+
+  /* DIRECTIVE stageOutMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageOutMode")) {
+    assert drctv["stageOutMode"] instanceof CharSequence
+    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
+  }
+
+  /* DIRECTIVE tag
+    accepted examples:
+    - "foo"
+    - '$id'
+  */
+  if (drctv.containsKey("tag")) {
+    assert drctv["tag"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+/**
+ * Validate the 'auto' settings: every expected key must be present and be a Boolean.
+ * Entries with a null value are dropped before the check.
+ *
+ * @param auto A Map of auto settings.
+ *
+ * @return A Map containing only the expected keys.
+ */
+// TODO: unit test processAuto
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // declared with 'def' so the list does not leak into the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/labels_transfer/xgboost/nextflow.config b/target/nextflow/labels_transfer/xgboost/nextflow.config new file mode 100644 index 
00000000000..30a52321bbc --- /dev/null +++ b/target/nextflow/labels_transfer/xgboost/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'xgboost' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Performs label transfer from reference to query using XGBoost classifier' + author = 'Vladimir Shitov' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: 
mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/labels_transfer/xgboost/nextflow_params.yaml b/target/nextflow/labels_transfer/xgboost/nextflow_params.yaml new file mode 100644 index 00000000000..b93b2716c7b --- /dev/null +++ b/target/nextflow/labels_transfer/xgboost/nextflow_params.yaml @@ -0,0 +1,24 @@ +# Execution arguments +force_retrain: false +use_gpu: false +verbosity: 1 +# model_output: "$id.$key.model_output.model_output" + +# Learning parameters +learning_rate: 0.3 +min_split_loss: 0 +max_depth: 6 +min_child_weight: 1 +max_delta_step: 0 +subsample: 1 +sampling_method: "uniform" +colsample_bytree: 1 +colsample_bylevel: 1 +colsample_bynode: 1 +reg_lambda: 1 +reg_alpha: 0 +scale_pos_weight: 1 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/labels_transfer/xgboost/nextflow_schema.json b/target/nextflow/labels_transfer/xgboost/nextflow_schema.json new file mode 100644 index 00000000000..a5eb170bb64 --- /dev/null +++ b/target/nextflow/labels_transfer/xgboost/nextflow_schema.json @@ -0,0 +1,263 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "xgboost", 
+"description": "Performs label transfer from reference to query using XGBoost classifier", +"type": "object", +"definitions": { + + + + "execution arguments" : { + "title": "Execution arguments", + "type": "object", + "description": "No description", + "properties": { + + + "force_retrain": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Retrain models on the reference even if model_output directory already has trained classifiers", + "help_text": "Type: `boolean_true`, default: `false`. Retrain models on the reference even if model_output directory already has trained classifiers. WARNING! It will rewrite existing classifiers for targets in the model_output directory!" + , + "default": "False" + } + + + , + "use_gpu": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Use GPU during models training and inference (recommended)", + "help_text": "Type: `boolean`, default: `false`. Use GPU during models training and inference (recommended)." + , + "default": "False" + } + + + , + "verbosity": { + "type": + "integer", + "description": "Type: `integer`, default: `1`. The verbosity level for evaluation of the classifier from the range [0,2]", + "help_text": "Type: `integer`, default: `1`. The verbosity level for evaluation of the classifier from the range [0,2]" + , + "default": "1" + } + + + , + "model_output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model", + "help_text": "Type: `file`, default: `$id.$key.model_output.model_output`. Output directory for model" + , + "default": "$id.$key.model_output.model_output" + } + + +} +}, + + + "learning parameters" : { + "title": "Learning parameters", + "type": "object", + "description": "No description", + "properties": { + + + "learning_rate": { + "type": + "number", + "description": "Type: `double`, default: `0.3`. 
Step size shrinkage used in update to prevents overfitting", + "help_text": "Type: `double`, default: `0.3`. Step size shrinkage used in update to prevents overfitting. Range: [0,1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "0.3" + } + + + , + "min_split_loss": { + "type": + "number", + "description": "Type: `double`, default: `0`. Minimum loss reduction required to make a further partition on a leaf node of the tree", + "help_text": "Type: `double`, default: `0`. Minimum loss reduction required to make a further partition on a leaf node of the tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "0" + } + + + , + "max_depth": { + "type": + "integer", + "description": "Type: `integer`, default: `6`. Maximum depth of a tree", + "help_text": "Type: `integer`, default: `6`. Maximum depth of a tree. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "6" + } + + + , + "min_child_weight": { + "type": + "integer", + "description": "Type: `integer`, default: `1`. Minimum sum of instance weight (hessian) needed in a child", + "help_text": "Type: `integer`, default: `1`. Minimum sum of instance weight (hessian) needed in a child. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "1" + } + + + , + "max_delta_step": { + "type": + "number", + "description": "Type: `double`, default: `0`. Maximum delta step we allow each leaf output to be", + "help_text": "Type: `double`, default: `0`. Maximum delta step we allow each leaf output to be. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "0" + } + + + , + "subsample": { + "type": + "number", + "description": "Type: `double`, default: `1`. 
Subsample ratio of the training instances", + "help_text": "Type: `double`, default: `1`. Subsample ratio of the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "1" + } + + + , + "sampling_method": { + "type": + "string", + "description": "Type: `string`, default: `uniform`, choices: ``uniform`, `gradient_based``. The method to use to sample the training instances", + "help_text": "Type: `string`, default: `uniform`, choices: ``uniform`, `gradient_based``. The method to use to sample the training instances. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference", + "enum": ["uniform", "gradient_based"] + + , + "default": "uniform" + } + + + , + "colsample_bytree": { + "type": + "number", + "description": "Type: `double`, default: `1`. Fraction of columns to be subsampled", + "help_text": "Type: `double`, default: `1`. Fraction of columns to be subsampled. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "1" + } + + + , + "colsample_bylevel": { + "type": + "number", + "description": "Type: `double`, default: `1`. Subsample ratio of columns for each level", + "help_text": "Type: `double`, default: `1`. Subsample ratio of columns for each level. Range (0, 1]. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "1" + } + + + , + "colsample_bynode": { + "type": + "number", + "description": "Type: `double`, default: `1`. Subsample ratio of columns for each node (split)", + "help_text": "Type: `double`, default: `1`. Subsample ratio of columns for each node (split). Range (0, 1]. 
See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "1" + } + + + , + "reg_lambda": { + "type": + "number", + "description": "Type: `double`, default: `1`. L2 regularization term on weights", + "help_text": "Type: `double`, default: `1`. L2 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "1" + } + + + , + "reg_alpha": { + "type": + "number", + "description": "Type: `double`, default: `0`. L1 regularization term on weights", + "help_text": "Type: `double`, default: `0`. L1 regularization term on weights. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "0" + } + + + , + "scale_pos_weight": { + "type": + "number", + "description": "Type: `double`, default: `1`. Control the balance of positive and negative weights, useful for unbalanced classes", + "help_text": "Type: `double`, default: `1`. Control the balance of positive and negative weights, useful for unbalanced classes. See https://xgboost.readthedocs.io/en/stable/parameter.html#parameters-for-tree-booster for the reference" + , + "default": "1" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/execution arguments" + }, + + { + "$ref": "#/definitions/learning parameters" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/labels_transfer/xgboost/setup_logger.py b/target/nextflow/labels_transfer/xgboost/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/labels_transfer/xgboost/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml b/target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml new file mode 100644 index 00000000000..771016b4cc9 --- /dev/null +++ b/target/nextflow/mapping/bd_rhapsody/.config.vsh.yaml @@ -0,0 +1,417 @@ +functionality: + name: "bd_rhapsody" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "string" + name: "--mode" + description: "Whether to run a whole transcriptome analysis (WTA) or a targeted\ + \ analysis." 
+ info: null + example: + - "wta" + required: true + choices: + - "wta" + - "targeted" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to your read files in the FASTQ.GZ format. You may specify\ + \ as many R1/R2 read pairs as you want." + info: null + example: + - "input.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "-r" + - "--reference_genome" + description: "Refence to map to. For `--mode wta`, this is the path to STAR\ + \ index as a tar.gz file. For `--mode targeted`, this is the path to mRNA\ + \ reference file for pre-designed, supplemental, or custom panel, in FASTA\ + \ format" + info: null + example: + - "reference_genome.tar.gz|reference.fasta" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--transcriptome_annotation" + alternatives: + - "-t" + description: "Path to GTF annotation file (only for `--mode wta`)." + info: null + example: + - "transcriptome.gtf" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--abseq_reference" + alternatives: + - "-a" + description: "Path to the AbSeq reference file in FASTA format. Only needed\ + \ if BD AbSeq Ab-Oligos are used." + info: null + example: + - "abseq_reference.fasta" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--supplemental_reference" + alternatives: + - "-s" + description: "Path to the supplemental reference file in FASTA format. 
Only\ + \ needed if there are additional transgene sequences used in the experiment\ + \ (only for `--mode wta`)." + info: null + example: + - "supplemental_reference.fasta" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sample_prefix" + description: "Specify a run name to use as the output file base name. Use only\ + \ letters, numbers, or hyphens. Do not use special characters or spaces." + info: null + default: + - "sample" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output folder. Output still needs to be processed further." + info: null + example: + - "output_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Putative cell calling settings" + arguments: + - type: "string" + name: "--putative_cell_call" + description: "Specify the dataset to be used for putative cell calling. For\ + \ putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference\ + \ fasta file above." 
+ info: null + example: + - "mRNA" + required: false + choices: + - "mRNA" + - "AbSeq_Experimental" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--exact_cell_count" + description: "Exact cell count - Set a specific number (>=1) of cells as putative,\ + \ based on those with the highest error-corrected read count" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--disable_putative_calling" + description: "Disable Refined Putative Cell Calling - Determine putative cells\ + \ using only the basic algorithm (minimum second derivative along the cumulative\ + \ reads curve). The refined algorithm attempts to remove false positives and\ + \ recover false negatives, but may not be ideal for certain complex mixtures\ + \ of cell types. Does not apply if Exact Cell Count is set." + info: null + direction: "input" + dest: "par" + - name: "Subsample arguments" + arguments: + - type: "double" + name: "--subsample" + description: "A number >1 or fraction (0 < n < 1) to indicate the number or\ + \ percentage of reads to subsample." + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--subsample_seed" + description: "A seed for replicating a previous subsampled run." + info: null + example: + - 3445 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Multiplex arguments" + arguments: + - type: "string" + name: "--sample_tags_version" + description: "Specify if multiplexed run." 
+ info: null + example: + - "human" + required: false + choices: + - "human" + - "hs" + - "mouse" + - "mm" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--tag_names" + description: "Tag_Names (optional) - Specify the tag number followed by '-'\ + \ and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not\ + \ use the special characters: &, (), [], {}, <>, ?, |\n" + info: null + example: + - "4-mySample" + - "9-myOtherSample" + - "6-alsoThisSample" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - name: "VDJ arguments" + arguments: + - type: "string" + name: "--vdj_version" + description: "Specify if VDJ run." + info: null + example: + - "human" + required: false + choices: + - "human" + - "mouse" + - "humanBCR" + - "humanBCR" + - "humanTCR" + - "mouseBCR" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "CWL-runner arguments" + arguments: + - type: "boolean" + name: "--parallel" + description: "Run jobs in parallel." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--timestamps" + description: "Add timestamps to the errors, warnings, and notifications." + info: null + direction: "input" + dest: "par" + - type: "boolean_true" + name: "--dryrun" + description: "If true, the output directory will only contain the CWL input\ + \ files, but the pipeline itself will not be executed." 
+ info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "rhapsody_wta_1.10.1_nodocker.cwl" + - type: "file" + path: "rhapsody_targeted_1.10.1_nodocker.cwl" + - type: "file" + path: "src/utils/setup_logger.py" + description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe\ + \ CWL pipeline file is obtained by cloning 'https://bitbucket.org/CRSwDev/cwl/src/master/'\ + \ and removing all objects with class 'DockerRequirement' from the YML.\n\nThis\ + \ pipeline can be used for a targeted analysis (with `--mode targeted`) or for\ + \ a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\"targeted\"\ + `, then either the `--reference` or `--abseq_reference` parameters must be defined.\n\ + * If mode is `\"wta\"`, then `--reference` and `--transcriptome_annotation` must\ + \ be defined, `--abseq_reference` and `--supplemental_reference` is optional.\n\ + \nThe reference_genome and transcriptome_annotation files can be generated with\ + \ the make_reference pipeline.\nAlternatively, BD also provides standard references\ + \ which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n\ + \ - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n" + test_resources: + - type: "bash_script" + path: "test_memory.sh" + is_executable: true + - type: "bash_script" + path: "test_wta.sh" + is_executable: true + - type: "bash_script" + path: "test_targeted.sh" + is_executable: true + - type: "file" + path: "resources_test/bdrhap_vdj" + - type: "file" + path: "resources_test/bdrhap_5kjrt" + - type: "file" + path: "resources_test/reference_gencodev41_chr1/" + info: + name: "BD Rhapsody" + short_description: "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline" + status: "enabled" + requirements: 
+ commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/bd_rhapsody:1.10.1" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "pandas<2" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/bd_rhapsody" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/bd_rhapsody/bd_rhapsody" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: 
"https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/bd_rhapsody/main.nf b/target/nextflow/mapping/bd_rhapsody/main.nf new file mode 100644 index 00000000000..e6330397e3b --- /dev/null +++ b/target/nextflow/mapping/bd_rhapsody/main.nf @@ -0,0 +1,3249 @@ +// bd_rhapsody 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "bd_rhapsody", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "string", + "name" : "--mode", + "description" : "Whether to run a whole transcriptome analysis (WTA) or a targeted analysis.", + "example" : [ + "wta" + ], + "required" : true, + 
"choices" : [ + "wta", + "targeted" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want.", + "example" : [ + "input.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "alternatives" : [ + "-r", + "--reference_genome" + ], + "description" : "Refence to map to. For `--mode wta`, this is the path to STAR index as a tar.gz file. For `--mode targeted`, this is the path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format", + "example" : [ + "reference_genome.tar.gz|reference.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--transcriptome_annotation", + "alternatives" : [ + "-t" + ], + "description" : "Path to GTF annotation file (only for `--mode wta`).", + "example" : [ + "transcriptome.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--abseq_reference", + "alternatives" : [ + "-a" + ], + "description" : "Path to the AbSeq reference file in FASTA format. 
Only needed if BD AbSeq Ab-Oligos are used.", + "example" : [ + "abseq_reference.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--supplemental_reference", + "alternatives" : [ + "-s" + ], + "description" : "Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment (only for `--mode wta`).", + "example" : [ + "supplemental_reference.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sample_prefix", + "description" : "Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces.", + "default" : [ + "sample" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output folder. Output still needs to be processed further.", + "example" : [ + "output_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Putative cell calling settings", + "arguments" : [ + { + "type" : "string", + "name" : "--putative_cell_call", + "description" : "Specify the dataset to be used for putative cell calling. 
For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above.", + "example" : [ + "mRNA" + ], + "required" : false, + "choices" : [ + "mRNA", + "AbSeq_Experimental" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--exact_cell_count", + "description" : "Exact cell count - Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--disable_putative_calling", + "description" : "Disable Refined Putative Cell Calling - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set.", + "direction" : "input", + "dest" : "par" + } + ] + }, + { + "name" : "Subsample arguments", + "arguments" : [ + { + "type" : "double", + "name" : "--subsample", + "description" : "A number >1 or fraction (0 < n < 1) to indicate the number or percentage of reads to subsample.", + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--subsample_seed", + "description" : "A seed for replicating a previous subsampled run.", + "example" : [ + 3445 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Multiplex arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--sample_tags_version", + "description" : "Specify if multiplexed run.", + "example" : [ + "human" + ], + "required" : false, + "choices" : [ + "human", + "hs", + "mouse", + "mm" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--tag_names", + "description" : "Tag_Names (optional) - Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not use the special characters: &, (), [], {}, <>, ?, |\n", + "example" : [ + "4-mySample", + "9-myOtherSample", + "6-alsoThisSample" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "VDJ arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--vdj_version", + "description" : "Specify if VDJ run.", + "example" : [ + "human" + ], + "required" : false, + "choices" : [ + "human", + "mouse", + "humanBCR", + "humanBCR", + "humanTCR", + "mouseBCR" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : 
"CWL-runner arguments", + "arguments" : [ + { + "type" : "boolean", + "name" : "--parallel", + "description" : "Run jobs in parallel.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--timestamps", + "description" : "Add timestamps to the errors, warnings, and notifications.", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--dryrun", + "description" : "If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed.", + "direction" : "input", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" + }, + { + "type" : "file", + "path" : "rhapsody_wta_1.10.1_nodocker.cwl", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" + }, + { + "type" : "file", + "path" : "rhapsody_targeted_1.10.1_nodocker.cwl", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe CWL pipeline file is obtained by cloning 'https://bitbucket.org/CRSwDev/cwl/src/master/' and removing all objects with class 'DockerRequirement' from the YML.\n\nThis pipeline can be used for a targeted analysis (with `--mode targeted`) or for a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\\"targeted\\"`, then either the `--reference` or `--abseq_reference` parameters must be defined.\n* If mode is `\\"wta\\"`, then `--reference` and `--transcriptome_annotation` must be defined, `--abseq_reference` and 
`--supplemental_reference` is optional.\n\nThe reference_genome and transcriptome_annotation files can be generated with the make_reference pipeline.\nAlternatively, BD also provides standard references which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test_memory.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" + }, + { + "type" : "bash_script", + "path" : "test_wta.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" + }, + { + "type" : "bash_script", + "path" : "test_targeted.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/" + }, + { + "type" : "file", + "path" : "resources_test/bdrhap_vdj", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/bdrhap_5kjrt", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/reference_gencodev41_chr1/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "info" : { + "name" : "BD Rhapsody", + "short_description" : "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline" + }, + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/bd_rhapsody:1.10.1", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", 
+ "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "pandas<2" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/bd_rhapsody/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/bd_rhapsody", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e 
+tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import os +import re +import subprocess +import tempfile +import sys +from typing import Any +import pandas as pd +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'transcriptome_annotation': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_ANNOTATION+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_ANNOTATION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'abseq_reference': $( if [ ! -z ${VIASH_PAR_ABSEQ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_ABSEQ_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'supplemental_reference': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTAL_REFERENCE+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTAL_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sample_prefix': $( if [ ! -z ${VIASH_PAR_SAMPLE_PREFIX+x} ]; then echo "r'${VIASH_PAR_SAMPLE_PREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'putative_cell_call': $( if [ ! -z ${VIASH_PAR_PUTATIVE_CELL_CALL+x} ]; then echo "r'${VIASH_PAR_PUTATIVE_CELL_CALL//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'exact_cell_count': $( if [ ! -z ${VIASH_PAR_EXACT_CELL_COUNT+x} ]; then echo "int(r'${VIASH_PAR_EXACT_CELL_COUNT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'disable_putative_calling': $( if [ ! 
-z ${VIASH_PAR_DISABLE_PUTATIVE_CALLING+x} ]; then echo "r'${VIASH_PAR_DISABLE_PUTATIVE_CALLING//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'subsample': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "float(r'${VIASH_PAR_SUBSAMPLE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'subsample_seed': $( if [ ! -z ${VIASH_PAR_SUBSAMPLE_SEED+x} ]; then echo "int(r'${VIASH_PAR_SUBSAMPLE_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sample_tags_version': $( if [ ! -z ${VIASH_PAR_SAMPLE_TAGS_VERSION+x} ]; then echo "r'${VIASH_PAR_SAMPLE_TAGS_VERSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'tag_names': $( if [ ! -z ${VIASH_PAR_TAG_NAMES+x} ]; then echo "r'${VIASH_PAR_TAG_NAMES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'vdj_version': $( if [ ! -z ${VIASH_PAR_VDJ_VERSION+x} ]; then echo "r'${VIASH_PAR_VDJ_VERSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'parallel': $( if [ ! -z ${VIASH_PAR_PARALLEL+x} ]; then echo "r'${VIASH_PAR_PARALLEL//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'timestamps': $( if [ ! -z ${VIASH_PAR_TIMESTAMPS+x} ]; then echo "r'${VIASH_PAR_TIMESTAMPS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def is_gz_file(filepath): + with open(filepath, 'rb') as test_f: + return test_f.read(2) == b'\\\\x1f\\\\x8b' + +def strip_margin(text: str) -> str: + return re.sub('(\\\\n?)[ \\\\t]*\\\\|', '\\\\\\\\1', text) + +def process_params(par: dict[str, Any]) -> str: + # check input parameters + assert par["input"] is not None, "Pass at least one set of inputs to --input." + if par["mode"] == "wta": + assert len(par["reference"]) == 1, "When mode is \\\\"wta\\\\", --reference should be length 1" + assert par["transcriptome_annotation"] is not None, "When mode is \\\\"wta\\\\", --transcriptome_annotation should be defined" + elif par["mode"] == "targeted": + assert par["transcriptome_annotation"] is None, "When mode is \\\\"targeted\\\\", --transcriptome_annotation should be undefined" + assert par["supplemental_reference"] is None, "When mode is \\\\"targeted\\\\", --supplemental_reference should be undefined" + + # checking sample prefix + if re.match("[^A-Za-z0-9]", par["sample_prefix"]): + logger.warning("--sample_prefix should only consist of letters, numbers or hyphens. 
Replacing all '[^A-Za-z0-9]' with '-'.") + par["sample_prefix"] = re.sub("[^A-Za-z0-9\\\\\\\\-]", "-", par["sample_prefix"]) + + # if par_input is a directory, look for fastq files + if len(par["input"]) == 1 and os.path.isdir(par["input"][0]): + par["input"] = [ os.path.join(dp, f) for dp, dn, filenames in os.walk(par["input"]) for f in filenames if re.match(r'.*\\\\.fastq.gz', f) ] + + # use absolute paths + par["input"] = [ os.path.abspath(f) for f in par["input"] ] + if par["reference"]: + par["reference"] = [ os.path.abspath(f) for f in par["reference"] ] + if par["transcriptome_annotation"]: + par["transcriptome_annotation"] = os.path.abspath(par["transcriptome_annotation"]) + if par["abseq_reference"]: + par["abseq_reference"] = [ os.path.abspath(f) for f in par["abseq_reference"] ] + if par["supplemental_reference"]: + par["supplemental_reference"] = [ os.path.abspath(f) for f in par["supplemental_reference"] ] + par["output"] = os.path.abspath(par["output"]) + + return par + +def generate_config(par: dict[str, Any]) -> str: + content_list = [strip_margin(f"""\\\\ +#!/usr/bin/env cwl-runner + +cwl:tool: rhapsody + +# This is a YML file used to specify the inputs for a BD Genomics {"WTA" if par["mode"] == "wta" else "Targeted" } Rhapsody Analysis pipeline run. See the +# BD Genomics Analysis Setup User Guide (Doc ID: 47383) for more details. + +## Reads (required) - Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want. +Reads: +""")] + + for file in par["input"]: + content_list.append(strip_margin(f"""\\\\ + - class: File + location: "{file}" +""")) + + if par["reference"] and par["mode"] == "wta": + content_list.append(strip_margin(f"""\\\\ + +## Reference_Genome (required) - Path to STAR index for tar.gz format. See Doc ID: 47383 for instructions to obtain pre-built STAR index file. 
+Reference_Genome: + class: File + location: "{par["reference"][0]}" +""")) + + if par["reference"] and par["mode"] == "targeted": + content_list.append(strip_margin(f"""\\\\ + +## Reference (optional) - Path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format. +Reference: +""")) + for file in par["reference"]: + content_list.append(strip_margin(f"""\\\\ + - class: File + location: {file} +""")) + + if par["transcriptome_annotation"]: + content_list.append(strip_margin(f"""\\\\ + +## Transcriptome_Annotation (required) - Path to GTF annotation file +Transcriptome_Annotation: + class: File + location: "{par["transcriptome_annotation"]}" +""")) + + if par["abseq_reference"]: + content_list.append(strip_margin(f"""\\\\ + +## AbSeq_Reference (optional) - Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used. +AbSeq_Reference: +""")) + for file in par["abseq_reference"]: + content_list.append(strip_margin(f"""\\\\ + - class: File + location: {file} +""")) + + if par["supplemental_reference"]: + content_list.append(strip_margin(f"""\\\\ + +## Supplemental_Reference (optional) - Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment. +Supplemental_Reference: +""")) + for file in par["supplemental_reference"]: + content_list.append(strip_margin(f"""\\\\ + - class: File + location: {file} +""")) + + ## Putative Cell Calling Settings + content_list.append(strip_margin(f"""\\\\ + +#################################### +## Putative Cell Calling Settings ## +#################################### +""")) + + if par["putative_cell_call"]: + content_list.append(strip_margin(f"""\\\\ +## Putative cell calling dataset (optional) - Specify the dataset to be used for putative cell calling: mRNA or AbSeq_Experimental. +## For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above. 
+## By default, the mRNA data will be used for putative cell calling. +Putative_Cell_Call: {par["putative_cell_call"]} +""")) + + if par["exact_cell_count"]: + content_list.append(strip_margin(f"""\\\\ +## Exact cell count (optional) - Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count +Exact_Cell_Count: {par["exact_cell_count"]} +""")) + + if par["disable_putative_calling"]: + content_list.append(strip_margin(f"""\\\\ +## Disable Refined Putative Cell Calling (optional) - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set. +## The values can be true or false. By default, the refined algorithm is used. +Basic_Algo_Only: {str(par["disable_putative_calling"]).lower()} +""")) + + ## Subsample Settings + content_list.append(strip_margin(f"""\\\\ + +######################## +## Subsample Settings ## +######################## +""" + )) + + if par["subsample"]: + content_list.append(strip_margin(f"""\\\\ +## Subsample (optional) - A number >1 or fraction (0 < n < 1) to indicate the number or percentage of reads to subsample. +Subsample: {par["subsample"]} +""")) + + if par["subsample_seed"]: + content_list.append(strip_margin(f"""\\\\ +## Subsample seed (optional) - A seed for replicating a previous subsampled run. 
+Subsample_seed: {par["subsample_seed"]} +""")) + + + ## Multiplex options + content_list.append(strip_margin(f"""\\\\ + +####################### +## Multiplex options ## +####################### +""" + )) + + if par["sample_tags_version"]: + content_list.append(strip_margin(f"""\\\\ +## Sample Tags Version (optional) - Specify if multiplexed run: human, hs, mouse or mm +Sample_Tags_Version: {par["sample_tags_version"]} +""")) + + if par["tag_names"]: + content_list.append(strip_margin(f"""\\\\ +## Tag_Names (optional) - Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv +# Do not use the special characters: &, (), [], {{}}, <>, ?, | +Tag_Names: [{', '.join(par["tag_names"])}] +""")) + + ## VDJ options + content_list.append(strip_margin(f"""\\\\ + +################# +## VDJ options ## +################# +""" + )) + + if par["vdj_version"]: + content_list.append(strip_margin(f"""\\\\ +## VDJ Version (optional) - Specify if VDJ run: human, mouse, humanBCR, humanTCR, mouseBCR, mouseTCR +VDJ_Version: {par["vdj_version"]} +""")) + + ## VDJ options + content_list.append(strip_margin(f"""\\\\ + +######################## +## Additional Options ## +######################## +""" + )) + + if par["sample_prefix"]: + content_list.append(strip_margin(f"""\\\\ +## Run Name (optional) - Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces. 
+Run_Name: {par["sample_prefix"]} +""")) + + ## Write config to file + return ''.join(content_list) + +def generate_cwl_file(par: dict[str, Any], meta: dict[str, Any]) -> str: + # create cwl file (if need be) + if par["mode"] == "wta": + orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_wta_1.10.1_nodocker.cwl") + elif par["mode"] == "targeted": + orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_targeted_1.10.1_nodocker.cwl") + + # Inject computational requirements into pipeline + if meta["memory_mb"] or meta["cpus"]: + cwl_file = os.path.join(par["output"], "pipeline.cwl") + + # Read in the file + with open(orig_cwl_file, 'r') as file : + cwl_data = file.read() + + # Inject computational requirements into pipeline + if meta["memory_mb"]: + memory = int(meta["memory_mb"]) - 2000 # keep 2gb for OS + cwl_data = re.sub('"ramMin": [^\\\\n]*,\\\\n', f'"ramMin": {memory},\\\\n', cwl_data) + if meta["cpus"]: + cwl_data = re.sub('"coresMin": [^\\\\n]*,\\\\n', f'"coresMin": {meta["cpus"]},\\\\n', cwl_data) + + # Write the file out again + with open(cwl_file, 'w') as file: + file.write(cwl_data) + else: + cwl_file = orig_cwl_file + + return cwl_file + +def process_fasta(feature_type: str, path: str) -> pd.DataFrame: + with open(path) as f: + df = pd.DataFrame(data={ + 'feature_type': feature_type, + 'feature_id': [line[1:].strip() for line in f if line[0] == ">"], + 'reference_file': os.path.basename(path), + }) + return df + +def process_gtf(feature_type: str, path: str) -> pd.DataFrame: + with open(path) as f: + data = [] + for line in f: + if not line.startswith("#"): + attr = dict(item.strip().split(' ') for item in line.split('\\\\t')[8].strip('\\\\n').split(';') if item) + row = { + 'feature_types': feature_type, + 'feature_ids': attr["gene_name"].strip("\\\\""), + 'reference_file': os.path.basename(path), + } + data.append(row) + df = pd.DataFrame(data) + df = df.drop_duplicates() + return df + +def extract_feature_types(par: dict[str, Any]): + 
feature_types = [] + + if par["mode"] == "targeted": + for file in par["reference"]: + logger.info(f"Processing reference fasta {file}") + feature_types.append(process_fasta("Gene Expression", file)) + + if par["mode"] == "wta": + file = par["transcriptome_annotation"] + logger.info(f"Processing reference gtf {file}") + feature_types.append(process_gtf("Gene Expression", file)) + + if par["abseq_reference"]: + for file in par["abseq_reference"]: + logger.info(f"Processing abseq fasta {file}") + feature_types.append(process_fasta("Antibody Capture", file)) + + if par["supplemental_reference"]: + for file in par["supplemental_reference"]: + logger.info(f"Processing supp fasta {file}") + feature_types.append(process_fasta("Other", file)) + + return pd.concat(feature_types) + +def main(par: dict[str, Any], meta: dict[str, Any]): + # Preprocess params + par = process_params(par) + + # Create output dir if not exists + if not os.path.exists(par["output"]): + os.makedirs(par["output"]) + + ## Process parameters + proc_pars = ["--no-container", "--outdir", par["output"]] + + if par["parallel"]: + proc_pars.append("--parallel") + + if par["timestamps"]: + proc_pars.append("--timestamps") + + with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_wta-", dir=meta["temp_dir"]) as temp_dir: + # extract transcriptome gtf if need be + if par["transcriptome_annotation"] and is_gz_file(par["transcriptome_annotation"]): + with open(os.path.join(temp_dir, "transcriptome.gtf"), 'wb') as genes_uncompressed: + with gzip.open(par["transcriptome_annotation"], 'rb') as genes_compressed: + shutil.copyfileobj(genes_compressed, genes_uncompressed) + par["transcriptome_annotation"] = genes_uncompressed.name + + # Create params file + config_file = os.path.join(par["output"], "config.yml") + config_content = generate_config(par) + with open(config_file, "w") as f: + f.write(config_content) + + # Create cwl file (if need be) + cwl_file = generate_cwl_file(par, meta) + + ## Run pipeline + if 
not par["dryrun"]: + cmd = ["cwl-runner"] + proc_pars + [cwl_file, os.path.basename(config_file)] + + env = dict(os.environ) + env["TMPDIR"] = temp_dir + + logger.info("> " + ' '.join(cmd)) + _ = subprocess.check_call( + cmd, + cwd=os.path.dirname(config_file), + env=env + ) + + # extracting feature ids from references + # extract info from reference files (while they still exist) + feature_df = extract_feature_types(par) + feature_types_file = os.path.join(par["output"], "feature_types.tsv") + feature_df.to_csv(feature_types_file, sep="\\\\t", index=False) + + + if not par["dryrun"]: + # look for counts file + if not par["sample_prefix"]: + par["sample_prefix"] = "sample" + counts_filename = par["sample_prefix"] + "_RSEC_MolsPerCell.csv" + + if par["sample_tags_version"]: + counts_filename = "Combined_" + counts_filename + counts_file = os.path.join(par["output"], counts_filename) + + if not os.path.exists(counts_file): + raise ValueError(f"Could not find output counts file '{counts_filename}'") + + # look for metrics file + metrics_filename = par["sample_prefix"] + "_Metrics_Summary.csv" + metrics_file = os.path.join(par["output"], metrics_filename) + if not os.path.exists(metrics_file): + raise ValueError(f"Could not find output metrics file '{metrics_filename}'") + +if __name__ == "__main__": + main(par, meta) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_bd_rhapsody", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a 
map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
+ // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing]

  // if there are missing arguments and there is an existing group, add the missing arguments to it
  } else if (existing != null) {
    def newEx = existing.clone()
    newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String})
    return [newEx]

  // else create a new group
  } else {
    def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}]
    return [newEx]
  }
}

/**
 * Fill in defaults on a parsed Viash config and derive the combined
 * 'allArguments' and 'allArgumentGroups' lists.
 * Based on how Functionality.scala is implemented.
 *
 * @param config A parsed Viash config (a Map with a 'functionality' entry).
 *               The map is modified in place.
 *
 * @return The same config map, with defaults applied.
 */
def processConfig(config) {
  // TODO: assert .functionality etc.
  if (config.functionality.inputs) {
    System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.")
  }
  if (config.functionality.outputs) {
    System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.")
  }

  // set defaults for inputs
  config.functionality.inputs =
    (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg ->
      arg.type = arg.type != null ? arg.type : "file"
      arg.direction = "input"
      processArgument(arg)
    }
  // set defaults for outputs
  config.functionality.outputs =
    (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg ->
      arg.type = arg.type != null ? arg.type : "file"
      arg.direction = "output"
      processArgument(arg)
    }
  // set defaults for arguments
  config.functionality.arguments =
    (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg ->
      processArgument(arg)
    }
  // set defaults for argument_group arguments
  // (a String entry is a reference to an argument by name: strip its leading dashes)
  config.functionality.argument_groups =
    (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp ->
      grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg ->
        arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg)
      }
      grp
    }

  // create combined arguments list
  config.functionality.allArguments =
    config.functionality.inputs +
    config.functionality.outputs +
    config.functionality.arguments +
    config.functionality.argument_groups.collectMany{ group ->
      group.arguments.findAll{ it !instanceof String }
    }

  // add missing argument groups (based on Functionality::allArgumentGroups())
  def argGroups = config.functionality.argument_groups
  def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs)
  def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs)
  def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments)
  def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name)))
  config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered

  config
}

/**
 * Read a Viash config from a yaml file and apply processConfig() to it.
 *
 * @param file Path of the config file; when null, "$projectDir/config.vsh.yaml" is read.
 *
 * @return The processed config.
 */
def readConfig(file) {
  def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml")
  processConfig(config)
}

/**
 * Recursively merge two maps into a new map (lhs is cloned, not modified at top level).
 *   - Map values present on both sides are merged recursively.
 *   - Collection values present on both sides are concatenated.
 *   - Any other value from rhs overrides the one from lhs.
 */
def mergeMap(Map lhs, Map rhs) {
  return rhs.inject(lhs.clone()) { map, entry ->
    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
      map[entry.key] = mergeMap(map[entry.key], entry.value)
    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
      map[entry.key] += entry.value
    } else {
      map[entry.key] = entry.value
    }
    return map
  }
}

/**
 * Add the Nextflow-level arguments ('--publish_dir' and '--param_list') to a
 * config as an extra argument group, and re-process the merged config.
 *
 * @param config A Viash config.
 *
 * @return A processed copy of the config that contains the extra argument group.
 */
def addGlobalParams(config) {
  def localConfig = [
    "functionality" : [
      "argument_groups": [
        [
          "name": "Nextflow input-output arguments",
          "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
          "arguments" : [
            [
              'name': '--publish_dir',
              'required': true,
              'type': 'string',
              'description': 'Path to an output directory.',
              'example': 'output/',
              'multiple': false
            ],
            [
              'name': '--param_list',
              'required': false,
              'type': 'string',
              'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
              |
              |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
              |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
              |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
              |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
              |
              |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
              'example': 'my_params.yaml',
              'multiple': false,
              'hidden': true
            ],
          ]
        ]
      ]
    ]
  ]

  return processConfig(mergeMap(config, localConfig))
}

// helper functions for generating help //

/**
 * Wrap a string into lines of at most maxLength characters, breaking on whitespace.
 * Based on io.viash.helpers.Format.wordWrap.
 *
 * @param str       The text to wrap.
 * @param maxLength Maximum length of a line; a single word longer than this
 *                  is kept on a line of its own.
 *
 * @return A list of lines.
 */
def formatWordWrap(str, maxLength) {
  def words = str.split("\\s").toList()

  // Seed the first line with the first word: starting from an empty line
  // prefixed every result with a space and could emit an empty first line.
  // (split() always yields at least one element, so pop() is safe here.)
  def line = words.pop()
  def lines = []
  while (!words.isEmpty()) {
    def word = words.pop()
    if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  lines.add(line)
  return lines
}

/**
 * Wrap every paragraph (newline-separated) of a string into lines of at most
 * maxLength characters. Based on Format.paragraphWrap.
 *
 * @return A list of lines for all paragraphs, in order.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    def word = null
    def line = words.pop()
    while(!words.isEmpty()) {
      word = words.pop()
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    // flush the last (possibly only) line of the paragraph
    if (words.isEmpty()) {
      outLines.add(line)
    }
  }
  return outLines
}

def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ?
param.multiple_sep : ", ")
    } else {
      example = param.example.toString()
    }
  }
  def min = param.min?.toString()
  def max = param.max?.toString()

  def escapeChoice = { choice ->
    def s1 = choice.replaceAll("\\n", "\\\\n")
    def s2 = s1.replaceAll("\"", """\\\"""")
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  // Use the literal String.replace() here: with replaceAll() the backslash in
  // the replacement is an escape character, so a newline became the letter 'n'
  // instead of the two characters '\' and 'n'.
  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

/**
 * Build the help text of a component: name and version, description, usage,
 * and one section per argument group.
 * Based on Helper.generateHelp() in Helper.scala
 *
 * @param config A processed Viash config (must contain functionality.allArgumentGroups).
 *
 * @return The help text as a single string.
 */
def generateHelp(config) {
  def fun = config.functionality

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    // a String entry refers to an argument by plain name; unresolved names are dropped
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the help text and exit when the 'help' parameter is present.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Guess the format of params.param_list.
 *
 * @return "none" when param_list is absent or null, "asis" when it is not a
 *         String, "csv" / "json" / "yaml" based on the file extension, and
 *         "yaml_blob" for any other String.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// script-level flag: the deprecation warning below is printed only once
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: combine defaults, params and the entries of 'param_list' into a
 * list of argument maps (one map per parameter set, each containing an 'id').
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally so the value does not leak into the script binding
        def parData
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) leaves ppIds untouched, so the message still lists the duplicates
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: same as paramsToList(), wrapped in a Channel.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: same as paramsToChannel(), with every event mapped to [id, args].
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not found in the config are returned unchanged.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
      "Error: argument ${parName} has too many values.\n" +
      " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] // 'def' keeps the list local instead of leaking it into the script binding + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // the advice is concatenated into one message; a second log.warn argument would not be printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // 'def' keeps this local to the closure rather than a shared script-binding variable + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parentheses required: '!' binds tighter than instanceof + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/nextflow.config b/target/nextflow/mapping/bd_rhapsody/nextflow.config new file mode 100644 index 
00000000000..c875dd85b99 --- /dev/null +++ b/target/nextflow/mapping/bd_rhapsody/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'bd_rhapsody' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe CWL pipeline file is obtained by cloning \'https://bitbucket.org/CRSwDev/cwl/src/master/\' and removing all objects with class \'DockerRequirement\' from the YML.\n\nThis pipeline can be used for a targeted analysis (with `--mode targeted`) or for a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `"targeted"`, then either the `--reference` or `--abseq_reference` parameters must be defined.\n* If mode is `"wta"`, then `--reference` and `--transcriptome_annotation` must be defined, `--abseq_reference` and `--supplemental_reference` is optional.\n\nThe reference_genome and transcriptome_annotation files can be generated with the make_reference pipeline.\nAlternatively, BD also provides standard references which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } 
+ singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml b/target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml new file mode 100644 index 00000000000..81ed1d3a705 --- /dev/null +++ 
b/target/nextflow/mapping/bd_rhapsody/nextflow_params.yaml @@ -0,0 +1,36 @@ +# Inputs +mode: # please fill in - example: "wta" +input: # please fill in - example: ["input.fastq.gz"] +reference: # please fill in - example: ["reference_genome.tar.gz|reference.fasta"] +# transcriptome_annotation: "transcriptome.gtf" +# abseq_reference: ["abseq_reference.fasta"] +# supplemental_reference: ["supplemental_reference.fasta"] +sample_prefix: "sample" + +# Outputs +# output: "$id.$key.output.output" + +# Putative cell calling settings +# putative_cell_call: "mRNA" +# exact_cell_count: 10000 +disable_putative_calling: false + +# Subsample arguments +# subsample: 0.01 +# subsample_seed: 3445 + +# Multiplex arguments +# sample_tags_version: "human" +# tag_names: ["4-mySample", "9-myOtherSample", "6-alsoThisSample"] + +# VDJ arguments +# vdj_version: "human" + +# CWL-runner arguments +parallel: true +timestamps: false +dryrun: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/bd_rhapsody/nextflow_schema.json b/target/nextflow/mapping/bd_rhapsody/nextflow_schema.json new file mode 100644 index 00000000000..78b4c85dd34 --- /dev/null +++ b/target/nextflow/mapping/bd_rhapsody/nextflow_schema.json @@ -0,0 +1,348 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bd_rhapsody", +"description": "A wrapper for the BD Rhapsody Analysis CWL v1.10.1 pipeline.\n\nThe CWL pipeline file is obtained by cloning \u0027https://bitbucket.org/CRSwDev/cwl/src/master/\u0027 and removing all objects with class \u0027DockerRequirement\u0027 from the YML.\n\nThis pipeline can be used for a targeted analysis (with `--mode targeted`) or for a whole transcriptome analysis (with `--mode wta`).\n\n* If mode is `\"targeted\"`, then either the `--reference` or `--abseq_reference` parameters must be defined.\n* If mode is `\"wta\"`, then `--reference` and 
`--transcriptome_annotation` must be defined, `--abseq_reference` and `--supplemental_reference` are optional.\n\nThe reference_genome and transcriptome_annotation files can be generated with the make_reference pipeline.\nAlternatively, BD also provides standard references which can be downloaded from these locations:\n\n - Human: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCh38-PhiX-gencodev29/\n - Mouse: http://bd-rhapsody-public.s3-website-us-east-1.amazonaws.com/Rhapsody-WTA/GRCm38-PhiX-gencodevM19/\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "mode": { + "type": + "string", + "description": "Type: `string`, required, example: `wta`, choices: ``wta`, `targeted``. Whether to run a whole transcriptome analysis (WTA) or a targeted analysis", + "help_text": "Type: `string`, required, example: `wta`, choices: ``wta`, `targeted``. Whether to run a whole transcriptome analysis (WTA) or a targeted analysis.", + "enum": ["wta", "targeted"] + + + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `input.fastq.gz`, multiple_sep: `\";\"`. Path to your read files in the FASTQ", + "help_text": "Type: List of `file`, required, example: `input.fastq.gz`, multiple_sep: `\";\"`. Path to your read files in the FASTQ.GZ format. You may specify as many R1/R2 read pairs as you want." + + } + + + , + "reference": { + "type": + "string", + "description": "Type: List of `file`, required, example: `reference_genome.tar.gz|reference.fasta`, multiple_sep: `\";\"`. Reference to map to", + "help_text": "Type: List of `file`, required, example: `reference_genome.tar.gz|reference.fasta`, multiple_sep: `\";\"`. Reference to map to. For `--mode wta`, this is the path to STAR index as a tar.gz file.
For `--mode targeted`, this is the path to mRNA reference file for pre-designed, supplemental, or custom panel, in FASTA format" + + } + + + , + "transcriptome_annotation": { + "type": + "string", + "description": "Type: `file`, example: `transcriptome.gtf`. Path to GTF annotation file (only for `--mode wta`)", + "help_text": "Type: `file`, example: `transcriptome.gtf`. Path to GTF annotation file (only for `--mode wta`)." + + } + + + , + "abseq_reference": { + "type": + "string", + "description": "Type: List of `file`, example: `abseq_reference.fasta`, multiple_sep: `\";\"`. Path to the AbSeq reference file in FASTA format", + "help_text": "Type: List of `file`, example: `abseq_reference.fasta`, multiple_sep: `\";\"`. Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used." + + } + + + , + "supplemental_reference": { + "type": + "string", + "description": "Type: List of `file`, example: `supplemental_reference.fasta`, multiple_sep: `\";\"`. Path to the supplemental reference file in FASTA format", + "help_text": "Type: List of `file`, example: `supplemental_reference.fasta`, multiple_sep: `\";\"`. Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences used in the experiment (only for `--mode wta`)." + + } + + + , + "sample_prefix": { + "type": + "string", + "description": "Type: `string`, default: `sample`. Specify a run name to use as the output file base name", + "help_text": "Type: `string`, default: `sample`. Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces." + , + "default": "sample" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `output_dir/`. 
Output folder", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `output_dir/`. Output folder. Output still needs to be processed further." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "putative cell calling settings" : { + "title": "Putative cell calling settings", + "type": "object", + "description": "No description", + "properties": { + + + "putative_cell_call": { + "type": + "string", + "description": "Type: `string`, example: `mRNA`, choices: ``mRNA`, `AbSeq_Experimental``. Specify the dataset to be used for putative cell calling", + "help_text": "Type: `string`, example: `mRNA`, choices: ``mRNA`, `AbSeq_Experimental``. Specify the dataset to be used for putative cell calling. For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above.", + "enum": ["mRNA", "AbSeq_Experimental"] + + + } + + + , + "exact_cell_count": { + "type": + "integer", + "description": "Type: `integer`, example: `10000`. Exact cell count - Set a specific number (\u003e=1) of cells as putative, based on those with the highest error-corrected read count", + "help_text": "Type: `integer`, example: `10000`. Exact cell count - Set a specific number (\u003e=1) of cells as putative, based on those with the highest error-corrected read count" + + } + + + , + "disable_putative_calling": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable Refined Putative Cell Calling - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve)", + "help_text": "Type: `boolean_true`, default: `false`. Disable Refined Putative Cell Calling - Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set." + , + "default": "False" + } + + +} +}, + + + "subsample arguments" : { + "title": "Subsample arguments", + "type": "object", + "description": "No description", + "properties": { + + + "subsample": { + "type": + "number", + "description": "Type: `double`, example: `0.01`. A number \u003e1 or fraction (0 \u003c n \u003c 1) to indicate the number or percentage of reads to subsample", + "help_text": "Type: `double`, example: `0.01`. A number \u003e1 or fraction (0 \u003c n \u003c 1) to indicate the number or percentage of reads to subsample." + + } + + + , + "subsample_seed": { + "type": + "integer", + "description": "Type: `integer`, example: `3445`. A seed for replicating a previous subsampled run", + "help_text": "Type: `integer`, example: `3445`. A seed for replicating a previous subsampled run." + + } + + +} +}, + + + "multiplex arguments" : { + "title": "Multiplex arguments", + "type": "object", + "description": "No description", + "properties": { + + + "sample_tags_version": { + "type": + "string", + "description": "Type: `string`, example: `human`, choices: ``human`, `hs`, `mouse`, `mm``. Specify if multiplexed run", + "help_text": "Type: `string`, example: `human`, choices: ``human`, `hs`, `mouse`, `mm``. Specify if multiplexed run.", + "enum": ["human", "hs", "mouse", "mm"] + + + } + + + , + "tag_names": { + "type": + "string", + "description": "Type: List of `string`, example: `4-mySample:9-myOtherSample:6-alsoThisSample`, multiple_sep: `\":\"`. Tag_Names (optional) - Specify the tag number followed by \u0027-\u0027 and the desired sample name to appear in Sample_Tag_Metrics", + "help_text": "Type: List of `string`, example: `4-mySample:9-myOtherSample:6-alsoThisSample`, multiple_sep: `\":\"`. 
Tag_Names (optional) - Specify the tag number followed by \u0027-\u0027 and the desired sample name to appear in Sample_Tag_Metrics.csv.\nDo not use the special characters: \u0026, (), [], {}, \u003c\u003e, ?, |\n" + + } + + +} +}, + + + "vdj arguments" : { + "title": "VDJ arguments", + "type": "object", + "description": "No description", + "properties": { + + + "vdj_version": { + "type": + "string", + "description": "Type: `string`, example: `human`, choices: ``human`, `mouse`, `humanBCR`, `humanBCR`, `humanTCR`, `mouseBCR``. Specify if VDJ run", + "help_text": "Type: `string`, example: `human`, choices: ``human`, `mouse`, `humanBCR`, `humanBCR`, `humanTCR`, `mouseBCR``. Specify if VDJ run.", + "enum": ["human", "mouse", "humanBCR", "humanBCR", "humanTCR", "mouseBCR"] + + + } + + +} +}, + + + "cwl-runner arguments" : { + "title": "CWL-runner arguments", + "type": "object", + "description": "No description", + "properties": { + + + "parallel": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Run jobs in parallel", + "help_text": "Type: `boolean`, default: `true`. Run jobs in parallel." + , + "default": "True" + } + + + , + "timestamps": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add timestamps to the errors, warnings, and notifications", + "help_text": "Type: `boolean_true`, default: `false`. Add timestamps to the errors, warnings, and notifications." + , + "default": "False" + } + + + , + "dryrun": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed", + "help_text": "Type: `boolean_true`, default: `false`. If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed." 
+ , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/putative cell calling settings" + }, + + { + "$ref": "#/definitions/subsample arguments" + }, + + { + "$ref": "#/definitions/multiplex arguments" + }, + + { + "$ref": "#/definitions/vdj arguments" + }, + + { + "$ref": "#/definitions/cwl-runner arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl b/target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl new file mode 100755 index 00000000000..56a6310bc07 --- /dev/null +++ b/target/nextflow/mapping/bd_rhapsody/rhapsody_targeted_1.10.1_nodocker.cwl @@ -0,0 +1,5159 @@ +#!/usr/bin/env cwl-runner +{ + "cwlVersion": "v1.0", + "$graph": [ + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-r1", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AddtoBam.cwl/Annotation_R1" + }, + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#AddtoBam.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--annot-mol-file" + }, + "type": "File", + "id": "#AddtoBam.cwl/Molecular_Annotation" + }, + { + "inputBinding": { + "prefix": "--r2-bam" + }, + "type": "File", + "id": "#AddtoBam.cwl/R2_Bam" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AddtoBam.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--tag-calls" +
}, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Tag_Calls" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "Annotated_mapping_R2.BAM" + }, + "type": "File", + "id": "#AddtoBam.cwl/Annotated_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AddtoBam.cwl/output" + } + ], + "baseCommand": [ + "mist_add_to_bam.py" + ], + "class": "CommandLineTool", + "id": "#AddtoBam.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + ], + "id": "#AlignR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--index" + }, + "type": "File", + "id": "#AlignR2.cwl/Index" + }, + { + "inputBinding": { + "prefix": "--r2-fastqs", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/R2" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AlignR2.cwl/Run_Metadata" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "$(String(runtime.cores))" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*zip" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/Alignments" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AlignR2.cwl/output" + } + ], + "baseCommand": [ + "mist_align_R2.py" + ], + "class": "CommandLineTool", + "id": "#AlignR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateMolecules.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": 
"#AnnotateMolecules.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--use-dbec" + }, + "type": [ + "null", + "boolean" + ], + "id": "#AnnotateMolecules.cwl/Use_DBEC" + }, + { + "inputBinding": { + "prefix": "--valid-annot" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Valids" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_GeneStatus.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Gene_Status_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).max_count)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Max_Count" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Mol_Annot_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Total_Molecules" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_molecules.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateMolecules.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--filter-metrics", + "itemSeparator": "," + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#AnnotateR1.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--R1" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateR1.cwl/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 2000, + "class": "ResourceRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_R1.csv.gz" + }, + "type": "File", + "id": "#AnnotateR1.cwl/Annotation_R1" + }, + { + "outputBinding": { + "glob": 
"*_R1_error_count_table.npy" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_error_count_table" + }, + { + "outputBinding": { + "glob": "*_R1_read_count_breakdown.json" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_read_count_breakdown" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR1.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R1.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR1.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF_Annotation" + }, + { + "inputBinding": { + "prefix": "--R2-zip" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_zip" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--transcript-length" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/Transcript_Length" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*Annotation_R2.csv.gz" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Annot_R2" + }, + { + "outputBinding": { + "glob": "*-annot.gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*mapping_R2.BAM" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_Bam" + }, + { + "outputBinding": { + "glob": "*_picard_quality_metrics.csv.gz" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_Quality_Metrics" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR2.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R2.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": 
"--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--extra-seqs", + "itemSeparator": "," + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Extra_Seqs" + }, + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#AnnotateReads.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/Putative_Cell_Call" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_error_count_table" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_read_count_breakdown" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Quality_Metrics" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + { + "class": "InitialWorkDirRequirement", + "listing": [ + { + "writable": false, + "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = 
getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", + "entryname": "manifest.json" + } + ] + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "4" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_Read.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Annotation_Read" + }, + { + "outputBinding": { + "glob": "*read1_error_rate_archive*" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Read1_error_rate" + }, + { + "outputBinding": { + "glob": "*_SeqMetrics.csv.gz" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Seq_Metrics" + }, + { + "outputBinding": { + "glob": "*Sorted_Valid_Reads.csv.*" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/Valid_Reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_ig_reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_tcr_reads" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateReads.cwl/output" + }, + { + "outputBinding": { + "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validIgReads" + }, + { + "outputBinding": { + "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validTcrReads" + } + ], + 
"baseCommand": [ + "mist_annotate_reads.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateReads.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#BundleLogs.cwl/log_files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + } + ], + "outputs": [ + { + "type": "Directory", + "id": "#BundleLogs.cwl/logs_dir" + } + ], + "class": "ExpressionTool", + "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? (random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", + "id": "#BundleLogs.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 0 + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/molsPerCellMatrix" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*cell_type_experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/cellTypePredictions" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Cell_Classifier.cwl/log" + } + ], + "baseCommand": [ + "mist_cell_classifier.py" + ], + "class": "CommandLineTool", + "id": "#Cell_Classifier.cwl" + }, + { + "inputs": [ + { + "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", + "inputBinding": { + "prefix": "--min-split-size" + }, + "type": [ + 
"null", + "int" + ], + "id": "#CheckFastqs.cwl/MinChunkSize" + }, + { + "inputBinding": { + "prefix": "--reads", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#CheckFastqs.cwl/Reads" + }, + { + "inputBinding": { + "prefix": "--subsample" + }, + "type": [ + "null", + "float" + ], + "id": "#CheckFastqs.cwl/Subsample" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/Subsample_Seed" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "CheckFastqs does several quality control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", + "baseCommand": [ + "mist_check_fastqs.py" + ], + "id": "#CheckFastqs.cwl", + "outputs": [ + { + "outputBinding": { + "glob": "bead_version.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/Bead_Version" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": 
"#CheckFastqs.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/FastqReadPairs" + }, + { + "outputBinding": { + "glob": "files_to_skip_split_and_subsample.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" + }, + "type": { + "items": "string", + "type": "array" + }, + "id": "#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" + }, + "type": [ + "null", + "string" + ], + "id": "#CheckFastqs.cwl/Libraries" + }, + { + "outputBinding": { + "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#CheckFastqs.cwl/ReadsList" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" + }, + "type": "int", + "id": "#CheckFastqs.cwl/SubsampleSeed" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" + }, + "type": "float", + "id": "#CheckFastqs.cwl/SubsamplingRatio" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": 
"#CheckFastqs.cwl/log" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--abseq-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/AbSeq_Reference" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckReference.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/Reference" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#CheckReference.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--supplemental-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/Supplemental_Reference" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "combined_extra_seq.fasta" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Extra_Seqs" + }, + { + "outputBinding": { + "glob": "full-gene-list.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Full_Genes" + }, + { + "outputBinding": { + "glob": "*gtf", + "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. 
AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*-annot.*", + "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" + }, + "type": "File", + "id": "#CheckReference.cwl/Index" + }, + { + "outputBinding": { + "glob": "target-gene.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Target_Gene_Mapping" + }, + { + "outputBinding": { + "glob": "transcript_length.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Transcript_Length" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#CheckReference.cwl/output" + } + ], + "baseCommand": [ + "mist_check_references.py" + ], + "class": "CommandLineTool", + "id": "#CheckReference.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--dense-data-table" + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparse.cwl/Dense_Data_Table" + }, + { + "inputBinding": { + "prefix": "--gene-list" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Gene_List" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Run_Metadata" + } + ], + "requirements": [ + ], + 
"outputs": [ + { + "outputBinding": { + "glob": "*.csv.gz" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Data_Tables" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#DensetoSparse.cwl/output" + } + ], + "baseCommand": [ + "mist_dense_to_sparse.py" + ], + "class": "CommandLineTool", + "id": "#DensetoSparse.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparseFile.cwl/GDT_cell_order" + } + ], + "requirements": [ + ], + "stdout": "cell_order.json", + "outputs": [ + { + "type": "stdout", + "id": "#DensetoSparseFile.cwl/Cell_Order" + } + ], + "baseCommand": "cat", + "id": "#DensetoSparseFile.cwl", + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--full-gene-list" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Full_Genes" + }, + { + "inputBinding": { + "prefix": "--gene-status", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Gene_Status_List" + }, + { + "inputBinding": { + "prefix": "--max-count", + "itemSeparator": "," + }, + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Max_Count" + }, + { + "inputBinding": { + "prefix": "--mol-annot", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Molecule_Annotation_List" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#GetDataTable.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#GetDataTable.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#GetDataTable.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "prefix": "--tag-names", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + 
} + ], + "id": "#GetDataTable.cwl/Tag_Names" + }, + { + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Total_Molecules" + } + ], + "requirements": [ + { + "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", + "class": "ResourceRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "metrics-files.tar.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Annot_Files" + }, + { + "outputBinding": { + "glob": "Annotations/*_Bioproduct_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Bioproduct_Stats" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*.png" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Cell_Label_Filter" + }, + { + "outputBinding": { + "glob": "cell_order.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Cell_Order" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule_corrected.csv.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Corrected_Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "*PerCell_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables" + }, + { + "outputBinding": { + "glob": "*PerCell_Unfiltered_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" + }, + { + "outputBinding": { + "glob": "*_Expression_Data.st.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Expression_Data" + }, + { + "outputBinding": { + "glob": "*_Expression_Data_Unfiltered.st.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" + }, + { + "outputBinding": { + "glob": "gene_list.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Gene_List" + }, + { + 
"outputBinding": { + "glob": "Annotations/*_Annotation_Molecule.csv.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Putative_Cells_Origin" + }, + { + "outputBinding": { + "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Annotation" + }, + { + "outputBinding": { + "glob": "Trueno/*_Calls.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Calls" + }, + { + "outputBinding": { + "glob": "Trueno/*csv" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Trueno_out" + }, + { + "outputBinding": { + "glob": "Trueno/*zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Trueno_zip" + }, + { + "outputBinding": { + "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#GetDataTable.cwl/output" + } + ], + "baseCommand": [ + "mist_get_datatables.py" + ], + "class": "CommandLineTool", + "id": "#GetDataTable.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#IndexBAM.cwl/BamFile" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_index.log", + "outputs": [ + { + "outputBinding": { + "glob": "*.bai" + }, + "type": "File", + "id": "#IndexBAM.cwl/Index" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": 
"File", + "id": "#IndexBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "index" + ], + "id": "#IndexBAM.cwl", + "arguments": [ + { + "position": 2, + "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/AbSeq_UMI" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Barcode_Num" + }, + { + "type": [ + "null", + "File" + ], + "id": "#InternalSettings.cwl/Extra_Seqs" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Label_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/MinChunkSize" + }, + { + "type": [ + "null", + "long" + ], + "id": "#InternalSettings.cwl/NumRecordsPerSplit" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + "string" + ], + "id": "#InternalSettings.cwl/Seq_Run" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Target_analysis" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Use_DBEC" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" + } + ], + "class": "ExpressionTool", + "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput 
= internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", + "id": "#InternalSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/AbSeq_Reference", + "label": "AbSeq Reference" + }, + { + "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. Does not apply if Exact Cell Count is set.", + "type": [ + "null", + "boolean" + ], + "id": "#main/Basic_Algo_Only", + "label": "Disable Refined Putative Cell Calling" + }, + { + "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", + "type": [ + "null", + "int" + ], + "id": "#main/Exact_Cell_Count", + "label": "Exact Cell Count" + }, + { + "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. 
AbSeq (Experimental) is for troubleshooting only.", + "type": [ + "null", + { + "symbols": [ + "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", + "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" + ], + "type": "enum", + "name": "#main/Putative_Cell_Call/Putative_Cell_Call" + } + ], + "id": "#main/Putative_Cell_Call", + "label": "Putative Cell Calling" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/Reads", + "label": "Reads" + }, + { + "doc": "A fasta file containing the mRNA panel amplicon targets used in the experiment", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Reference", + "label": "Reference" + }, + { + "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", + "type": [ + "null", + "string" + ], + "id": "#main/Run_Name", + "label": "Run Name" + }, + { + "doc": "The sample multiplexing kit version. This option should only be set for a multiplexed experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/Sample_Tags_Version/Sample_Tags_Version/human", + "#main/Sample_Tags_Version/Sample_Tags_Version/hs", + "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", + "#main/Sample_Tags_Version/Sample_Tags_Version/mm", + "#main/Sample_Tags_Version/Sample_Tags_Version/custom" + ], + "type": "enum", + "name": "#main/Sample_Tags_Version/Sample_Tags_Version" + } + ], + "id": "#main/Sample_Tags_Version", + "label": "Sample Tags Version" + }, + { + "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", + "type": [ + "null", + "float" + ], + "id": "#main/Subsample", + "label": "Subsample Reads" + }, + { + "doc": "For use when replicating a previous subsampling run only. 
Obtain the seed generated from the log file for the SplitFastQ node.\n", + "type": [ + "null", + "int" + ], + "id": "#main/Subsample_seed", + "label": "Subsample Seed" + }, + { + "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alpha numeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#main/Tag_Names", + "label": "Tag Names" + }, + { + "doc": "The VDJ species and chain types. This option should only be set for VDJ experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/VDJ_Version/VDJ_Version/human", + "#main/VDJ_Version/VDJ_Version/hs", + "#main/VDJ_Version/VDJ_Version/mouse", + "#main/VDJ_Version/VDJ_Version/mm", + "#main/VDJ_Version/VDJ_Version/humanBCR", + "#main/VDJ_Version/VDJ_Version/humanTCR", + "#main/VDJ_Version/VDJ_Version/mouseBCR", + "#main/VDJ_Version/VDJ_Version/mouseTCR" + ], + "type": "enum", + "name": "#main/VDJ_Version/VDJ_Version" + } + ], + "id": "#main/VDJ_Version", + "label": "VDJ Species Version" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "The BD Rhapsody\u2122 assays are used to create sequencing libraries from single cell transcriptomes.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files and a reference file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", + "label": "BD Rhapsody\u2122 Targeted Analysis Pipeline", + "steps": [ + { + "run": "#AddtoBam.cwl", + "scatter": [ + "#main/AddtoBam/R2_Bam" + ], + "in": [ + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AddtoBam/Annotation_R1" + }, + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/AddtoBam/Cell_Order" + }, + { + "source": "#main/GetDataTable/Corrected_Molecular_Annotation", + "id": "#main/AddtoBam/Molecular_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Bam", + "id": "#main/AddtoBam/R2_Bam" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AddtoBam/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Tag_Calls", + "id": "#main/AddtoBam/Tag_Calls" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AddtoBam/Target_Gene_Mapping" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AddtoBam", + "out": [ + "#main/AddtoBam/Annotated_Bam", + "#main/AddtoBam/output" + ] + }, + { + "run": "#AlignR2.cwl", + "out": [ + "#main/AlignR2/Alignments", + "#main/AlignR2/output" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AlignR2", + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AlignR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/Index", + "id": "#main/AlignR2/Index" + }, + { + "source": "#main/QualityFilterOuter/R2", + "id": "#main/AlignR2/R2" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AlignR2/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateMolecules.cwl", + "scatter": [ + "#main/AnnotateMolecules/Valids" + ], + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateMolecules/AbSeq_UMI" + }, + { + "source": 
"#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateMolecules/Run_Metadata" + }, + { + "source": "#main/Internal_Settings/Use_DBEC", + "id": "#main/AnnotateMolecules/Use_DBEC" + }, + { + "source": "#main/AnnotateReads/Valid_Reads", + "id": "#main/AnnotateMolecules/Valids" + } + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateMolecules", + "out": [ + "#main/AnnotateMolecules/Mol_Annot_List", + "#main/AnnotateMolecules/Gene_Status_List", + "#main/AnnotateMolecules/Max_Count", + "#main/AnnotateMolecules/Total_Molecules", + "#main/AnnotateMolecules/output" + ] + }, + { + "id": "#main/AnnotateR1", + "out": [ + "#main/AnnotateR1/Annotation_R1", + "#main/AnnotateR1/R1_error_count_table", + "#main/AnnotateR1/R1_read_count_breakdown", + "#main/AnnotateR1/output" + ], + "run": "#AnnotateR1.cwl", + "scatter": [ + "#main/AnnotateR1/R1" + ], + "in": [ + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateR1/Filter_Metrics" + }, + { + "source": "#main/QualityFilterOuter/R1", + "id": "#main/AnnotateR1/R1" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR1/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateR2.cwl", + "scatter": [ + "#main/AnnotateR2/R2_zip" + ], + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/GTF", + "id": "#main/AnnotateR2/GTF_Annotation" + }, + { + "source": "#main/AlignR2/Alignments", + "id": "#main/AnnotateR2/R2_zip" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR2/Run_Metadata" + }, + { + "source": "#main/CheckReference/Transcript_Length", + "id": "#main/AnnotateR2/Transcript_Length" + } + ], + "requirements": [ + { + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateR2", + "out": [ + "#main/AnnotateR2/Annot_R2", + "#main/AnnotateR2/R2_Bam", + 
"#main/AnnotateR2/GTF", + "#main/AnnotateR2/output", + "#main/AnnotateR2/R2_Quality_Metrics" + ] + }, + { + "run": "#AnnotateReads.cwl", + "out": [ + "#main/AnnotateReads/Seq_Metrics", + "#main/AnnotateReads/Valid_Reads", + "#main/AnnotateReads/Read1_error_rate", + "#main/AnnotateReads/Annotation_Read", + "#main/AnnotateReads/output", + "#main/AnnotateReads/validTcrReads", + "#main/AnnotateReads/validIgReads", + "#main/AnnotateReads/num_valid_tcr_reads", + "#main/AnnotateReads/num_valid_ig_reads" + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateReads", + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateReads/AbSeq_UMI" + }, + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateReads/Extra_Seqs" + }, + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateReads/Filter_Metrics" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/AnnotateReads/Putative_Cell_Call" + }, + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AnnotateReads/R1_Annotation" + }, + { + "source": "#main/AnnotateR1/R1_error_count_table", + "id": "#main/AnnotateReads/R1_error_count_table" + }, + { + "source": "#main/AnnotateR1/R1_read_count_breakdown", + "id": "#main/AnnotateReads/R1_read_count_breakdown" + }, + { + "source": "#main/AnnotateR2/Annot_R2", + "id": "#main/AnnotateReads/R2_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Quality_Metrics", + "id": "#main/AnnotateReads/R2_Quality_Metrics" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateReads/Run_Metadata" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AnnotateReads/Target_Gene_Mapping" + } + ] + }, + { + "out": [ + "#main/BundleLogs/logs_dir" + ], + "run": "#BundleLogs.cwl", + "id": "#main/BundleLogs", + "in": [ + { + "source": [ + "#main/AnnotateReads/output", + 
"#main/AnnotateR1/output", + "#main/AnnotateR2/output", + "#main/CheckReference/output", + "#main/GetDataTable/output", + "#main/Metrics/output", + "#main/AddtoBam/output", + "#main/AnnotateMolecules/output", + "#main/QualityFilterOuter/output", + "#main/CheckFastqs/log", + "#main/SplitAndSubsample/log", + "#main/MergeBAM/log", + "#main/Dense_to_Sparse_Datatable/output", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output", + "#main/IndexBAM/log", + "#main/CellClassifier/log" + ], + "linkMerge": "merge_flattened", + "id": "#main/BundleLogs/log_files" + } + ] + }, + { + "run": "#Cell_Classifier.cwl", + "out": [ + "#main/CellClassifier/cellTypePredictions", + "#main/CellClassifier/log" + ], + "requirements": [ + { + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CellClassifier", + "in": [ + { + "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", + "id": "#main/CellClassifier/molsPerCellMatrix" + } + ] + }, + { + "out": [ + "#main/CheckFastqs/SubsampleSeed", + "#main/CheckFastqs/SubsamplingRatio", + "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "#main/CheckFastqs/FastqReadPairs", + "#main/CheckFastqs/Bead_Version", + "#main/CheckFastqs/Libraries", + "#main/CheckFastqs/ReadsList", + "#main/CheckFastqs/log" + ], + "run": "#CheckFastqs.cwl", + "id": "#main/CheckFastqs", + "in": [ + { + "source": "#main/Internal_Settings/MinChunkSize", + "id": "#main/CheckFastqs/MinChunkSize" + }, + { + "source": "#main/Reads", + "id": "#main/CheckFastqs/Reads" + }, + { + "source": "#main/Subsample_Settings/Subsample_Reads", + "id": "#main/CheckFastqs/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": "#main/CheckFastqs/Subsample_Seed" + } + ] + }, + { + "run": "#CheckReference.cwl", + "out": [ + "#main/CheckReference/Index", + "#main/CheckReference/Extra_Seqs", + "#main/CheckReference/Full_Genes", + "#main/CheckReference/output", + "#main/CheckReference/Transcript_Length", + 
"#main/CheckReference/GTF", + "#main/CheckReference/Target_Gene_Mapping" + ], + "requirements": [ + { + "ramMin": 1000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CheckReference", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/CheckReference/AbSeq_Reference" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/CheckReference/Putative_Cell_Call" + }, + { + "source": "#main/Reference", + "id": "#main/CheckReference/Reference" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/CheckReference/Run_Metadata" + } + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables", + "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": "#main/Dense_to_Sparse_Datatable/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable", + "out": [ + "#main/Dense_to_Sparse_Datatable/Data_Tables", + "#main/Dense_to_Sparse_Datatable/output" + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", 
+ "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", + "out": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output" + ] + }, + { + "out": [ + "#main/Dense_to_Sparse_File/Cell_Order" + ], + "run": "#DensetoSparseFile.cwl", + "id": "#main/Dense_to_Sparse_File", + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_File/GDT_cell_order" + } + ] + }, + { + "out": [ + "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/dataTables" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "File", + "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7/molsPerCellMatrixForCellClassifier" + } + ], + "id": "#main/FindDataTableForCellClassifier/c174ddb5-9fdb-4dae-a1c5-b5666a631cc7", + "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", + "class": "ExpressionTool" + }, + "id": "#main/FindDataTableForCellClassifier", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/FindDataTableForCellClassifier/dataTables" + } + ] + }, + { + "out": [ + "#main/GetDataTable/Tag_Calls", + "#main/GetDataTable/Molecular_Annotation", + "#main/GetDataTable/Corrected_Molecular_Annotation", + "#main/GetDataTable/Tag_Annotation", + "#main/GetDataTable/Annot_Files", + 
"#main/GetDataTable/Cell_Label_Filter", + "#main/GetDataTable/Dense_Data_Tables", + "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "#main/GetDataTable/Expression_Data", + "#main/GetDataTable/Expression_Data_Unfiltered", + "#main/GetDataTable/Bioproduct_Stats", + "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "#main/GetDataTable/Putative_Cells_Origin", + "#main/GetDataTable/Protein_Aggregates_Experimental", + "#main/GetDataTable/Trueno_out", + "#main/GetDataTable/Trueno_zip", + "#main/GetDataTable/output", + "#main/GetDataTable/Cell_Order", + "#main/GetDataTable/Gene_List" + ], + "run": "#GetDataTable.cwl", + "id": "#main/GetDataTable", + "in": [ + { + "source": "#main/CheckReference/Full_Genes", + "id": "#main/GetDataTable/Full_Genes" + }, + { + "source": "#main/AnnotateMolecules/Gene_Status_List", + "id": "#main/GetDataTable/Gene_Status_List" + }, + { + "source": "#main/AnnotateMolecules/Max_Count", + "id": "#main/GetDataTable/Max_Count" + }, + { + "source": "#main/AnnotateMolecules/Mol_Annot_List", + "id": "#main/GetDataTable/Molecule_Annotation_List" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/GetDataTable/Putative_Cell_Call" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/GetDataTable/Run_Metadata" + }, + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/GetDataTable/Seq_Metrics" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/GetDataTable/Tag_Names" + }, + { + "source": "#main/AnnotateMolecules/Total_Molecules", + "id": "#main/GetDataTable/Total_Molecules" + } + ] + }, + { + "out": [ + "#main/IndexBAM/Index", + "#main/IndexBAM/log" + ], + "run": "#IndexBAM.cwl", + "id": "#main/IndexBAM", + "in": [ + { + "source": "#main/MergeBAM/Final_Bam", + "id": "#main/IndexBAM/BamFile" + } + ] + }, + { + "out": [ + "#main/Internal_Settings/Read_Filter_Off", + "#main/Internal_Settings/Barcode_Num", + "#main/Internal_Settings/Seq_Run", 
+ "#main/Internal_Settings/AbSeq_UMI", + "#main/Internal_Settings/Use_DBEC", + "#main/Internal_Settings/Extra_Seqs", + "#main/Internal_Settings/MinChunkSize", + "#main/Internal_Settings/NumRecordsPerSplit", + "#main/Internal_Settings/Target_analysis", + "#main/Internal_Settings/Subsample_Tags", + "#main/Internal_Settings/VDJ_VGene_Evalue", + "#main/Internal_Settings/VDJ_JGene_Evalue" + ], + "in": [], + "run": "#InternalSettings.cwl", + "id": "#main/Internal_Settings", + "label": "Internal Settings" + }, + { + "out": [ + "#main/MergeBAM/Final_Bam", + "#main/MergeBAM/log" + ], + "run": "#MergeBAM.cwl", + "id": "#main/MergeBAM", + "in": [ + { + "source": "#main/AddtoBam/Annotated_Bam", + "id": "#main/MergeBAM/BamFiles" + }, + { + "source": "#main/Metadata_Settings/Run_Base_Name", + "id": "#main/MergeBAM/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/MergeBAM/Sample_Tags_Version" + } + ] + }, + { + "out": [ + "#main/MergeMultiplex/Multiplex_out" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/SampleTag_Files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679/Multiplex_out" + } + ], + "id": "#main/MergeMultiplex/8e7f752c-1505-4d65-81b3-f91fcd83b679", + "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", + "class": "ExpressionTool" + }, + "id": "#main/MergeMultiplex", + "in": [ + { + "source": [ + "#main/GetDataTable/Trueno_out", + "#main/Metrics/Sample_Tag_Out" + ], + "linkMerge": "merge_flattened", + "id": 
"#main/MergeMultiplex/SampleTag_Files" + } + ] + }, + { + "out": [ + "#main/Metadata_Settings/Run_Metadata", + "#main/Metadata_Settings/Run_Base_Name" + ], + "run": "#Metadata.cwl", + "id": "#main/Metadata_Settings", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/Metadata_Settings/AbSeq_Reference" + }, + { + "valueFrom": "Targeted", + "id": "#main/Metadata_Settings/Assay" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", + "id": "#main/Metadata_Settings/Basic_Algo_Only" + }, + { + "source": "#main/CheckFastqs/Bead_Version", + "id": "#main/Metadata_Settings/Bead_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "id": "#main/Metadata_Settings/Exact_Cell_Count" + }, + { + "source": "#main/CheckFastqs/Libraries", + "id": "#main/Metadata_Settings/Libraries" + }, + { + "valueFrom": "BD Rhapsody Targeted Analysis Pipeline", + "id": "#main/Metadata_Settings/Pipeline_Name" + }, + { + "source": "#main/Version/version", + "id": "#main/Metadata_Settings/Pipeline_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/Metadata_Settings/Putative_Cell_Call" + }, + { + "source": "#main/CheckFastqs/ReadsList", + "id": "#main/Metadata_Settings/Reads" + }, + { + "source": "#main/Reference", + "id": "#main/Metadata_Settings/Reference" + }, + { + "source": "#main/Name_Settings/Run_Name", + "id": "#main/Metadata_Settings/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/Metadata_Settings/Sample_Tag_Names" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/Metadata_Settings/Sample_Tags_Version" + }, + { + "source": "#main/Start_Time/Start_Time", + "id": "#main/Metadata_Settings/Start_Time" + }, + { + "source": "#main/Subsample_Settings/Subsample_Reads", + "id": "#main/Metadata_Settings/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": 
"#main/Metadata_Settings/Subsample_Seed" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/Metadata_Settings/VDJ_Version" + } + ] + }, + { + "out": [ + "#main/Metrics/Metrics_Summary", + "#main/Metrics/Metrics_Archive", + "#main/Metrics/output", + "#main/Metrics/Sample_Tag_Out" + ], + "run": "#Metrics.cwl", + "id": "#main/Metrics", + "in": [ + { + "source": "#main/GetDataTable/Annot_Files", + "id": "#main/Metrics/Annot_Files" + }, + { + "source": "#main/AnnotateReads/Read1_error_rate", + "id": "#main/Metrics/Read1_error_rate" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Metrics/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Trueno_zip", + "id": "#main/Metrics/Sample_Tag_Archives" + }, + { + "source": "#main/Internal_Settings/Seq_Run", + "id": "#main/Metrics/Seq_Run" + }, + { + "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "id": "#main/Metrics/UMI_Adjusted_Stats" + }, + { + "source": "#main/VDJ_Compile_Results/vdjMetricsJson", + "id": "#main/Metrics/vdjMetricsJson" + } + ] + }, + { + "out": [ + "#main/Multiplexing_Settings/Tag_Sample_Names", + "#main/Multiplexing_Settings/Sample_Tags_Version" + ], + "in": [ + { + "source": "#main/Sample_Tags_Version", + "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" + }, + { + "source": "#main/Tag_Names", + "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" + } + ], + "run": "#MultiplexingSettings.cwl", + "id": "#main/Multiplexing_Settings", + "label": "Multiplexing Settings" + }, + { + "out": [ + "#main/Name_Settings/Run_Name" + ], + "in": [ + { + "source": "#main/Run_Name", + "id": "#main/Name_Settings/_Run_Name" + } + ], + "run": "#NameSettings.cwl", + "id": "#main/Name_Settings", + "label": "Name Settings" + }, + { + "out": [ + "#main/PairReadFiles/ReadPairs" + ], + "run": "#PairReadFiles.cwl", + "id": "#main/PairReadFiles", + "in": [ + { + "source": "#main/CheckFastqs/FastqReadPairs", + "id": "#main/PairReadFiles/FastqReadPairs" + }, + { + 
"source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#main/PairReadFiles/Reads" + } + ] + }, + { + "out": [ + "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" + ], + "in": [ + { + "source": "#main/Basic_Algo_Only", + "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" + }, + { + "source": "#main/Exact_Cell_Count", + "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" + }, + { + "source": "#main/Putative_Cell_Call", + "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" + } + ], + "run": "#PutativeCellSettings.cwl", + "id": "#main/Putative_Cell_Calling_Settings", + "label": "Putative Cell Calling Settings" + }, + { + "out": [ + "#main/QualityFilterOuter/Filter_Metrics", + "#main/QualityFilterOuter/R1", + "#main/QualityFilterOuter/R2", + "#main/QualityFilterOuter/output" + ], + "run": "#QualityFilterOuter.cwl", + "id": "#main/QualityFilterOuter", + "in": [ + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/QualityFilterOuter/Run_Metadata" + }, + { + "source": "#main/PairReadFiles/ReadPairs", + "id": "#main/QualityFilterOuter/Split_Read_Pairs" + } + ] + }, + { + "out": [ + "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "#main/SplitAndSubsample/log" + ], + "run": "#SplitAndSubsample.cwl", + "id": "#main/SplitAndSubsample", + "in": [ + { + "source": "#main/Reads", + "id": "#main/SplitAndSubsample/Fastqs" + }, + { + "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": "#main/Internal_Settings/NumRecordsPerSplit", + "id": "#main/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#main/CheckFastqs/SubsamplingRatio", + "id": "#main/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#main/CheckFastqs/SubsampleSeed", + "id": "#main/SplitAndSubsample/SubsampleSeed" + } + ] 
+ }, + { + "out": [ + "#main/Start_Time/Start_Time" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "string", + "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad/Start_Time" + } + ], + "id": "#main/Start_Time/dc4e9fd7-92dc-4aca-80ad-76601aaaf6ad", + "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", + "class": "ExpressionTool" + }, + "id": "#main/Start_Time", + "in": [] + }, + { + "out": [ + "#main/Subsample_Settings/Subsample_Reads", + "#main/Subsample_Settings/Subsample_Seed" + ], + "in": [ + { + "source": "#main/Subsample", + "id": "#main/Subsample_Settings/_Subsample_Reads" + }, + { + "source": "#main/Subsample_seed", + "id": "#main/Subsample_Settings/_Subsample_Seed" + } + ], + "run": "#SubsampleSettings.cwl", + "id": "#main/Subsample_Settings", + "label": "Subsample Settings" + }, + { + "out": [ + "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" + ], + "run": "#UncompressDatatables.cwl", + "id": "#main/Uncompress_Datatables", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/Uncompress_Datatables/Compressed_Data_Table" + }, + { + "source": "#main/GetDataTable/Expression_Data", + "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", + "id": 
"#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Compile_Results/vdjCellsDatatable", + "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "#main/VDJ_Compile_Results/vdjDominantContigs", + "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "#main/VDJ_Compile_Results/vdjMetricsJson", + "#main/VDJ_Compile_Results/vdjMetricsCsv", + "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" + ], + "run": "#VDJ_Compile_Results.cwl", + "id": "#main/VDJ_Compile_Results", + "in": [ + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/VDJ_Compile_Results/Seq_Metrics" + }, + { + "source": "#main/CellClassifier/cellTypePredictions", + "id": "#main/VDJ_Compile_Results/cellTypeMapping" + }, + { + "valueFrom": "$([])", + "id": "#main/VDJ_Compile_Results/chainsToIgnore" + }, + { + "source": "#main/Internal_Settings/VDJ_JGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueJgene" + }, + { + "source": "#main/Internal_Settings/VDJ_VGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueVgene" + }, + { + "source": "#main/VDJ_GatherIGCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/igCalls" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/VDJ_Compile_Results/metadata" + }, + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/VDJ_Compile_Results/putativeCells" + }, + { + "source": 
"#main/VDJ_GatherTCRCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/tcrCalls" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Compile_Results/vdjVersion" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherIGCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherIGCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", + "id": "#main/VDJ_GatherIGCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherTCRCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherTCRCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", + "id": "#main/VDJ_GatherTCRCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_IG/num_splits", + "#main/VDJ_Preprocess_Reads_IG/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_IG", + "in": [ + { + "source": "#main/AnnotateReads/validIgReads", + "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" + }, + { + "source": "#main/AnnotateReads/num_valid_ig_reads", + "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" + }, + { + "valueFrom": "BCR", + "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_TCR/num_splits", + "#main/VDJ_Preprocess_Reads_TCR/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_TCR", + "in": [ + { + "source": "#main/AnnotateReads/validTcrReads", + "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" + }, + { + "source": "#main/AnnotateReads/num_valid_tcr_reads", + "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" + }, + { + "valueFrom": "TCR", + "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Settings/VDJ_Version" + ], + "in": [ + { + "source": "#main/VDJ_Version", + 
"id": "#main/VDJ_Settings/_VDJ_Version" + } + ], + "run": "#VDJ_Settings.cwl", + "id": "#main/VDJ_Settings", + "label": "VDJ Settings" + }, + { + "out": [ + "#main/Version/version" + ], + "run": "#Version.cwl", + "id": "#main/Version", + "in": [] + } + ], + "outputs": [ + { + "outputSource": "#main/GetDataTable/Bioproduct_Stats", + "type": [ + "null", + "File" + ], + "id": "#main/Bioproduct_Stats", + "label": "Bioproduct Statistics" + }, + { + "outputSource": "#main/GetDataTable/Cell_Label_Filter", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Cell_Label_Filter", + "label": "Cell Label Filter" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables", + "label": "Data Tables" + }, + { + "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables_Unfiltered", + "label": "Unfiltered Data Tables" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data", + "label": "Expression Matrix" + }, + { + "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data_Unfiltered", + "label": "Unfiltered Expression Matrix" + }, + { + "outputSource": "#main/MergeBAM/Final_Bam", + "type": "File", + "id": "#main/Final_Bam", + "label": "Final BAM File" + }, + { + "outputSource": "#main/IndexBAM/Index", + "type": "File", + "id": "#main/Final_Bam_Index", + "label": "Final BAM Index" + }, + { + "outputSource": "#main/CellClassifier/cellTypePredictions", + "type": [ + "null", + "File" + ], + "id": "#main/ImmuneCellClassification(Experimental)", + "label": "Immune Cell Classification (Experimental)" + }, + { + "outputSource": 
"#main/BundleLogs/logs_dir", + "type": "Directory", + "id": "#main/Logs", + "label": "Pipeline Logs" + }, + { + "outputSource": "#main/Metrics/Metrics_Summary", + "type": "File", + "id": "#main/Metrics_Summary", + "label": "Metrics Summary" + }, + { + "outputSource": "#main/MergeMultiplex/Multiplex_out", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Multiplex" + }, + { + "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", + "type": [ + "null", + "File" + ], + "id": "#main/Protein_Aggregates_Experimental", + "label": "Protein Aggregates (Experimental)" + }, + { + "outputSource": "#main/GetDataTable/Putative_Cells_Origin", + "type": [ + "null", + "File" + ], + "id": "#main/Putative_Cells_Origin", + "label": "Putative Cells Origin" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatable", + "label": "vdjCellsDatatable" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatableUncorrected", + "label": "vdjCellsDatatableUncorrected" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjDominantContigs", + "label": "vdjDominantContigs" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", + "type": [ + "null", + "File" + ], + "id": "#main/vdjMetricsCsv", + "label": "vdjMetricsCsv" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjUnfilteredContigs", + "label": "vdjUnfilteredContigs" + } + ], + "id": "#main", + "class": "Workflow" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#MergeBAM.cwl/BamFiles" + }, + { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Run_Name" + }, 
+ { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Sample_Tags_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_merge.log", + "outputs": [ + { + "outputBinding": { + "glob": "*_final.BAM" + }, + "type": "File", + "id": "#MergeBAM.cwl/Final_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#MergeBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "merge" + ], + "id": "#MergeBAM.cwl", + "arguments": [ + { + "prefix": "-@", + "valueFrom": "$(runtime.cores)" + }, + { + "position": 0, + "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" + } + ], + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 4, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/AbSeq_Reference" + }, + { + "type": "string", + "id": "#Metadata.cwl/Assay" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#Metadata.cwl/Basic_Algo_Only" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#Metadata.cwl/Bead_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Label_Version" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Libraries" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Name" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Putative_Cell_Call" + }, + { + "type": [ + "null", + "boolean" + ], + "id": 
"#Metadata.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reads" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Name" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Sample_Tag_Names" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Start_Time" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Subsample_Seed" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Supplemental_Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "run_metadata.json", + "outputs": [ + { + "outputBinding": { + "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" + }, + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Base_Name" + }, + { + "type": "stdout", + "id": "#Metadata.cwl/Run_Metadata" + } + ], + "baseCommand": "echo", + "id": "#Metadata.cwl", + "arguments": [ + { + "prefix": "" + }, + { + "shellQuote": true, + "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = 
bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = \"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell 
Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-files" + }, + "type": "File", + "id": "#Metrics.cwl/Annot_Files" + }, + { + "inputBinding": { + "prefix": "--read1-error-rate" + }, + "type": "File", + "id": "#Metrics.cwl/Read1_error_rate" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#Metrics.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--sample-tag-archives", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Archives" + }, + { + "inputBinding": { + "prefix": "--seq-run" + }, + "type": [ + "null", + "string" + ], + "id": "#Metrics.cwl/Seq_Run" + }, + { + "inputBinding": { + "prefix": "--umi-adjusted-stats" + }, + "type": [ + "null", + "File" + ], + "id": "#Metrics.cwl/UMI_Adjusted_Stats" + }, + { + "inputBinding": { + "prefix": "--vdj-metrics-fp" + }, + "type": [ 
+ "null", + "File" + ], + "id": "#Metrics.cwl/vdjMetricsJson" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "internal-metrics-archive.tar.gz" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Archive" + }, + { + "outputBinding": { + "glob": "*_Metrics_Summary.csv" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Summary" + }, + { + "outputBinding": { + "glob": "*.zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Out" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Metrics.cwl/output" + } + ], + "baseCommand": [ + "mist_metrics.py" + ], + "class": "CommandLineTool", + "id": "#Metrics.cwl" + }, + { + "inputs": [ + { + "default": "Targeted", + "type": "string", + "id": "#MultiplexingSettings.cwl/Assay" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" + } + ], + "class": "ExpressionTool", + "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || _Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n 
{\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", + "id": "#MultiplexingSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/_Run_Name" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/Run_Name" + } + ], + "class": "ExpressionTool", + "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", + "id": "#NameSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/FastqReadPairs" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#PairReadFiles.cwl/Reads" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "PairReadFiles takes an array of split files and pairs them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. 
The pairing information is taken from CheckFastqs.\n", + "id": "#PairReadFiles.cwl", + "outputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R1" + }, + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R2" + }, + { + "type": "int", + "name": "#PairReadFiles.cwl/ReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/ReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/ReadPairs" + } + ], + "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, \"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if 
(fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the 
fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", + "class": "ExpressionTool" + 
}, + { + "inputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" + } + ], + "class": "ExpressionTool", + "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", + "id": "#PutativeCellSettings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--run-metadata" + }, + 
"type": "File", + "id": "#QualityFilter.cwl/Run_Metadata" + }, + { + "type": { + "fields": [ + { + "inputBinding": { + "prefix": "--r1" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" + }, + { + "inputBinding": { + "prefix": "--r2" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" + }, + { + "inputBinding": { + "prefix": "--read-pair-id" + }, + "type": "int", + "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" + }, + { + "inputBinding": { + "prefix": "--library" + }, + "type": "string", + "name": "#QualityFilter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "id": "#QualityFilter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*read_quality.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#QualityFilter.cwl/Filter_Metrics" + }, + { + "outputBinding": { + "glob": "*_R1*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R1" + }, + { + "outputBinding": { + "glob": "*_R2*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R2" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#QualityFilter.cwl/output" + } + ], + "baseCommand": [ + "mist_quality_filter.py" + ], + "class": "CommandLineTool", + "id": "#QualityFilter.cwl" + }, + { + "inputs": [ + { + "type": "File", + "id": "#QualityFilterOuter.cwl/Run_Metadata" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" + }, + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" + }, + { + "type": "int", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" + }, + { + "type": "string", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + 
"class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/Filter_Metrics" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R1" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R2" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/output" + } + ], + "class": "Workflow", + "steps": [ + { + "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", + "out": [ + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" + ], + "run": "#QualityFilter.cwl", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", + "in": [ + { + "source": "#QualityFilterOuter.cwl/Run_Metadata", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" + }, + { + "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" + } + ] + } + ], + "id": "#QualityFilterOuter.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/Fastqs" + }, + { + "type": { + "items": "string", + "type": "array" + }, + "id": 
"#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" + }, + { + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" + }, + { + "type": "float", + "id": "#SplitAndSubsample.cwl/SubsampleRatio" + }, + { + "type": "int", + "id": "#SplitAndSubsample.cwl/SubsampleSeed" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", + "id": "#SplitAndSubsample.cwl", + "steps": [ + { + "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", + "out": [ + "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": { + "items": "File", + "type": "array" + }, + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" + } + ], + "outputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" + } + ], + "class": "ExpressionTool", + "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput", + "in": [ + { + "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" + } + ] + }, + { + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "prefix": "--fastq-file-path" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" + }, + { + "inputBinding": { + "prefix": "--files-to-skip-split-and-subsample", 
+ "itemSeparator": "," + }, + "type": { + "items": "string", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" + }, + { + "inputBinding": { + "prefix": "--num-records" + }, + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" + }, + { + "inputBinding": { + "prefix": "--subsample-ratio" + }, + "type": "float", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": "int", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*.fastq.gz", + "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" + } + ], + "baseCommand": [ + "mist_split_fastq.py" + ], + "class": "CommandLineTool", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" + }, + "doc": "Allocate one docker/python process per file to do the actual file splitting.", + "scatter": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + ], + "in": [ + { + "source": "#SplitAndSubsample.cwl/Fastqs", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + }, + { + "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": "#SplitAndSubsample.cwl/NumRecordsPerSplit", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleRatio", + "id": 
"#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleSeed", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" + } + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample", + "out": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "#SplitAndSubsample.cwl/SplitAndSubsample/log" + ] + } + ], + "outputs": [ + { + "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" + }, + { + "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/log" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Seed" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/Subsample_Seed" + } + ], + "class": "ExpressionTool", + "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", + "id": "#SubsampleSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#UncompressDatatables.cwl/Compressed_Data_Table" + }, + { + "type": "File", + "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", + "type": { + "items": "File", + "type": "array" + }, + "id": 
"#UncompressDatatables.cwl/Uncompressed_Data_Tables" + }, + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" + } + ], + "class": "Workflow", + "steps": [ + { + "id": "#UncompressDatatables.cwl/Uncompress_Datatable", + "out": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": "$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + ] + }, + "scatter": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + ], + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Data_Table", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + } + ] + }, + { + "out": [ + "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": 
"$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + ] + }, + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" + } + ] + } + ], + "id": "#UncompressDatatables.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" + }, + { + "inputBinding": { + "position": 2 + }, + "type": "string", + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" + }, + { + "inputBinding": { + "position": 3 + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_pruned.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" + } + ], + "baseCommand": [ + "AssembleAndAnnotate.sh" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 1, + "ramMin": 3200, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" + }, + { 
+ "type": [ + "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" + }, + { + "type": [ 
+ "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 10, + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "position": 0, + "prefix": "--cell-type-mapping-fp" + }, + "type": [ + "null", + "File" + ], + "id": 
"#VDJ_Compile_Results.cwl/cellTypeMapping" + }, + { + "inputBinding": { + "position": 4, + "prefix": "--ignore", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" + }, + { + "inputBinding": { + "position": 8, + "prefix": "--e-value-for-j" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueJgene" + }, + { + "inputBinding": { + "position": 7, + "prefix": "--e-value-for-v" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueVgene" + }, + { + "inputBinding": { + "position": 5 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/igCalls" + }, + { + "inputBinding": { + "position": 9, + "prefix": "--metadata-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/metadata" + }, + { + "inputBinding": { + "position": 3, + "prefix": "--putative-cells-json-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/putativeCells" + }, + { + "inputBinding": { + "position": 6 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/tcrCalls" + }, + { + "inputBinding": { + "position": 2, + "prefix": "--vdj-version" + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Compile_Results.cwl/vdjVersion" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "doc": "VDJ data per cell, with distribution based error correction", + "outputBinding": { + "glob": "*_VDJ_perCell.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" + }, + { + "doc": "VDJ data per cell, including non-putative cells, no error correction applied", + "outputBinding": { + "glob": "*_VDJ_perCell_uncorrected.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" + }, + { + "outputBinding": { + "glob": "*_VDJ_Dominant_Contigs.csv.gz" + }, + "type": [ + 
"null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.json" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" + }, + { + "outputBinding": { + "glob": "*_DBEC_cutoff.png" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" + }, + { + "outputBinding": { + "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" + } + ], + "baseCommand": [ + "mist_vdj_compile_results.py" + ], + "id": "#VDJ_Compile_Results.cwl", + "class": "CommandLineTool", + "hints": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", + "id": "#VDJ_GatherCalls.cwl", + "steps": [ + { + "out": [ + "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_constant_region_called_pruned.csv.gz", + "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" + }, + "type": [ + "null", + "File" + ], 
+ "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" + } + ], + "class": "CommandLineTool", + "arguments": [ + { + "shellQuote": false, + "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" + }, + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", + "in": [ + { + "source": "#VDJ_GatherCalls.cwl/theCalls", + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" + } + ] + } + ], + "outputs": [ + { + "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", + "type": [ + "null", + "File" + ], + "id": "#VDJ_GatherCalls.cwl/gatheredCalls" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "File" + ], + "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": 
"CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "8" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", + "id": "#VDJ_Preprocess_Reads.cwl/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/num_splits" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_RSEC_Reads.cwl", + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", + "class": "ResourceRequirement" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" + ], + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" + } + ], + "run": "#VDJ_Trim_Reads.cwl", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", + "hints": [ + { + 
"coresMin": 8, + "class": "ResourceRequirement" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" + } + ], + "class": "ExpressionTool", + "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" + }, + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/vdj_type", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" + } + ] + } + ], + "id": "#VDJ_Preprocess_Reads.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--vdj-valid-reads", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": 
"#VDJ_RSEC_Reads.cwl/Valid_Reads" + }, + { + "inputBinding": { + "prefix": "--num-splits" + }, + "type": [ + "null", + "int" + ], + "id": "#VDJ_RSEC_Reads.cwl/num_splits" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*RSEC_Reads_Fastq_*.tar.gz" + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" + } + ], + "baseCommand": "mist_vdj_rsec_reads.py", + "class": "CommandLineTool", + "id": "#VDJ_RSEC_Reads.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "Any" + ], + "id": "#VDJ_Settings.cwl/_VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Settings.cwl/VDJ_Version" + } + ], + "class": "ExpressionTool", + "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === \"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = 
inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", + "id": "#VDJ_Settings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "cutadapt.log" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Trim_Report" + }, + { + "outputBinding": { + "glob": "*vdjtxt.gz" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" + } + ], + "baseCommand": "VDJ_Trim_Reads.sh", + "class": "CommandLineTool", + "id": "#VDJ_Trim_Reads.cwl" + }, + { + "inputs": [], + "requirements": [ + ], + "stdout": "output.txt", + "outputs": [ + { + "outputBinding": { + "glob": "output.txt", + "loadContents": true, + "outputEval": "$(self[0].contents)" + }, + "type": "string", + "id": "#Version.cwl/version" + } + ], + "baseCommand": [ + "mist_version.py" + ], + "id": "#Version.cwl", + "class": "CommandLineTool" + } + ], + "$namespaces": { + "sbg": "https://sevenbridges.com#", + "arv": "http://arvados.org/cwl#" + } +} \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl b/target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl new file mode 100755 index 00000000000..5fa9ea85e48 --- /dev/null +++ b/target/nextflow/mapping/bd_rhapsody/rhapsody_wta_1.10.1_nodocker.cwl @@ -0,0 +1,5204 @@ +#!/usr/bin/env cwl-runner +{ + "cwlVersion": "v1.0", + "$graph": [ + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-r1", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AddtoBam.cwl/Annotation_R1" + }, + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#AddtoBam.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--annot-mol-file" + }, + "type": "File", + "id": 
"#AddtoBam.cwl/Molecular_Annotation" + }, + { + "inputBinding": { + "prefix": "--r2-bam" + }, + "type": "File", + "id": "#AddtoBam.cwl/R2_Bam" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AddtoBam.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--tag-calls" + }, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Tag_Calls" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AddtoBam.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "Annotated_mapping_R2.BAM" + }, + "type": "File", + "id": "#AddtoBam.cwl/Annotated_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AddtoBam.cwl/output" + } + ], + "baseCommand": [ + "mist_add_to_bam.py" + ], + "class": "CommandLineTool", + "id": "#AddtoBam.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + ], + "id": "#AlignR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--index" + }, + "type": "File", + "id": "#AlignR2.cwl/Index" + }, + { + "inputBinding": { + "prefix": "--r2-fastqs", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/R2" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AlignR2.cwl/Run_Metadata" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "$(String(runtime.cores))" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*zip" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AlignR2.cwl/Alignments" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AlignR2.cwl/output" + } + ], + "baseCommand": [ + 
"mist_align_R2.py" + ], + "class": "CommandLineTool", + "id": "#AlignR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateMolecules.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--use-dbec" + }, + "type": [ + "null", + "boolean" + ], + "id": "#AnnotateMolecules.cwl/Use_DBEC" + }, + { + "inputBinding": { + "prefix": "--valid-annot" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Valids" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_GeneStatus.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Gene_Status_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).max_count)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Max_Count" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule.csv.*" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/Mol_Annot_List" + }, + { + "outputBinding": { + "glob": "stats.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).total_molecules)\n" + }, + "type": "int", + "id": "#AnnotateMolecules.cwl/Total_Molecules" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateMolecules.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_molecules.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateMolecules.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--filter-metrics", + "itemSeparator": "," + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#AnnotateR1.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--R1" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + 
"type": "File", + "id": "#AnnotateR1.cwl/Run_Metadata" + } + ], + "requirements": [ + + { + "ramMin": 2000, + "class": "ResourceRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_R1.csv.gz" + }, + "type": "File", + "id": "#AnnotateR1.cwl/Annotation_R1" + }, + { + "outputBinding": { + "glob": "*_R1_error_count_table.npy" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_error_count_table" + }, + { + "outputBinding": { + "glob": "*_R1_read_count_breakdown.json" + }, + "type": "File", + "id": "#AnnotateR1.cwl/R1_read_count_breakdown" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR1.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R1.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR1.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--extra-seqs" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/Extra_Seqs" + }, + { + "inputBinding": { + "prefix": "--gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF_Annotation" + }, + { + "inputBinding": { + "prefix": "--R2-zip" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_zip" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--transcript-length" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/Transcript_Length" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*Annotation_R2.csv.gz" + }, + "type": "File", + "id": "#AnnotateR2.cwl/Annot_R2" + }, + { + "outputBinding": { + "glob": "*-annot.gtf" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateR2.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*mapping_R2.BAM" + }, + "type": "File", + "id": "#AnnotateR2.cwl/R2_Bam" + }, + { + "outputBinding": { + "glob": "*_picard_quality_metrics.csv.gz" + }, + "type": 
"File", + "id": "#AnnotateR2.cwl/R2_Quality_Metrics" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#AnnotateR2.cwl/output" + } + ], + "baseCommand": [ + "mist_annotate_R2.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateR2.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--umi-option" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/AbSeq_UMI" + }, + { + "inputBinding": { + "prefix": "--extra-seqs", + "itemSeparator": "," + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Extra_Seqs" + }, + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#AnnotateReads.cwl/Filter_Metrics" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#AnnotateReads.cwl/Putative_Cell_Call" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_error_count_table" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R1_read_count_breakdown" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Annotation" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/R2_Quality_Metrics" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--target-gene-mapping" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Target_Gene_Mapping" + } + ], + "requirements": [ + + { + "class": "InitialWorkDirRequirement", + "listing": [ + { + "writable": false, + "entry": "${\n function getPaths(inputs, attribute) {\n var fp_arr = []\n for (var i = 0; i < inputs[attribute].length; i++)\n {\n fp_arr.push(inputs[attribute][i].path);\n }\n return 
fp_arr;\n }\n var paths = {}\n paths['annotR1'] = getPaths(inputs, 'R1_Annotation')\n paths['R1_error_count_table'] = getPaths(inputs, 'R1_error_count_table')\n paths['R1_read_count_breakdown'] = getPaths(inputs, 'R1_read_count_breakdown')\n paths['annotR2'] = getPaths(inputs, 'R2_Annotation')\n paths['r2_quality_metrics_fps'] = getPaths(inputs, 'R2_Quality_Metrics')\n if(inputs.Filter_Metrics[0] != null){\n paths['filtering_stat_files'] = getPaths(inputs, 'Filter_Metrics')\n }\n var paths_json = JSON.stringify(paths);\n return paths_json;\n}", + "entryname": "manifest.json" + } + ] + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "4" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_Annotation_Read.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/Annotation_Read" + }, + { + "outputBinding": { + "glob": "*read1_error_rate_archive*" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Read1_error_rate" + }, + { + "outputBinding": { + "glob": "*_SeqMetrics.csv.gz" + }, + "type": "File", + "id": "#AnnotateReads.cwl/Seq_Metrics" + }, + { + "outputBinding": { + "glob": "*Sorted_Valid_Reads.csv.*" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#AnnotateReads.cwl/Valid_Reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).BCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_ig_reads" + }, + { + "outputBinding": { + "glob": "num_vdj_reads.json", + "loadContents": true, + "outputEval": "${ if (!self[0]) { return 0; } return parseInt(JSON.parse(self[0].contents).TCR); }" + }, + "type": "int", + "id": "#AnnotateReads.cwl/num_valid_tcr_reads" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": 
"#AnnotateReads.cwl/output" + }, + { + "outputBinding": { + "glob": "*_VDJ_IG_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validIgReads" + }, + { + "outputBinding": { + "glob": "*_VDJ_TCR_Valid_Reads.fastq.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#AnnotateReads.cwl/validTcrReads" + } + ], + "baseCommand": [ + "mist_annotate_reads.py" + ], + "class": "CommandLineTool", + "id": "#AnnotateReads.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#BundleLogs.cwl/log_files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + } + ], + "outputs": [ + { + "type": "Directory", + "id": "#BundleLogs.cwl/logs_dir" + } + ], + "class": "ExpressionTool", + "expression": "${\n /* shamelly cribbed from https://gist.github.com/jcxplorer/823878 */\n function uuid() {\n var uuid = \"\", i, random;\n for (i = 0; i < 32; i++) {\n random = Math.random() * 16 | 0;\n if (i == 8 || i == 12 || i == 16 || i == 20) {\n uuid += \"-\";\n }\n uuid += (i == 12 ? 4 : (i == 16 ? 
(random & 3 | 8) : random)).toString(16);\n }\n return uuid;\n }\n var listing = [];\n for (var i = 0; i < inputs.log_files.length; i++) {\n var log_file = inputs.log_files[i];\n log_file.basename = uuid() + \"-\" + log_file.basename;\n listing.push(log_file);\n }\n return ({\n logs_dir: {\n class: \"Directory\",\n basename: \"Logs\",\n listing: listing\n }\n });\n}", + "id": "#BundleLogs.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 0 + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/molsPerCellMatrix" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*cell_type_experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#Cell_Classifier.cwl/cellTypePredictions" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Cell_Classifier.cwl/log" + } + ], + "baseCommand": [ + "mist_cell_classifier.py" + ], + "class": "CommandLineTool", + "id": "#Cell_Classifier.cwl" + }, + { + "inputs": [ + { + "doc": "The minimum size (megabytes) of a file that should get split into chunks of a size designated in NumRecordsPerSplit\n", + "inputBinding": { + "prefix": "--min-split-size" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/MinChunkSize" + }, + { + "inputBinding": { + "prefix": "--reads", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#CheckFastqs.cwl/Reads" + }, + { + "inputBinding": { + "prefix": "--subsample" + }, + "type": [ + "null", + "float" + ], + "id": "#CheckFastqs.cwl/Subsample" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/Subsample_Seed" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckFastqs.cwl/UserInputSubsampleSeed" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "CheckFastqs does several quality 
control routines including: (1) ensuring that read pair file names are formatted correctly and contain a read pair mate; (2) disambiguating the \"Subsample Reads\" input and; (3) if not provided, generating a subsampling seed that the downstream instances can use.\n", + "baseCommand": [ + "mist_check_fastqs.py" + ], + "id": "#CheckFastqs.cwl", + "outputs": [ + { + "outputBinding": { + "glob": "bead_version.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).BeadVersion)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/Bead_Version" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).fastq_read_pairs)\n" + }, + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/library" + }, + { + "type": "string", + "name": "#CheckFastqs.cwl/FastqReadPairs/beadVersion" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#CheckFastqs.cwl/FastqReadPairs" + }, + { + "outputBinding": { + "glob": "files_to_skip_split_and_subsample.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).files_to_skip_split_and_subsample)\n" + }, + "type": { + "items": "string", + "type": "array" + }, + "id": "#CheckFastqs.cwl/FilesToSkipSplitAndSubsample" + }, + { + "outputBinding": { + "glob": "fastq_read_pairs.json", + "loadContents": true, + "outputEval": "${\n var obj = JSON.parse(self[0].contents);\n var libraries = [];\n var 
pairs = obj.fastq_read_pairs\n for (var i in pairs){\n if (pairs[i][\"readFlag\"] == \"R1\"){\n if (libraries.indexOf(pairs[i][\"library\"]) == -1){ \n libraries.push(pairs[i][\"library\"]);\n }\n }\n }\n libraries.sort();\n return(libraries.toString())\n}\n" + }, + "type": [ + "null", + "string" + ], + "id": "#CheckFastqs.cwl/Libraries" + }, + { + "outputBinding": { + "outputEval": "${ \n var reads = []; \n var files = inputs.Reads\n for (var i in files){\n reads.push(files[i][\"basename\"]);\n }\n reads.sort();\n return(reads)\n}\n" + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#CheckFastqs.cwl/ReadsList" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_seed)\n" + }, + "type": "int", + "id": "#CheckFastqs.cwl/SubsampleSeed" + }, + { + "outputBinding": { + "glob": "subsampling_info.json", + "loadContents": true, + "outputEval": "$(JSON.parse(self[0].contents).subsampling_ratio)\n" + }, + "type": "float", + "id": "#CheckFastqs.cwl/SubsamplingRatio" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#CheckFastqs.cwl/log" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--abseq-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/AbSeq_Reference" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#CheckReference.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/Reference" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#CheckReference.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": 
"--supplemental-reference", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#CheckReference.cwl/Supplemental_Reference" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "combined_extra_seq.fasta" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Extra_Seqs" + }, + { + "outputBinding": { + "glob": "full-gene-list.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Full_Genes" + }, + { + "outputBinding": { + "glob": "*gtf", + "outputEval": "${\n // get the WTA modified GTF with extra seqs\n if (self.length == 1) {\n return self;\n // there is no modified GTF\n } else if (self.length == 0) {\n // if Reference is null (i.e. AbSeq_Reference only), return no GTF\n if (inputs.Reference === null) {\n return null;\n } else {\n // get the original WTA GTF without extra seqs\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('gtf') !== -1) {\n return inputs.Reference[i];\n }\n }\n // return no GTF for Targeted\n return null\n }\n }\n}\n" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/GTF" + }, + { + "outputBinding": { + "glob": "*-annot.*", + "outputEval": "${\n if (self.length == 1) { // Targeted\n return self;\n } else if (self.length == 0){ // WTA without extra seqs or targets\n for (var i = 0; i < inputs.Reference.length; i++) {\n if (inputs.Reference[i].basename.toLowerCase().indexOf('tar.gz') !== -1) {\n return inputs.Reference[i];\n }\n }\n return null\n }\n}\n" + }, + "type": "File", + "id": "#CheckReference.cwl/Index" + }, + { + "outputBinding": { + "glob": "target-gene.json" + }, + "type": [ + "null", + "File" + ], + "id": "#CheckReference.cwl/Target_Gene_Mapping" + }, + { + "outputBinding": { + "glob": "transcript_length.json" + }, + "type": [ + "null", + "File" + ], + "id": 
"#CheckReference.cwl/Transcript_Length" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#CheckReference.cwl/output" + } + ], + "baseCommand": [ + "mist_check_references.py" + ], + "class": "CommandLineTool", + "id": "#CheckReference.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--cell-order" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Cell_Order" + }, + { + "inputBinding": { + "prefix": "--dense-data-table" + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparse.cwl/Dense_Data_Table" + }, + { + "inputBinding": { + "prefix": "--gene-list" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Gene_List" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Run_Metadata" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*.csv.gz" + }, + "type": "File", + "id": "#DensetoSparse.cwl/Data_Tables" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#DensetoSparse.cwl/output" + } + ], + "baseCommand": [ + "mist_dense_to_sparse.py" + ], + "class": "CommandLineTool", + "id": "#DensetoSparse.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#DensetoSparseFile.cwl/GDT_cell_order" + } + ], + "requirements": [ + + ], + "stdout": "cell_order.json", + "outputs": [ + { + "type": "stdout", + "id": "#DensetoSparseFile.cwl/Cell_Order" + } + ], + "baseCommand": "cat", + "id": "#DensetoSparseFile.cwl", + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--full-gene-list" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Full_Genes" + }, + { + "inputBinding": { + "prefix": "--gene-status", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Gene_Status_List" + }, + { + "inputBinding": { + "prefix": "--max-count", + "itemSeparator": 
"," + }, + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Max_Count" + }, + { + "inputBinding": { + "prefix": "--mol-annot", + "itemSeparator": "," + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Molecule_Annotation_List" + }, + { + "inputBinding": { + "prefix": "--putative-cell-call" + }, + "type": [ + "null", + "int" + ], + "id": "#GetDataTable.cwl/Putative_Cell_Call" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#GetDataTable.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#GetDataTable.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "prefix": "--tag-names", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Tag_Names" + }, + { + "type": { + "items": "int", + "type": "array" + }, + "id": "#GetDataTable.cwl/Total_Molecules" + } + ], + "requirements": [ + { + "ramMin": "${return Math.min(Math.max(parseInt(inputs.Total_Molecules.reduce(function(a, b) { return a + b; }, 0) / 4000), 32000), 768000);}", + "class": "ResourceRequirement" + }, + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "metrics-files.tar.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Annot_Files" + }, + { + "outputBinding": { + "glob": "Annotations/*_Bioproduct_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Bioproduct_Stats" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*.png" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Cell_Label_Filter" + }, + { + "outputBinding": { + "glob": "cell_order.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Cell_Order" + }, + { + "outputBinding": { + "glob": "*_Annotation_Molecule_corrected.csv.gz" + }, + "type": "File", + "id": 
"#GetDataTable.cwl/Corrected_Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "*PerCell_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables" + }, + { + "outputBinding": { + "glob": "*PerCell_Unfiltered_Dense.csv.gz" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#GetDataTable.cwl/Dense_Data_Tables_Unfiltered" + }, + { + "outputBinding": { + "glob": "*_Expression_Data.st.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Expression_Data" + }, + { + "outputBinding": { + "glob": "*_Expression_Data_Unfiltered.st.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Expression_Data_Unfiltered" + }, + { + "outputBinding": { + "glob": "gene_list.json" + }, + "type": "File", + "id": "#GetDataTable.cwl/Gene_List" + }, + { + "outputBinding": { + "glob": "Annotations/*_Annotation_Molecule.csv.gz" + }, + "type": "File", + "id": "#GetDataTable.cwl/Molecular_Annotation" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Protein_Aggregates_Experimental.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Protein_Aggregates_Experimental" + }, + { + "outputBinding": { + "glob": "Cell_Label_Filtering/*_Putative_Cells_Origin.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Putative_Cells_Origin" + }, + { + "outputBinding": { + "glob": "Annotations/*_Annotation_Molecule_Trueno.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Annotation" + }, + { + "outputBinding": { + "glob": "Trueno/*_Calls.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/Tag_Calls" + }, + { + "outputBinding": { + "glob": "Trueno/*csv" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#GetDataTable.cwl/Trueno_out" + }, + { + "outputBinding": { + "glob": "Trueno/*zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": 
"#GetDataTable.cwl/Trueno_zip" + }, + { + "outputBinding": { + "glob": "Annotations/*_UMI_Adjusted_CellLabel_Stats.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#GetDataTable.cwl/UMI_Adjusted_CellLabel_Stats" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#GetDataTable.cwl/output" + } + ], + "baseCommand": [ + "mist_get_datatables.py" + ], + "class": "CommandLineTool", + "id": "#GetDataTable.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#IndexBAM.cwl/BamFile" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_index.log", + "outputs": [ + { + "outputBinding": { + "glob": "*.bai" + }, + "type": "File", + "id": "#IndexBAM.cwl/Index" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#IndexBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "index" + ], + "id": "#IndexBAM.cwl", + "arguments": [ + { + "position": 2, + "valueFrom": "${\n return inputs.BamFile.basename + \".bai\"\n}" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/AbSeq_UMI" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Barcode_Num" + }, + { + "type": [ + "null", + "File" + ], + "id": "#InternalSettings.cwl/Extra_Seqs" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/Label_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#InternalSettings.cwl/MinChunkSize" + }, + { + "type": [ + "null", + "long" + ], + "id": "#InternalSettings.cwl/NumRecordsPerSplit" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + "string" + ], + "id": "#InternalSettings.cwl/Seq_Run" + }, + { + "type": [ + "null", + "float" + ], + "id": 
"#InternalSettings.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Target_analysis" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#InternalSettings.cwl/Use_DBEC" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#InternalSettings.cwl/VDJ_VGene_Evalue" + } + ], + "class": "ExpressionTool", + "expression": "${\n var internalInputs = [\n '_Label_Version',\n '_Read_Filter_Off',\n '_Barcode_Num',\n '_Seq_Run',\n '_AbSeq_UMI',\n '_Use_DBEC',\n '_Extra_Seqs',\n '_MinChunkSize',\n '_NumRecordsPerSplit',\n '_Target_analysis',\n '_Subsample_Tags',\n '_VDJ_VGene_Evalue',\n '_VDJ_JGene_Evalue',\n ];\n var internalOutputs = {}\n for (var i = 0; i < internalInputs.length; i++) {\n var internalInput = internalInputs[i];\n var internalOutput = internalInput.slice(1); // remove leading underscore\n if (inputs.hasOwnProperty(internalInput)) {\n internalOutputs[internalOutput] = inputs[internalInput]; // if input specified, redirect to output\n } else {\n internalOutputs[internalOutput] = null; // if input not specified, provide a null\n }\n }\n return internalOutputs;\n}", + "id": "#InternalSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/AbSeq_Reference", + "label": "AbSeq Reference" + }, + { + "doc": "Determine putative cells using only the basic algorithm (minimum second derivative along the cumulative reads curve). The refined algorithm attempts to remove false positives and recover false negatives, but may not be ideal for certain complex mixtures of cell types. 
Does not apply if Exact Cell Count is set.", + "type": [ + "null", + "boolean" + ], + "id": "#main/Basic_Algo_Only", + "label": "Disable Refined Putative Cell Calling" + }, + { + "doc": "Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count", + "type": [ + "null", + "int" + ], + "id": "#main/Exact_Cell_Count", + "label": "Exact Cell Count" + }, + { + "doc": "Specify the data to be used for putative cell calling. mRNA is the default selected option. AbSeq (Experimental) is for troubleshooting only.", + "type": [ + "null", + { + "symbols": [ + "#main/Putative_Cell_Call/Putative_Cell_Call/mRNA", + "#main/Putative_Cell_Call/Putative_Cell_Call/AbSeq_Experimental" + ], + "type": "enum", + "name": "#main/Putative_Cell_Call/Putative_Cell_Call" + } + ], + "id": "#main/Putative_Cell_Call", + "label": "Putative Cell Calling" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/Reads", + "label": "Reads" + }, + { + "type": "File", + "id": "#main/Reference_Genome", + "label": "Reference Genome" + }, + { + "doc": "This is a name for output files, for example Experiment1_Metrics_Summary.csv. Default if left empty is to name run based on a library. Any non-alpha numeric characters will be changed to a hyphen.", + "type": [ + "null", + "string" + ], + "id": "#main/Run_Name", + "label": "Run Name" + }, + { + "doc": "The sample multiplexing kit version. 
This option should only be set for a multiplexed experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/Sample_Tags_Version/Sample_Tags_Version/human", + "#main/Sample_Tags_Version/Sample_Tags_Version/hs", + "#main/Sample_Tags_Version/Sample_Tags_Version/mouse", + "#main/Sample_Tags_Version/Sample_Tags_Version/mm", + "#main/Sample_Tags_Version/Sample_Tags_Version/custom" + ], + "type": "enum", + "name": "#main/Sample_Tags_Version/Sample_Tags_Version" + } + ], + "id": "#main/Sample_Tags_Version", + "label": "Sample Tags Version" + }, + { + "doc": "Any number of reads >1 or a fraction between 0 < n < 1 to indicate the percentage of reads to subsample.\n", + "type": [ + "null", + "float" + ], + "id": "#main/Subsample", + "label": "Subsample Reads" + }, + { + "doc": "For use when replicating a previous subsampling run only. Obtain the seed generated from the log file for the SplitFastQ node.\n", + "type": [ + "null", + "int" + ], + "id": "#main/Subsample_seed", + "label": "Subsample Seed" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Supplemental_Reference", + "label": "Supplemental Reference" + }, + { + "doc": "Specify the Sample Tag number followed by - (hyphen) and a sample name to appear in the output files. For example: 4-Ramos. Should be alphanumeric, with + - and _ allowed. Any special characters: &, (), [], {}, <>, ?, | will be corrected to underscores. \n", + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#main/Tag_Names", + "label": "Tag Names" + }, + { + "type": "File", + "id": "#main/Transcriptome_Annotation", + "label": "Transcriptome Annotation" + }, + { + "doc": "The VDJ species and chain types. 
This option should only be set for a VDJ experiment.", + "type": [ + "null", + { + "symbols": [ + "#main/VDJ_Version/VDJ_Version/human", + "#main/VDJ_Version/VDJ_Version/hs", + "#main/VDJ_Version/VDJ_Version/mouse", + "#main/VDJ_Version/VDJ_Version/mm", + "#main/VDJ_Version/VDJ_Version/humanBCR", + "#main/VDJ_Version/VDJ_Version/humanTCR", + "#main/VDJ_Version/VDJ_Version/mouseBCR", + "#main/VDJ_Version/VDJ_Version/mouseTCR" + ], + "type": "enum", + "name": "#main/VDJ_Version/VDJ_Version" + } + ], + "id": "#main/VDJ_Version", + "label": "VDJ Species Version" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "MultipleInputFeatureRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "The BD Rhapsody\u2122 WTA Analysis Pipeline is used to create sequencing libraries from single cell transcriptomes without having to specify a targeted panel.\n\nAfter sequencing, the analysis pipeline takes the FASTQ files, a reference genome file and a transcriptome annotation file for gene alignment. 
The pipeline generates molecular counts per cell, read counts per cell, metrics, and an alignment file.", + "label": "BD Rhapsody\u2122 WTA Analysis Pipeline", + "steps": [ + { + "run": "#AddtoBam.cwl", + "scatter": [ + "#main/AddtoBam/R2_Bam" + ], + "in": [ + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AddtoBam/Annotation_R1" + }, + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/AddtoBam/Cell_Order" + }, + { + "source": "#main/GetDataTable/Corrected_Molecular_Annotation", + "id": "#main/AddtoBam/Molecular_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Bam", + "id": "#main/AddtoBam/R2_Bam" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AddtoBam/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Tag_Calls", + "id": "#main/AddtoBam/Tag_Calls" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AddtoBam/Target_Gene_Mapping" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AddtoBam", + "out": [ + "#main/AddtoBam/Annotated_Bam", + "#main/AddtoBam/output" + ] + }, + { + "run": "#AlignR2.cwl", + "out": [ + "#main/AlignR2/Alignments", + "#main/AlignR2/output" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": 48000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AlignR2", + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AlignR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/Index", + "id": "#main/AlignR2/Index" + }, + { + "source": "#main/QualityFilterOuter/R2", + "id": "#main/AlignR2/R2" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AlignR2/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateMolecules.cwl", + "scatter": [ + "#main/AnnotateMolecules/Valids" + ], + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateMolecules/AbSeq_UMI" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", 
+ "id": "#main/AnnotateMolecules/Run_Metadata" + }, + { + "source": "#main/Internal_Settings/Use_DBEC", + "id": "#main/AnnotateMolecules/Use_DBEC" + }, + { + "source": "#main/AnnotateReads/Valid_Reads", + "id": "#main/AnnotateMolecules/Valids" + } + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateMolecules", + "out": [ + "#main/AnnotateMolecules/Mol_Annot_List", + "#main/AnnotateMolecules/Gene_Status_List", + "#main/AnnotateMolecules/Max_Count", + "#main/AnnotateMolecules/Total_Molecules", + "#main/AnnotateMolecules/output" + ] + }, + { + "id": "#main/AnnotateR1", + "out": [ + "#main/AnnotateR1/Annotation_R1", + "#main/AnnotateR1/R1_error_count_table", + "#main/AnnotateR1/R1_read_count_breakdown", + "#main/AnnotateR1/output" + ], + "run": "#AnnotateR1.cwl", + "scatter": [ + "#main/AnnotateR1/R1" + ], + "in": [ + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateR1/Filter_Metrics" + }, + { + "source": "#main/QualityFilterOuter/R1", + "id": "#main/AnnotateR1/R1" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR1/Run_Metadata" + } + ] + }, + { + "run": "#AnnotateR2.cwl", + "scatter": [ + "#main/AnnotateR2/R2_zip" + ], + "in": [ + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateR2/Extra_Seqs" + }, + { + "source": "#main/CheckReference/GTF", + "id": "#main/AnnotateR2/GTF_Annotation" + }, + { + "source": "#main/AlignR2/Alignments", + "id": "#main/AnnotateR2/R2_zip" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateR2/Run_Metadata" + }, + { + "source": "#main/CheckReference/Transcript_Length", + "id": "#main/AnnotateR2/Transcript_Length" + } + ], + "requirements": [ + { + "ramMin": 10000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateR2", + "out": [ + "#main/AnnotateR2/Annot_R2", + "#main/AnnotateR2/R2_Bam", + "#main/AnnotateR2/GTF", + "#main/AnnotateR2/output", + 
"#main/AnnotateR2/R2_Quality_Metrics" + ] + }, + { + "run": "#AnnotateReads.cwl", + "out": [ + "#main/AnnotateReads/Seq_Metrics", + "#main/AnnotateReads/Valid_Reads", + "#main/AnnotateReads/Read1_error_rate", + "#main/AnnotateReads/Annotation_Read", + "#main/AnnotateReads/output", + "#main/AnnotateReads/validTcrReads", + "#main/AnnotateReads/validIgReads", + "#main/AnnotateReads/num_valid_tcr_reads", + "#main/AnnotateReads/num_valid_ig_reads" + ], + "requirements": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ], + "id": "#main/AnnotateReads", + "in": [ + { + "source": "#main/Internal_Settings/AbSeq_UMI", + "id": "#main/AnnotateReads/AbSeq_UMI" + }, + { + "source": "#main/CheckReference/Extra_Seqs", + "id": "#main/AnnotateReads/Extra_Seqs" + }, + { + "source": "#main/QualityFilterOuter/Filter_Metrics", + "id": "#main/AnnotateReads/Filter_Metrics" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/AnnotateReads/Putative_Cell_Call" + }, + { + "source": "#main/AnnotateR1/Annotation_R1", + "id": "#main/AnnotateReads/R1_Annotation" + }, + { + "source": "#main/AnnotateR1/R1_error_count_table", + "id": "#main/AnnotateReads/R1_error_count_table" + }, + { + "source": "#main/AnnotateR1/R1_read_count_breakdown", + "id": "#main/AnnotateReads/R1_read_count_breakdown" + }, + { + "source": "#main/AnnotateR2/Annot_R2", + "id": "#main/AnnotateReads/R2_Annotation" + }, + { + "source": "#main/AnnotateR2/R2_Quality_Metrics", + "id": "#main/AnnotateReads/R2_Quality_Metrics" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/AnnotateReads/Run_Metadata" + }, + { + "source": "#main/CheckReference/Target_Gene_Mapping", + "id": "#main/AnnotateReads/Target_Gene_Mapping" + } + ] + }, + { + "out": [ + "#main/BundleLogs/logs_dir" + ], + "run": "#BundleLogs.cwl", + "id": "#main/BundleLogs", + "in": [ + { + "source": [ + "#main/AnnotateReads/output", + "#main/AnnotateR1/output", + "#main/AnnotateR2/output", + 
"#main/CheckReference/output", + "#main/GetDataTable/output", + "#main/Metrics/output", + "#main/AddtoBam/output", + "#main/AnnotateMolecules/output", + "#main/QualityFilterOuter/output", + "#main/CheckFastqs/log", + "#main/SplitAndSubsample/log", + "#main/MergeBAM/log", + "#main/Dense_to_Sparse_Datatable/output", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output", + "#main/IndexBAM/log", + "#main/CellClassifier/log" + ], + "linkMerge": "merge_flattened", + "id": "#main/BundleLogs/log_files" + } + ] + }, + { + "run": "#Cell_Classifier.cwl", + "out": [ + "#main/CellClassifier/cellTypePredictions", + "#main/CellClassifier/log" + ], + "requirements": [ + { + "ramMin": 4000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CellClassifier", + "in": [ + { + "source": "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier", + "id": "#main/CellClassifier/molsPerCellMatrix" + } + ] + }, + { + "out": [ + "#main/CheckFastqs/SubsampleSeed", + "#main/CheckFastqs/SubsamplingRatio", + "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "#main/CheckFastqs/FastqReadPairs", + "#main/CheckFastqs/Bead_Version", + "#main/CheckFastqs/Libraries", + "#main/CheckFastqs/ReadsList", + "#main/CheckFastqs/log" + ], + "run": "#CheckFastqs.cwl", + "id": "#main/CheckFastqs", + "in": [ + { + "source": "#main/Internal_Settings/MinChunkSize", + "id": "#main/CheckFastqs/MinChunkSize" + }, + { + "source": "#main/Reads", + "id": "#main/CheckFastqs/Reads" + }, + { + "source": "#main/Subsample_Settings/Subsample_Reads", + "id": "#main/CheckFastqs/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": "#main/CheckFastqs/Subsample_Seed" + } + ] + }, + { + "run": "#CheckReference.cwl", + "out": [ + "#main/CheckReference/Index", + "#main/CheckReference/Extra_Seqs", + "#main/CheckReference/Full_Genes", + "#main/CheckReference/output", + "#main/CheckReference/Transcript_Length", + "#main/CheckReference/GTF", + "#main/CheckReference/Target_Gene_Mapping" + 
], + "requirements": [ + { + "ramMin": 10000, + "class": "ResourceRequirement" + } + ], + "id": "#main/CheckReference", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/CheckReference/AbSeq_Reference" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/CheckReference/Putative_Cell_Call" + }, + { + "source": [ + "#main/Transcriptome_Annotation", + "#main/Reference_Genome" + ], + "id": "#main/CheckReference/Reference" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/CheckReference/Run_Metadata" + }, + { + "source": "#main/Supplemental_Reference", + "id": "#main/CheckReference/Supplemental_Reference" + } + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/Dense_to_Sparse_File/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables", + "id": "#main/Dense_to_Sparse_Datatable/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": "#main/Dense_to_Sparse_Datatable/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Dense_to_Sparse_Datatable/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable", + "out": [ + "#main/Dense_to_Sparse_Datatable/Data_Tables", + "#main/Dense_to_Sparse_Datatable/output" + ] + }, + { + "run": "#DensetoSparse.cwl", + "scatter": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + ], + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Cell_Order" + }, + { + "source": "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Dense_Data_Table" + }, + { + "source": "#main/GetDataTable/Gene_List", + "id": 
"#main/Dense_to_Sparse_Datatable_Unfiltered/Gene_List" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered/Run_Metadata" + } + ], + "requirements": [ + { + "ramMin": 16000, + "class": "ResourceRequirement" + } + ], + "id": "#main/Dense_to_Sparse_Datatable_Unfiltered", + "out": [ + "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "#main/Dense_to_Sparse_Datatable_Unfiltered/output" + ] + }, + { + "out": [ + "#main/Dense_to_Sparse_File/Cell_Order" + ], + "run": "#DensetoSparseFile.cwl", + "id": "#main/Dense_to_Sparse_File", + "in": [ + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/Dense_to_Sparse_File/GDT_cell_order" + } + ] + }, + { + "out": [ + "#main/FindDataTableForCellClassifier/molsPerCellMatrixForCellClassifier" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/dataTables" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "File", + "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0/molsPerCellMatrixForCellClassifier" + } + ], + "id": "#main/FindDataTableForCellClassifier/e13a85b9-73df-4ed0-9386-c8c9ca3b47f0", + "expression": "${\n for (var i = 0; i < inputs.dataTables.length; i++) {\n var dataTable = inputs.dataTables[i];\n if (dataTable.basename.indexOf(\"_RSEC_MolsPerCell.csv\") >= 0) {\n return({molsPerCellMatrixForCellClassifier: dataTable});\n }\n }\n return({molsPerCellMatrixForCellClassifier: null});\n}", + "class": "ExpressionTool" + }, + "id": "#main/FindDataTableForCellClassifier", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/FindDataTableForCellClassifier/dataTables" + } + ] + }, + { + "out": [ + "#main/GetDataTable/Tag_Calls", + "#main/GetDataTable/Molecular_Annotation", + 
"#main/GetDataTable/Corrected_Molecular_Annotation", + "#main/GetDataTable/Tag_Annotation", + "#main/GetDataTable/Annot_Files", + "#main/GetDataTable/Cell_Label_Filter", + "#main/GetDataTable/Dense_Data_Tables", + "#main/GetDataTable/Dense_Data_Tables_Unfiltered", + "#main/GetDataTable/Expression_Data", + "#main/GetDataTable/Expression_Data_Unfiltered", + "#main/GetDataTable/Bioproduct_Stats", + "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "#main/GetDataTable/Putative_Cells_Origin", + "#main/GetDataTable/Protein_Aggregates_Experimental", + "#main/GetDataTable/Trueno_out", + "#main/GetDataTable/Trueno_zip", + "#main/GetDataTable/output", + "#main/GetDataTable/Cell_Order", + "#main/GetDataTable/Gene_List" + ], + "run": "#GetDataTable.cwl", + "id": "#main/GetDataTable", + "in": [ + { + "source": "#main/CheckReference/Full_Genes", + "id": "#main/GetDataTable/Full_Genes" + }, + { + "source": "#main/AnnotateMolecules/Gene_Status_List", + "id": "#main/GetDataTable/Gene_Status_List" + }, + { + "source": "#main/AnnotateMolecules/Max_Count", + "id": "#main/GetDataTable/Max_Count" + }, + { + "source": "#main/AnnotateMolecules/Mol_Annot_List", + "id": "#main/GetDataTable/Molecule_Annotation_List" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/GetDataTable/Putative_Cell_Call" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/GetDataTable/Run_Metadata" + }, + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/GetDataTable/Seq_Metrics" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/GetDataTable/Tag_Names" + }, + { + "source": "#main/AnnotateMolecules/Total_Molecules", + "id": "#main/GetDataTable/Total_Molecules" + } + ] + }, + { + "out": [ + "#main/IndexBAM/Index", + "#main/IndexBAM/log" + ], + "run": "#IndexBAM.cwl", + "id": "#main/IndexBAM", + "in": [ + { + "source": "#main/MergeBAM/Final_Bam", + "id": "#main/IndexBAM/BamFile" + } + ] + }, + { + 
"out": [ + "#main/Internal_Settings/Read_Filter_Off", + "#main/Internal_Settings/Barcode_Num", + "#main/Internal_Settings/Seq_Run", + "#main/Internal_Settings/AbSeq_UMI", + "#main/Internal_Settings/Use_DBEC", + "#main/Internal_Settings/Extra_Seqs", + "#main/Internal_Settings/MinChunkSize", + "#main/Internal_Settings/NumRecordsPerSplit", + "#main/Internal_Settings/Target_analysis", + "#main/Internal_Settings/Subsample_Tags", + "#main/Internal_Settings/VDJ_VGene_Evalue", + "#main/Internal_Settings/VDJ_JGene_Evalue" + ], + "in": [], + "run": "#InternalSettings.cwl", + "id": "#main/Internal_Settings", + "label": "Internal Settings" + }, + { + "out": [ + "#main/MergeBAM/Final_Bam", + "#main/MergeBAM/log" + ], + "run": "#MergeBAM.cwl", + "id": "#main/MergeBAM", + "in": [ + { + "source": "#main/AddtoBam/Annotated_Bam", + "id": "#main/MergeBAM/BamFiles" + }, + { + "source": "#main/Metadata_Settings/Run_Base_Name", + "id": "#main/MergeBAM/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/MergeBAM/Sample_Tags_Version" + } + ] + }, + { + "out": [ + "#main/MergeMultiplex/Multiplex_out" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": [ + "null", + "File" + ], + "type": "array" + }, + "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/SampleTag_Files" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7/Multiplex_out" + } + ], + "id": "#main/MergeMultiplex/d7de4031-c557-4bec-bdfc-33e9f909e2d7", + "expression": "${\n var fp_array = [];\n for (var i = 0; i < inputs.SampleTag_Files.length; i++) {\n var fp = inputs.SampleTag_Files[i];\n if (fp != null) {\n fp_array.push(fp);\n }\n }\n return({\"Multiplex_out\": fp_array});\n}", + "class": "ExpressionTool" + }, + "id": "#main/MergeMultiplex", + "in": [ + { 
+ "source": [ + "#main/GetDataTable/Trueno_out", + "#main/Metrics/Sample_Tag_Out" + ], + "linkMerge": "merge_flattened", + "id": "#main/MergeMultiplex/SampleTag_Files" + } + ] + }, + { + "out": [ + "#main/Metadata_Settings/Run_Metadata", + "#main/Metadata_Settings/Run_Base_Name" + ], + "run": "#Metadata.cwl", + "id": "#main/Metadata_Settings", + "in": [ + { + "source": "#main/AbSeq_Reference", + "id": "#main/Metadata_Settings/AbSeq_Reference" + }, + { + "valueFrom": "WTA", + "id": "#main/Metadata_Settings/Assay" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only", + "id": "#main/Metadata_Settings/Basic_Algo_Only" + }, + { + "source": "#main/CheckFastqs/Bead_Version", + "id": "#main/Metadata_Settings/Bead_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "id": "#main/Metadata_Settings/Exact_Cell_Count" + }, + { + "source": "#main/CheckFastqs/Libraries", + "id": "#main/Metadata_Settings/Libraries" + }, + { + "valueFrom": "BD Rhapsody WTA Analysis Pipeline", + "id": "#main/Metadata_Settings/Pipeline_Name" + }, + { + "source": "#main/Version/version", + "id": "#main/Metadata_Settings/Pipeline_Version" + }, + { + "source": "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "id": "#main/Metadata_Settings/Putative_Cell_Call" + }, + { + "source": "#main/CheckFastqs/ReadsList", + "id": "#main/Metadata_Settings/Reads" + }, + { + "source": [ + "#main/Transcriptome_Annotation", + "#main/Reference_Genome" + ], + "id": "#main/Metadata_Settings/Reference" + }, + { + "source": "#main/Name_Settings/Run_Name", + "id": "#main/Metadata_Settings/Run_Name" + }, + { + "source": "#main/Multiplexing_Settings/Tag_Sample_Names", + "id": "#main/Metadata_Settings/Sample_Tag_Names" + }, + { + "source": "#main/Multiplexing_Settings/Sample_Tags_Version", + "id": "#main/Metadata_Settings/Sample_Tags_Version" + }, + { + "source": "#main/Start_Time/Start_Time", + "id": "#main/Metadata_Settings/Start_Time" + }, + { + "source": 
"#main/Subsample_Settings/Subsample_Reads", + "id": "#main/Metadata_Settings/Subsample" + }, + { + "source": "#main/Subsample_Settings/Subsample_Seed", + "id": "#main/Metadata_Settings/Subsample_Seed" + }, + { + "source": "#main/Supplemental_Reference", + "id": "#main/Metadata_Settings/Supplemental_Reference" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/Metadata_Settings/VDJ_Version" + } + ] + }, + { + "out": [ + "#main/Metrics/Metrics_Summary", + "#main/Metrics/Metrics_Archive", + "#main/Metrics/output", + "#main/Metrics/Sample_Tag_Out" + ], + "run": "#Metrics.cwl", + "id": "#main/Metrics", + "in": [ + { + "source": "#main/GetDataTable/Annot_Files", + "id": "#main/Metrics/Annot_Files" + }, + { + "source": "#main/AnnotateReads/Read1_error_rate", + "id": "#main/Metrics/Read1_error_rate" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/Metrics/Run_Metadata" + }, + { + "source": "#main/GetDataTable/Trueno_zip", + "id": "#main/Metrics/Sample_Tag_Archives" + }, + { + "source": "#main/Internal_Settings/Seq_Run", + "id": "#main/Metrics/Seq_Run" + }, + { + "source": "#main/GetDataTable/UMI_Adjusted_CellLabel_Stats", + "id": "#main/Metrics/UMI_Adjusted_Stats" + }, + { + "source": "#main/VDJ_Compile_Results/vdjMetricsJson", + "id": "#main/Metrics/vdjMetricsJson" + } + ] + }, + { + "out": [ + "#main/Multiplexing_Settings/Tag_Sample_Names", + "#main/Multiplexing_Settings/Sample_Tags_Version" + ], + "in": [ + { + "source": "#main/Sample_Tags_Version", + "id": "#main/Multiplexing_Settings/_Sample_Tags_Version" + }, + { + "source": "#main/Tag_Names", + "id": "#main/Multiplexing_Settings/_Tag_Sample_Names" + } + ], + "run": "#MultiplexingSettings.cwl", + "id": "#main/Multiplexing_Settings", + "label": "Multiplexing Settings" + }, + { + "out": [ + "#main/Name_Settings/Run_Name" + ], + "in": [ + { + "source": "#main/Run_Name", + "id": "#main/Name_Settings/_Run_Name" + } + ], + "run": "#NameSettings.cwl", + "id": 
"#main/Name_Settings", + "label": "Name Settings" + }, + { + "out": [ + "#main/PairReadFiles/ReadPairs" + ], + "run": "#PairReadFiles.cwl", + "id": "#main/PairReadFiles", + "in": [ + { + "source": "#main/CheckFastqs/FastqReadPairs", + "id": "#main/PairReadFiles/FastqReadPairs" + }, + { + "source": "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#main/PairReadFiles/Reads" + } + ] + }, + { + "out": [ + "#main/Putative_Cell_Calling_Settings/Putative_Cell_Call", + "#main/Putative_Cell_Calling_Settings/Exact_Cell_Count", + "#main/Putative_Cell_Calling_Settings/Basic_Algo_Only" + ], + "in": [ + { + "source": "#main/Basic_Algo_Only", + "id": "#main/Putative_Cell_Calling_Settings/_Basic_Algo_Only" + }, + { + "source": "#main/Exact_Cell_Count", + "id": "#main/Putative_Cell_Calling_Settings/_Exact_Cell_Count" + }, + { + "source": "#main/Putative_Cell_Call", + "id": "#main/Putative_Cell_Calling_Settings/_Putative_Cell_Call" + } + ], + "run": "#PutativeCellSettings.cwl", + "id": "#main/Putative_Cell_Calling_Settings", + "label": "Putative Cell Calling Settings" + }, + { + "out": [ + "#main/QualityFilterOuter/Filter_Metrics", + "#main/QualityFilterOuter/R1", + "#main/QualityFilterOuter/R2", + "#main/QualityFilterOuter/output" + ], + "run": "#QualityFilterOuter.cwl", + "id": "#main/QualityFilterOuter", + "in": [ + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/QualityFilterOuter/Run_Metadata" + }, + { + "source": "#main/PairReadFiles/ReadPairs", + "id": "#main/QualityFilterOuter/Split_Read_Pairs" + } + ] + }, + { + "out": [ + "#main/SplitAndSubsample/SplitAndSubsampledFastqs", + "#main/SplitAndSubsample/log" + ], + "run": "#SplitAndSubsample.cwl", + "id": "#main/SplitAndSubsample", + "in": [ + { + "source": "#main/Reads", + "id": "#main/SplitAndSubsample/Fastqs" + }, + { + "source": "#main/CheckFastqs/FilesToSkipSplitAndSubsample", + "id": "#main/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": 
"#main/Internal_Settings/NumRecordsPerSplit", + "id": "#main/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#main/CheckFastqs/SubsamplingRatio", + "id": "#main/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#main/CheckFastqs/SubsampleSeed", + "id": "#main/SplitAndSubsample/SubsampleSeed" + } + ] + }, + { + "out": [ + "#main/Start_Time/Start_Time" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": "string", + "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a/Start_Time" + } + ], + "id": "#main/Start_Time/c0e8267c-52e8-448b-b9c2-7600ab5ed59a", + "expression": "${ \n var today = new Date();\n var date = today.toString()\n return ({Start_Time: date});\n} ", + "class": "ExpressionTool" + }, + "id": "#main/Start_Time", + "in": [] + }, + { + "out": [ + "#main/Subsample_Settings/Subsample_Reads", + "#main/Subsample_Settings/Subsample_Seed" + ], + "in": [ + { + "source": "#main/Subsample", + "id": "#main/Subsample_Settings/_Subsample_Reads" + }, + { + "source": "#main/Subsample_seed", + "id": "#main/Subsample_Settings/_Subsample_Seed" + } + ], + "run": "#SubsampleSettings.cwl", + "id": "#main/Subsample_Settings", + "label": "Subsample Settings" + }, + { + "out": [ + "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix" + ], + "run": "#UncompressDatatables.cwl", + "id": "#main/Uncompress_Datatables", + "in": [ + { + "source": "#main/Dense_to_Sparse_Datatable/Data_Tables", + "id": "#main/Uncompress_Datatables/Compressed_Data_Table" + }, + { + "source": "#main/GetDataTable/Expression_Data", + "id": "#main/Uncompress_Datatables/Compressed_Expression_Matrix" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": 
"#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_IG/num_cores", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls" + ], + "run": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + }, + { + "source": "#main/VDJ_Preprocess_Reads_TCR/num_cores", + "id": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/num_cores" + } + ] + }, + { + "out": [ + "#main/VDJ_Compile_Results/vdjCellsDatatable", + "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "#main/VDJ_Compile_Results/vdjDominantContigs", + "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "#main/VDJ_Compile_Results/vdjMetricsJson", + "#main/VDJ_Compile_Results/vdjMetricsCsv", + "#main/VDJ_Compile_Results/vdjReadsPerCellByChainTypeFigure" + ], + "run": "#VDJ_Compile_Results.cwl", + "id": "#main/VDJ_Compile_Results", + "in": [ + { + "source": "#main/AnnotateReads/Seq_Metrics", + "id": "#main/VDJ_Compile_Results/Seq_Metrics" + }, + { + "source": "#main/CellClassifier/cellTypePredictions", + "id": "#main/VDJ_Compile_Results/cellTypeMapping" + }, + { + "valueFrom": "$([])", + "id": "#main/VDJ_Compile_Results/chainsToIgnore" + }, + { + "source": "#main/Internal_Settings/VDJ_JGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueJgene" + }, + { + "source": "#main/Internal_Settings/VDJ_VGene_Evalue", + "id": "#main/VDJ_Compile_Results/evalueVgene" + }, + { + "source": 
"#main/VDJ_GatherIGCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/igCalls" + }, + { + "source": "#main/Metadata_Settings/Run_Metadata", + "id": "#main/VDJ_Compile_Results/metadata" + }, + { + "source": "#main/GetDataTable/Cell_Order", + "id": "#main/VDJ_Compile_Results/putativeCells" + }, + { + "source": "#main/VDJ_GatherTCRCalls/gatheredCalls", + "id": "#main/VDJ_Compile_Results/tcrCalls" + }, + { + "source": "#main/VDJ_Settings/VDJ_Version", + "id": "#main/VDJ_Compile_Results/vdjVersion" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherIGCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherIGCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_IG/igCalls", + "id": "#main/VDJ_GatherIGCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_GatherTCRCalls/gatheredCalls" + ], + "run": "#VDJ_GatherCalls.cwl", + "id": "#main/VDJ_GatherTCRCalls", + "in": [ + { + "source": "#main/VDJ_Assemble_and_Annotate_Contigs_TCR/tcrCalls", + "id": "#main/VDJ_GatherTCRCalls/theCalls" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_IG/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_IG/num_splits", + "#main/VDJ_Preprocess_Reads_IG/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_IG", + "in": [ + { + "source": "#main/AnnotateReads/validIgReads", + "id": "#main/VDJ_Preprocess_Reads_IG/Valid_Reads_Fastq" + }, + { + "source": "#main/AnnotateReads/num_valid_ig_reads", + "id": "#main/VDJ_Preprocess_Reads_IG/num_valid_reads" + }, + { + "valueFrom": "BCR", + "id": "#main/VDJ_Preprocess_Reads_IG/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Preprocess_Reads_TCR/RSEC_Reads_Fastq", + "#main/VDJ_Preprocess_Reads_TCR/num_splits", + "#main/VDJ_Preprocess_Reads_TCR/num_cores" + ], + "run": "#VDJ_Preprocess_Reads.cwl", + "id": "#main/VDJ_Preprocess_Reads_TCR", + "in": [ + { + "source": "#main/AnnotateReads/validTcrReads", + "id": "#main/VDJ_Preprocess_Reads_TCR/Valid_Reads_Fastq" + }, 
+ { + "source": "#main/AnnotateReads/num_valid_tcr_reads", + "id": "#main/VDJ_Preprocess_Reads_TCR/num_valid_reads" + }, + { + "valueFrom": "TCR", + "id": "#main/VDJ_Preprocess_Reads_TCR/vdj_type" + } + ] + }, + { + "out": [ + "#main/VDJ_Settings/VDJ_Version" + ], + "in": [ + { + "source": "#main/VDJ_Version", + "id": "#main/VDJ_Settings/_VDJ_Version" + } + ], + "run": "#VDJ_Settings.cwl", + "id": "#main/VDJ_Settings", + "label": "VDJ Settings" + }, + { + "out": [ + "#main/Version/version" + ], + "run": "#Version.cwl", + "id": "#main/Version", + "in": [] + } + ], + "outputs": [ + { + "outputSource": "#main/GetDataTable/Bioproduct_Stats", + "type": [ + "null", + "File" + ], + "id": "#main/Bioproduct_Stats", + "label": "Bioproduct Statistics" + }, + { + "outputSource": "#main/GetDataTable/Cell_Label_Filter", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Cell_Label_Filter", + "label": "Cell Label Filter" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables", + "label": "Data Tables" + }, + { + "outputSource": "#main/Dense_to_Sparse_Datatable_Unfiltered/Data_Tables", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Data_Tables_Unfiltered", + "label": "Unfiltered Data Tables" + }, + { + "outputSource": "#main/Uncompress_Datatables/Uncompressed_Expression_Matrix", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data", + "label": "Expression Matrix" + }, + { + "outputSource": "#main/GetDataTable/Expression_Data_Unfiltered", + "type": [ + "null", + "File" + ], + "id": "#main/Expression_Data_Unfiltered", + "label": "Unfiltered Expression Matrix" + }, + { + "outputSource": "#main/MergeBAM/Final_Bam", + "type": "File", + "id": "#main/Final_Bam", + "label": "Final BAM File" + }, + { + "outputSource": "#main/IndexBAM/Index", + "type": "File", + "id": 
"#main/Final_Bam_Index", + "label": "Final BAM Index" + }, + { + "outputSource": "#main/CellClassifier/cellTypePredictions", + "type": [ + "null", + "File" + ], + "id": "#main/ImmuneCellClassification(Experimental)", + "label": "Immune Cell Classification (Experimental)" + }, + { + "outputSource": "#main/BundleLogs/logs_dir", + "type": "Directory", + "id": "#main/Logs", + "label": "Pipeline Logs" + }, + { + "outputSource": "#main/Metrics/Metrics_Summary", + "type": "File", + "id": "#main/Metrics_Summary", + "label": "Metrics Summary" + }, + { + "outputSource": "#main/MergeMultiplex/Multiplex_out", + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#main/Multiplex" + }, + { + "outputSource": "#main/GetDataTable/Protein_Aggregates_Experimental", + "type": [ + "null", + "File" + ], + "id": "#main/Protein_Aggregates_Experimental", + "label": "Protein Aggregates (Experimental)" + }, + { + "outputSource": "#main/GetDataTable/Putative_Cells_Origin", + "type": [ + "null", + "File" + ], + "id": "#main/Putative_Cells_Origin", + "label": "Putative Cells Origin" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatable", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatable", + "label": "vdjCellsDatatable" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjCellsDatatableUncorrected", + "type": [ + "null", + "File" + ], + "id": "#main/vdjCellsDatatableUncorrected", + "label": "vdjCellsDatatableUncorrected" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjDominantContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjDominantContigs", + "label": "vdjDominantContigs" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjMetricsCsv", + "type": [ + "null", + "File" + ], + "id": "#main/vdjMetricsCsv", + "label": "vdjMetricsCsv" + }, + { + "outputSource": "#main/VDJ_Compile_Results/vdjUnfilteredContigs", + "type": [ + "null", + "File" + ], + "id": "#main/vdjUnfilteredContigs", + "label": 
"vdjUnfilteredContigs" + } + ], + "id": "#main", + "class": "Workflow" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#MergeBAM.cwl/BamFiles" + }, + { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Run_Name" + }, + { + "type": [ + "null", + "string" + ], + "id": "#MergeBAM.cwl/Sample_Tags_Version" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "samtools_merge.log", + "outputs": [ + { + "outputBinding": { + "glob": "*_final.BAM" + }, + "type": "File", + "id": "#MergeBAM.cwl/Final_Bam" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#MergeBAM.cwl/log" + } + ], + "baseCommand": [ + "samtools", + "merge" + ], + "id": "#MergeBAM.cwl", + "arguments": [ + { + "prefix": "-@", + "valueFrom": "$(runtime.cores)" + }, + { + "position": 0, + "valueFrom": "${\n if (inputs.Sample_Tags_Version) {\n return \"Combined_\" + inputs.Run_Name + \"_final.BAM\"\n } else {\n return inputs.Run_Name + \"_final.BAM\"\n }\n}" + } + ], + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 4, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/AbSeq_Reference" + }, + { + "type": "string", + "id": "#Metadata.cwl/Assay" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#Metadata.cwl/Basic_Algo_Only" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/Library" + }, + { + "type": "string", + "name": "#Metadata.cwl/Bead_Version/bead_version" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#Metadata.cwl/Bead_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Label_Version" + }, + { + "type": [ + "null", + "string" + ], + 
"id": "#Metadata.cwl/Libraries" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Name" + }, + { + "type": "string", + "id": "#Metadata.cwl/Pipeline_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Putative_Cell_Call" + }, + { + "type": [ + "null", + "boolean" + ], + "id": "#Metadata.cwl/Read_Filter_Off" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reads" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Name" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#Metadata.cwl/Sample_Tag_Names" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Start_Time" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample" + }, + { + "type": [ + "null", + "int" + ], + "id": "#Metadata.cwl/Subsample_Seed" + }, + { + "type": [ + "null", + "float" + ], + "id": "#Metadata.cwl/Subsample_Tags" + }, + { + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metadata.cwl/Supplemental_Reference" + }, + { + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "run_metadata.json", + "outputs": [ + { + "outputBinding": { + "outputEval": "${ \n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries;\n name = libraries.split(',')[0];\n } \n return(name)\n} \n" + }, + "type": [ + "null", + "string" + ], + "id": "#Metadata.cwl/Run_Base_Name" + }, + { + "type": "stdout", + "id": "#Metadata.cwl/Run_Metadata" + } + ], + "baseCommand": "echo", + "id": "#Metadata.cwl", + "arguments": [ + { + "prefix": "" + }, + { + "shellQuote": 
true, + "valueFrom": "${\n var metadata = inputs;\n var all_bv = {};\n var customer_bv = \"Original (V1)\";\n for (var i = 0; i < inputs.Bead_Version.length; i++) {\n var BeadVer = inputs.Bead_Version[i];\n var Library = BeadVer[\"Library\"];\n var bead_version = BeadVer[\"bead_version\"];\n all_bv[Library] = bead_version \n var short_bv = bead_version.substring(0, 2);\n if (short_bv == \"V2\"){\n var customer_bv = \"Enhanced (V2)\";\n }\n }\n metadata[\"Bead_Version\"] = all_bv;\n\n var pipeline_name = inputs.Pipeline_Name;\n var assay = inputs.Assay;\n var version = inputs.Pipeline_Version;\n var time = inputs.Start_Time;\n var libraries = inputs.Libraries.split(\",\");\n var i = 0;\n var reference_list = []\n if(inputs.Reference != null){\n reference_list = reference_list.concat(inputs.Reference);\n }\n if(inputs.AbSeq_Reference != null){\n reference_list = reference_list.concat(inputs.AbSeq_Reference);\n }\n\n var supplemental = \"\"\n if(inputs.Supplemental_Reference != null){\n supplemental = \"; Supplemental_Reference - \" + inputs.Supplemental_Reference[0][\"basename\"];\n }\n var references = [];\n for (i = 0; i< reference_list.length; i++) {\n if(reference_list[i] != null){\n references.push(reference_list[i][\"basename\"]);\n }\n }\n var parameters = [];\n if(inputs.Sample_Tags_Version != null){\n var tags = \"Sample Tag Version: \" + inputs.Sample_Tags_Version;\n } else{ \n var tags = \"Sample Tag Version: None\";\n }\n parameters.push(tags);\n\n if(inputs.Sample_Tag_Names != null){\n var tag_names = inputs.Sample_Tag_Names.join(\" ; \")\n var tag_list = \"Sample Tag Names: \" + tag_names;\n } else{\n var tag_list = \"Sample Tag Names: None\";\n }\n parameters.push(tag_list);\n \n if(inputs.VDJ_Version != null){\n var vdj = \"VDJ Version: \" + inputs.VDJ_Version;\n } else{ \n var vdj = \"VDJ Version: None\";\n }\n parameters.push(vdj)\n\n if(inputs.Subsample != null){\n var subsample = \"Subsample: \" + inputs.Subsample;\n } else{ \n var subsample = 
\"Subsample: None\";\n } \n parameters.push(subsample);\n\n if(inputs.Putative_Cell_Call == 1){\n var call = \"Putative Cell Calling Type: AbSeq\";\n } else{ \n var call = \"Putative Cell Calling Type: mRNA\";\n } \n parameters.push(call)\n\n if(inputs.Basic_Algo_Only){\n var basic = \"Refined Putative Cell Calling: Off\";\n } else{ \n var basic = \"Refined Putative Cell Calling: On\";\n } \n parameters.push(basic)\n\n if(inputs.Exact_Cell_Count != null){\n var cells = \"Exact Cell Count: \" + inputs.Exact_Cell_Count;\n } else{ \n var cells = \"Exact Cell Count: None\";\n } \n parameters.push(cells)\n\n var name = inputs.Run_Name;\n if (name == null){\n var libraries = inputs.Libraries.split(',');\n name = libraries[0];\n } \n\n var header = [\"####################\"];\n header.push(\"## \" + pipeline_name + \" Version \" + version);\n header.push(\"## Analysis Date - \" + time);\n header.push(\"## Libraries - \" + libraries.join(' | ') + \" - Bead version detected: \" + customer_bv);\n header.push(\"## References - \" + references.join(' | ') + supplemental);\n header.push(\"## Parameters - \" + parameters.join(' | '));\n header.push(\"####################\");\n metadata[\"Output_Header\"] = header;\n metadata[\"Run_Base_Name\"] = name;\n var metadata_json = JSON.stringify(metadata);\n return metadata_json;\n}\n" + } + ], + "class": "CommandLineTool" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--annot-files" + }, + "type": "File", + "id": "#Metrics.cwl/Annot_Files" + }, + { + "inputBinding": { + "prefix": "--read1-error-rate" + }, + "type": "File", + "id": "#Metrics.cwl/Read1_error_rate" + }, + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#Metrics.cwl/Run_Metadata" + }, + { + "inputBinding": { + "prefix": "--sample-tag-archives", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Archives" + }, + { + "inputBinding": { + "prefix": 
"--seq-run" + }, + "type": [ + "null", + "string" + ], + "id": "#Metrics.cwl/Seq_Run" + }, + { + "inputBinding": { + "prefix": "--umi-adjusted-stats" + }, + "type": [ + "null", + "File" + ], + "id": "#Metrics.cwl/UMI_Adjusted_Stats" + }, + { + "inputBinding": { + "prefix": "--vdj-metrics-fp" + }, + "type": [ + "null", + "File" + ], + "id": "#Metrics.cwl/vdjMetricsJson" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "internal-metrics-archive.tar.gz" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Archive" + }, + { + "outputBinding": { + "glob": "*_Metrics_Summary.csv" + }, + "type": "File", + "id": "#Metrics.cwl/Metrics_Summary" + }, + { + "outputBinding": { + "glob": "*.zip" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#Metrics.cwl/Sample_Tag_Out" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#Metrics.cwl/output" + } + ], + "baseCommand": [ + "mist_metrics.py" + ], + "class": "CommandLineTool", + "id": "#Metrics.cwl" + }, + { + "inputs": [ + { + "default": "Targeted", + "type": "string", + "id": "#MultiplexingSettings.cwl/Assay" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#MultiplexingSettings.cwl/_Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/_Tag_Sample_Names" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#MultiplexingSettings.cwl/Sample_Tags_Version" + }, + { + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#MultiplexingSettings.cwl/Tag_Sample_Names" + } + ], + "class": "ExpressionTool", + "expression": "${\n var enumifiedSampleTagsVersion = null;\n if (inputs._Sample_Tags_Version) {\n var _Sample_Tags_Version = inputs._Sample_Tags_Version.toLowerCase();\n if (_Sample_Tags_Version.indexOf('human') >= 0 || 
_Sample_Tags_Version === 'hs')\n {\n enumifiedSampleTagsVersion = 'hs';\n }\n else if (_Sample_Tags_Version.indexOf('mouse') >= 0 || _Sample_Tags_Version === 'mm')\n {\n enumifiedSampleTagsVersion = 'mm';\n }\n else if (_Sample_Tags_Version === 'no multiplexing')\n {\n enumifiedSampleTagsVersion = null;\n }\n else\n {\n throw new Error(\"Cannot parse Sample Tag Version: \" + inputs._Sample_Tags_Version);\n }\n }\n var listTagNames = inputs._Tag_Sample_Names\n var newTagNames = []\n for (var num in listTagNames) {\n var tag = listTagNames[num].replace(/[^A-Za-z0-9-+]/g,\"_\");\n newTagNames.push(tag); \n } \n return ({\n Tag_Sample_Names: newTagNames,\n Sample_Tags_Version: enumifiedSampleTagsVersion\n });\n}", + "id": "#MultiplexingSettings.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/_Run_Name" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "string" + ], + "id": "#NameSettings.cwl/Run_Name" + } + ], + "class": "ExpressionTool", + "expression": "${ var name = inputs._Run_Name;\n if (name != null) {\n name = name.replace(/[\\W_]+/g,\"-\");}\n return({'Run_Name' : name });\n } ", + "id": "#NameSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/filename" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readFlag" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/FastqReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/FastqReadPairs" + }, + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#PairReadFiles.cwl/Reads" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "PairReadFiles takes an array of split files and pairs 
them, such that an R1 file is transferred to the QualityFilter with its corresponding R2 file.\nThe original FASTQ files are paired in CheckFastqs and then split and sub-sampled in SplitAndSubsample. The pairing information is taken from CheckFastqs.\n", + "id": "#PairReadFiles.cwl", + "outputs": [ + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R1" + }, + { + "type": "File", + "name": "#PairReadFiles.cwl/ReadPairs/R2" + }, + { + "type": "int", + "name": "#PairReadFiles.cwl/ReadPairs/readPairId" + }, + { + "type": "string", + "name": "#PairReadFiles.cwl/ReadPairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#PairReadFiles.cwl/ReadPairs" + } + ], + "expression": "${\n // use the CheckFastqs read pairing information to create a dictionary\n // using the original fastq file name without the extension as the key\n var fastqReadPairs = {}\n for (var i = 0; i < inputs.FastqReadPairs.length; i++) {\n var fileDict = inputs.FastqReadPairs[i];\n var filename = fileDict[\"filename\"];\n\n if (!fastqReadPairs[filename]) {\n fastqReadPairs[filename] = {\n readPairId: null,\n readFlag: null,\n library: null,\n };\n }\n else {\n throw new Error(\"Found non-unique fastq filename '\" + filename + \"' in the FastqReadPairs dictionary from CheckFastqs.\")\n }\n\n fastqReadPairs[filename].readPairId = fileDict[\"readPairId\"]\n fastqReadPairs[filename].readFlag = fileDict[\"readFlag\"]\n fastqReadPairs[filename].library = fileDict[\"library\"]\n }\n\n // now loop through the input read files which could\n // be the original fastq files if no sub-sampling has\n // been done, or the sub-sampled fastq files\n var readPairs = {}\n for (var i = 0; i < inputs.Reads.length; i++) {\n\n // Set the fileDict to null\n var fileDict = null;\n\n // Get the fastq file\n var fastqFile = inputs.Reads[i];\n\n // Remove the .gz from the end of the filename\n var fileNoGzExt = fastqFile.basename.replace(/.gz$/i, 
\"\");\n\n // Remove the next file extension if it exists\n var fileArrayWithExt = fileNoGzExt.split(\".\");\n // If an extension exists, splice the array\n var fileArrayNoExt = null;\n if (fileArrayWithExt.length > 1) {\n fileArrayNoExt = fileArrayWithExt.splice(0, fileArrayWithExt.length-1);\n } else {\n // No file extension exists, so use the whole array\n fileArrayNoExt = fileArrayWithExt\n }\n var fileRootname = fileArrayNoExt.join(\".\")\n\n // if the original files were sub-sampled\n // get the original file and the chunk id\n if (fileRootname.indexOf(\"-\") != -1) {\n // Split on the dash to get the name of\n // the original file and the chunk id\n // The original file name can also have dashes\n var chunkFileArray = fileRootname.split(\"-\");\n\n // Get the original file rootname and chunk id\n // The rootname without the chunk id and file\n // extension is the key from CheckFastqs\n // The chunk id is used later to create a new unique\n // read pair id for all sub-sampled fastq files\n\n // The rootname array should contain all elements up to the last dash\n var fileRootnameArray = chunkFileArray.splice(0, chunkFileArray.length-1);\n var fileRootnameNoChunkId = fileRootnameArray.join(\"-\");\n\n // The chunk id is the last element in the array\n // representing the content after the last dash\n var orgChunkId = chunkFileArray.pop();\n\n // if there is no chunk id, use an arbitrary number\n // the chunk id is unique when the files are sub-sampled\n // and does not need to be unique when the files are not sub-sampled\n var chunkId = 9999;\n if (orgChunkId) {\n // cast to an integer\n chunkId = parseInt(orgChunkId);\n }\n // double check that we have a chunk id\n if (chunkId === undefined || chunkId === null) {\n throw new Error(\"The fastq file sub-sampling id could not be determined!\");\n }\n\n // The file rootname without the chunk id and file extension\n // should match the original file rootname from CheckFastqs\n // The original file rootname from 
CheckFastqs is the key for\n // the dictionary containing the original unique pair id\n var fileDict = fastqReadPairs[fileRootnameNoChunkId];\n }\n\n // If the files are not sub-sampled or the fileDict\n // is not found, then try to use the original\n // file rootname without the file extension as the key\n if (fileDict === undefined || fileDict === null) {\n\n // if the original files were not sub-sampled,\n // use the original file rootname and an arbitrary chunk id\n var chunkId = 9999;\n\n var fileDict = fastqReadPairs[fileRootname];\n\n // If the fileDict for this file rootname is not found,\n // then the filenames are in an unexpected format and\n // the code to parse the filenames in CheckFastqs,\n // SplitAndSubsample and here need to match\n if (fileDict === undefined || fileDict === null) {\n // Create an error\n if (fileDict === undefined || fileDict === null) {\n throw new Error(\"Cannot find the fastq read pair information for '\" + fastqFile.basename + \"'.\");\n }\n }\n }\n\n // Get the pairing information from CheckFastqs\n var readPairId = fileDict[\"readPairId\"];\n var library = fileDict[\"library\"];\n var flag = fileDict[\"readFlag\"];\n\n // Add the chunkId to create a new unique read pair id\n // for each file (sub-sampled or not)\n var chunkReadPairId = readPairId + \"_\" + chunkId;\n\n // Create a dictionary for each pair of files\n if (!readPairs[chunkReadPairId]) {\n readPairs[chunkReadPairId] = {\n R1: null,\n R2: null,\n library: library,\n readPairId: null,\n };\n }\n // add in the R1 and R2 files, depending on the flag\n if (flag === \"R1\") {\n readPairs[chunkReadPairId].R1 = fastqFile\n } else if (flag === \"R2\") {\n readPairs[chunkReadPairId].R2 = fastqFile\n }\n }\n // we are not interested in the read pair ids in readPairs\n // flatten into an array of objects\n var readPairsList = [];\n var i = 1;\n for (var key in readPairs) {\n if (readPairs.hasOwnProperty(key)) {\n var readPair = readPairs[key];\n readPair.readPairId = i;\n 
readPairsList.push(readPair);\n i++;\n }\n }\n // pass this array to the record array named \"ReadPairs\" on the CWL layer\n return {ReadPairs: readPairsList}\n}", + "class": "ExpressionTool" + }, + { + "inputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/_Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/_Exact_Cell_Count" + }, + { + "type": [ + "null", + "Any" + ], + "id": "#PutativeCellSettings.cwl/_Putative_Cell_Call" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "boolean" + ], + "id": "#PutativeCellSettings.cwl/Basic_Algo_Only" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Exact_Cell_Count" + }, + { + "type": [ + "null", + "int" + ], + "id": "#PutativeCellSettings.cwl/Putative_Cell_Call" + } + ], + "class": "ExpressionTool", + "expression": "${\n // the basic algorithm flag defaults to false\n var basicAlgOnlyFlag = false;\n // the user can set the basic algorithm flag\n if (inputs._Basic_Algo_Only) {\n basicAlgOnlyFlag = inputs._Basic_Algo_Only;\n }\n // convert the Putative_Cell_Call from a string to an integer\n var putativeCellCallInt = 0;\n if (inputs._Putative_Cell_Call) {\n if (inputs._Putative_Cell_Call === \"mRNA\") {\n putativeCellCallInt = 0;\n }\n else if (inputs._Putative_Cell_Call == \"AbSeq_Experimental\" || inputs._Putative_Cell_Call == \"AbSeq (Experimental)\") {\n putativeCellCallInt = 1;\n // for protein-only cell calling, we only have the basic algorithm\n basicAlgOnlyFlag = true;\n }\n else if (inputs._Putative_Cell_Call == \"mRNA_and_AbSeq\") {\n putativeCellCallInt = 2;\n }\n }\n // check the exact cell count\n if (inputs._Exact_Cell_Count) {\n if (inputs._Exact_Cell_Count < 1) {\n throw(\"Illogical value for exact cell count: \" + inputs._Exact_Cell_Count);\n }\n }\n return ({\n Putative_Cell_Call: putativeCellCallInt,\n Exact_Cell_Count: 
inputs._Exact_Cell_Count,\n Basic_Algo_Only: basicAlgOnlyFlag,\n });\n}", + "id": "#PutativeCellSettings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--run-metadata" + }, + "type": "File", + "id": "#QualityFilter.cwl/Run_Metadata" + }, + { + "type": { + "fields": [ + { + "inputBinding": { + "prefix": "--r1" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R1" + }, + { + "inputBinding": { + "prefix": "--r2" + }, + "type": "File", + "name": "#QualityFilter.cwl/Split_Read_Pairs/R2" + }, + { + "inputBinding": { + "prefix": "--read-pair-id" + }, + "type": "int", + "name": "#QualityFilter.cwl/Split_Read_Pairs/readPairId" + }, + { + "inputBinding": { + "prefix": "--library" + }, + "type": "string", + "name": "#QualityFilter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "id": "#QualityFilter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*read_quality.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#QualityFilter.cwl/Filter_Metrics" + }, + { + "outputBinding": { + "glob": "*_R1*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R1" + }, + { + "outputBinding": { + "glob": "*_R2*.fastq.gz" + }, + "type": "File", + "id": "#QualityFilter.cwl/R2" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#QualityFilter.cwl/output" + } + ], + "baseCommand": [ + "mist_quality_filter.py" + ], + "class": "CommandLineTool", + "id": "#QualityFilter.cwl" + }, + { + "inputs": [ + { + "type": "File", + "id": "#QualityFilterOuter.cwl/Run_Metadata" + }, + { + "type": { + "items": { + "fields": [ + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R1" + }, + { + "type": "File", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/R2" + }, + { + "type": "int", + "name": "#QualityFilterOuter.cwl/Split_Read_Pairs/readPairId" + }, + { + "type": "string", + "name": 
"#QualityFilterOuter.cwl/Split_Read_Pairs/library" + } + ], + "type": "record" + }, + "type": "array" + }, + "id": "#QualityFilterOuter.cwl/Split_Read_Pairs" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/Filter_Metrics" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R1" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/R2" + }, + { + "outputSource": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#QualityFilterOuter.cwl/output" + } + ], + "class": "Workflow", + "steps": [ + { + "scatter": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs", + "out": [ + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R1", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/R2", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Filter_Metrics", + "#QualityFilterOuter.cwl/Quality_Filter_Scatter/output" + ], + "run": "#QualityFilter.cwl", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter", + "in": [ + { + "source": "#QualityFilterOuter.cwl/Run_Metadata", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Run_Metadata" + }, + { + "source": "#QualityFilterOuter.cwl/Split_Read_Pairs", + "id": "#QualityFilterOuter.cwl/Quality_Filter_Scatter/Split_Read_Pairs" + } + ] + } + ], + "id": 
"#QualityFilterOuter.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/Fastqs" + }, + { + "type": { + "items": "string", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample" + }, + { + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/NumRecordsPerSplit" + }, + { + "type": "float", + "id": "#SplitAndSubsample.cwl/SubsampleRatio" + }, + { + "type": "int", + "id": "#SplitAndSubsample.cwl/SubsampleSeed" + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "SplitAndSubsample splits, subsamples and formats read files to be deposited in QualityFilter.\n", + "id": "#SplitAndSubsample.cwl", + "steps": [ + { + "doc": "After scattering \"SplitAndSubsample\" on a File array, the output of each node is also an array. Thus, we are left with a nestled list. This JS expression flattens this list to deal with the split reads in PairReadFiles.cwl", + "out": [ + "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": { + "items": { + "items": "File", + "type": "array" + }, + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/nestledSplitFastqList" + } + ], + "outputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output/SplitFastqList" + } + ], + "class": "ExpressionTool", + "expression": "${\n return {SplitFastqList: [].concat.apply([], inputs.nestledSplitFastqList)}\n}\n", + "id": "#SplitAndSubsample.cwl/FlattenOutput/flatten_output" + }, + "id": "#SplitAndSubsample.cwl/FlattenOutput", + "in": [ + { + "source": "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "id": "#SplitAndSubsample.cwl/FlattenOutput/nestledSplitFastqList" + } + ] + }, + { + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + 
"inputBinding": { + "prefix": "--fastq-file-path" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/Fastq" + }, + { + "inputBinding": { + "prefix": "--files-to-skip-split-and-subsample", + "itemSeparator": "," + }, + "type": { + "items": "string", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/FilesToSkipSplitAndSubsample" + }, + { + "inputBinding": { + "prefix": "--num-records" + }, + "type": [ + "null", + "long" + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/NumRecordsPerSplit" + }, + { + "inputBinding": { + "prefix": "--subsample-ratio" + }, + "type": "float", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleRatio" + }, + { + "inputBinding": { + "prefix": "--subsample-seed" + }, + "type": "int", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SubsampleSeed" + } + ], + "requirements": [ + ], + "outputs": [ + { + "outputBinding": { + "glob": "*.fastq.gz", + "outputEval": "${ if (self.length === 0) { return [inputs.Fastq]; } else { return self; } }" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/SplitAndSubsampledFastqs" + }, + { + "outputBinding": { + "glob": "*.log" + }, + "type": "File", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq/log" + } + ], + "baseCommand": [ + "mist_split_fastq.py" + ], + "class": "CommandLineTool", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/split_fastq" + }, + "doc": "Allocate one docker/python process per file to do the actual file splitting.", + "scatter": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + ], + "in": [ + { + "source": "#SplitAndSubsample.cwl/Fastqs", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/Fastq" + }, + { + "source": "#SplitAndSubsample.cwl/FilesToSkipSplitAndSubsample", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/FilesToSkipSplitAndSubsample" + }, + { + "source": 
"#SplitAndSubsample.cwl/NumRecordsPerSplit", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/NumRecordsPerSplit" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleRatio", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleRatio" + }, + { + "source": "#SplitAndSubsample.cwl/SubsampleSeed", + "id": "#SplitAndSubsample.cwl/SplitAndSubsample/SubsampleSeed" + } + ], + "id": "#SplitAndSubsample.cwl/SplitAndSubsample", + "out": [ + "#SplitAndSubsample.cwl/SplitAndSubsample/SplitAndSubsampledFastqs", + "#SplitAndSubsample.cwl/SplitAndSubsample/log" + ] + } + ], + "outputs": [ + { + "outputSource": "#SplitAndSubsample.cwl/FlattenOutput/SplitFastqList", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/SplitAndSubsampledFastqs" + }, + { + "outputSource": "#SplitAndSubsample.cwl/SplitAndSubsample/log", + "type": { + "items": "File", + "type": "array" + }, + "id": "#SplitAndSubsample.cwl/log" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/_Subsample_Seed" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#SubsampleSettings.cwl/Subsample_Reads" + }, + { + "type": [ + "null", + "int" + ], + "id": "#SubsampleSettings.cwl/Subsample_Seed" + } + ], + "class": "ExpressionTool", + "expression": "${\n var subsamplingOutputs = {\n Subsample_Reads: inputs._Subsample_Reads,\n Subsample_Seed: inputs._Subsample_Seed\n }\n return subsamplingOutputs;\n}", + "id": "#SubsampleSettings.cwl" + }, + { + "inputs": [ + { + "type": { + "items": "File", + "type": "array" + }, + "id": "#UncompressDatatables.cwl/Compressed_Data_Table" + }, + { + "type": "File", + "id": "#UncompressDatatables.cwl/Compressed_Expression_Matrix" + } + ], + "requirements": [ + { + "class": 
"ScatterFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File", + "type": { + "items": "File", + "type": "array" + }, + "id": "#UncompressDatatables.cwl/Uncompressed_Data_Tables" + }, + { + "outputSource": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File", + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompressed_Expression_Matrix" + } + ], + "class": "Workflow", + "steps": [ + { + "id": "#UncompressDatatables.cwl/Uncompress_Datatable", + "out": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": "$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Uncompress_Datatable_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + ] + }, + "scatter": [ + "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + ], + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Data_Table", + "id": "#UncompressDatatables.cwl/Uncompress_Datatable/Compressed_File" + } + ] + }, + { + "out": [ + "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompressed_File" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": "File", + "id": 
"#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Compressed_File" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "stdout": "$(inputs.Compressed_File.nameroot)", + "outputs": [ + { + "outputBinding": { + "glob": "$(inputs.Compressed_File.nameroot)" + }, + "type": "File", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner/Uncompressed_File" + } + ], + "baseCommand": [ + "gunzip" + ], + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Uncompress_Expression_Matrix_Inner", + "arguments": [ + { + "position": 0, + "valueFrom": "-c" + } + ], + "class": "CommandLineTool", + "hints": [ + + ] + }, + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix", + "in": [ + { + "source": "#UncompressDatatables.cwl/Compressed_Expression_Matrix", + "id": "#UncompressDatatables.cwl/Uncompress_Expression_Matrix/Compressed_File" + } + ] + } + ], + "id": "#UncompressDatatables.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/RSEC_Reads_Fastq" + }, + { + "inputBinding": { + "position": 2 + }, + "type": "string", + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/Read_Limit" + }, + { + "inputBinding": { + "position": 3 + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/VDJ_Version" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_pruned.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl/PyirCall" + } + ], + "baseCommand": [ + "AssembleAndAnnotate.sh" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "class": "CommandLineTool", + "hints": [ + { + "coresMin": 1, + "ramMin": 3200, + "class": "ResourceRequirement" + } + ] 
+ }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/igCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl/VDJ_Assemble_and_Annotate_Contigs_IG/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_IG.cwl" + }, + { + 
"inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/num_cores" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "SubworkflowFeatureRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/tcrCalls" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_Assemble_and_Annotate_Contigs.cwl", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR", + "in": [ + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/RSEC_Reads_Fastq", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + }, + { + "valueFrom": "75000", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/Read_Limit" + }, + { + "source": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Version", + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/VDJ_Version" + } + ], + "hints": [ + { + "coresMin": "$(inputs.num_cores)", + "class": "ResourceRequirement" + } + ], + "scatter": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/RSEC_Reads_Fastq" + ], + "out": [ + "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl/VDJ_Assemble_and_Annotate_Contigs_TCR/PyirCall" + ] + } + ], + "id": "#VDJ_Assemble_and_Annotate_Contigs_TCR.cwl" + 
}, + { + "inputs": [ + { + "inputBinding": { + "position": 10, + "prefix": "--seq-metrics" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/Seq_Metrics" + }, + { + "inputBinding": { + "position": 0, + "prefix": "--cell-type-mapping-fp" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/cellTypeMapping" + }, + { + "inputBinding": { + "position": 4, + "prefix": "--ignore", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ], + "id": "#VDJ_Compile_Results.cwl/chainsToIgnore" + }, + { + "inputBinding": { + "position": 8, + "prefix": "--e-value-for-j" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueJgene" + }, + { + "inputBinding": { + "position": 7, + "prefix": "--e-value-for-v" + }, + "type": [ + "null", + "float" + ], + "id": "#VDJ_Compile_Results.cwl/evalueVgene" + }, + { + "inputBinding": { + "position": 5 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/igCalls" + }, + { + "inputBinding": { + "position": 9, + "prefix": "--metadata-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/metadata" + }, + { + "inputBinding": { + "position": 3, + "prefix": "--putative-cells-json-fp" + }, + "type": "File", + "id": "#VDJ_Compile_Results.cwl/putativeCells" + }, + { + "inputBinding": { + "position": 6 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/tcrCalls" + }, + { + "inputBinding": { + "position": 2, + "prefix": "--vdj-version" + }, + "type": [ + "null", + "string" + ], + "id": "#VDJ_Compile_Results.cwl/vdjVersion" + } + ], + "requirements": [ + + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "doc": "VDJ data per cell, with distribution based error correction", + "outputBinding": { + "glob": "*_VDJ_perCell.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatable" + }, + { + "doc": "VDJ data per cell, including non-putative cells, no 
error correction applied", + "outputBinding": { + "glob": "*_VDJ_perCell_uncorrected.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjCellsDatatableUncorrected" + }, + { + "outputBinding": { + "glob": "*_VDJ_Dominant_Contigs.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjDominantContigs" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.csv" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsCsv" + }, + { + "outputBinding": { + "glob": "*_VDJ_metrics.json" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjMetricsJson" + }, + { + "outputBinding": { + "glob": "*_DBEC_cutoff.png" + }, + "type": { + "items": "File", + "type": "array" + }, + "id": "#VDJ_Compile_Results.cwl/vdjReadsPerCellByChainTypeFigure" + }, + { + "outputBinding": { + "glob": "*_VDJ_Unfiltered_Contigs.csv.gz" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Compile_Results.cwl/vdjUnfilteredContigs" + } + ], + "baseCommand": [ + "mist_vdj_compile_results.py" + ], + "id": "#VDJ_Compile_Results.cwl", + "class": "CommandLineTool", + "hints": [ + { + "ramMin": 32000, + "class": "ResourceRequirement" + } + ] + }, + { + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "doc": "VDJ_GatherCalls collect the outputs from the multi-processed VDJ step into one file.\n", + "id": "#VDJ_GatherCalls.cwl", + "steps": [ + { + "out": [ + "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/theCalls" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + }, + { + "class": 
"ShellCommandRequirement" + } + ], + "outputs": [ + { + "outputBinding": { + "glob": "*_constant_region_called_pruned.csv.gz", + "outputEval": "${\n if (self.size == 0) {\n throw(\"No outputs from PyIR detected in VDJ_GatherCalls!\");\n } else {\n return(self);\n }\n}" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR/gatheredCalls" + } + ], + "class": "CommandLineTool", + "arguments": [ + { + "shellQuote": false, + "valueFrom": "${\n if (!inputs.theCalls[0] ) {\n return (\"echo \\\"No outputs from PyIR detected in VDJ_GatherCalls\\\"\")\n }\n var inputFiles = \"\"\n if (!inputs.theCalls[0].path.split(\"_PrunePyIR\")[1]){\n inputFiles = \"zcat\"\n for (var i = 0; i < inputs.theCalls.length; i++) {\n inputFiles += \" \" + inputs.theCalls[i].path\n }\n inputFiles += \" | \"\n } else {\n inputFiles = \"zcat \" + inputs.theCalls[0].path.split(\"VDJ\")[0] + \"*\" + inputs.theCalls[0].path.split(\"_PrunePyIR\")[1].split(\"_Number_\")[0] + \"_Number_*.csv.gz | \"\n }\n var outputFileName = \"\\\"gzip > \" + inputs.theCalls[0].nameroot.split(\"_Number_\")[0] + \"_constant_region_called_pruned.csv.gz\" + \"\\\"\"\n var awkCommand = \"awk \\'NR==1{F=$1;print | \" + outputFileName + \" } $1!=F { print | \" + outputFileName + \" }\\' \"\n var outputCommand = inputFiles + awkCommand\n return (outputCommand)\n}" + } + ], + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gather_PyIR" + }, + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls", + "in": [ + { + "source": "#VDJ_GatherCalls.cwl/theCalls", + "id": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/theCalls" + } + ] + } + ], + "outputs": [ + { + "outputSource": "#VDJ_GatherCalls.cwl/VDJ_GatherCalls/gatheredCalls", + "type": [ + "null", + "File" + ], + "id": "#VDJ_GatherCalls.cwl/gatheredCalls" + } + ], + "class": "Workflow" + }, + { + "inputs": [ + { + "type": [ + "null", + "File" + ], + "id": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "id": 
"#VDJ_Preprocess_Reads.cwl/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/vdj_type" + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "envDef": [ + { + "envName": "CORES_ALLOCATED_PER_CWL_PROCESS", + "envValue": "8" + } + ], + "class": "EnvVarRequirement" + } + ], + "outputs": [ + { + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq", + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/RSEC_Reads_Fastq" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores", + "id": "#VDJ_Preprocess_Reads.cwl/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "outputSource": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/num_splits" + } + ], + "class": "Workflow", + "steps": [ + { + "run": "#VDJ_RSEC_Reads.cwl", + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/RSEC_Reads_Fastq" + ], + "requirements": [ + { + "coresMin": 8, + "ramMin": "${ var est_ram = 0.0006 * parseInt(inputs.num_valid_reads) + 2000; var buffer = 1.25; est_ram *= buffer; if (est_ram < 2000) return 2000; if (est_ram > 370000) return 370000; return parseInt(est_ram); }", + "class": "ResourceRequirement" + } + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/Valid_Reads" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_splits" + }, + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_RSEC_Reads/num_valid_reads" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads", + 
"#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Trim_Report" + ], + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/Valid_Reads_Fastq", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads/Valid_Reads_Fastq" + } + ], + "run": "#VDJ_Trim_Reads.cwl", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_Trim_Reads", + "hints": [ + { + "coresMin": 8, + "class": "ResourceRequirement" + } + ] + }, + { + "out": [ + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_splits", + "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_cores" + ], + "run": { + "cwlVersion": "v1.0", + "inputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_valid_reads" + }, + { + "type": "string", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/vdj_type" + } + ], + "outputs": [ + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_cores" + }, + { + "type": [ + "null", + "int" + ], + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits/num_splits" + } + ], + "class": "ExpressionTool", + "expression": "${\n var ram_per_instance = 192 * 1024;\n var num_cores = 96;\n if (inputs.vdj_type == \"BCR\") {\n ram_per_instance = 144 * 1024;\n num_cores = 72;\n }\n var ram_per_split = 3200;\n var num_splits_per_instance = parseInt(ram_per_instance / ram_per_split);\n var num_splits = num_splits_per_instance;\n\n var num_reads = parseInt(inputs.num_valid_reads);\n if (num_reads != null) {\n if (num_reads > 100000000)\n num_splits = num_splits_per_instance * 2;\n num_cores = num_cores * 2;\n }\n\n return ({\"num_splits\": num_splits, \"num_cores\": num_cores});\n}", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/determine_num_splits" + }, + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits", + "in": [ + { + "source": "#VDJ_Preprocess_Reads.cwl/num_valid_reads", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/num_valid_reads" + }, + { + "source": 
"#VDJ_Preprocess_Reads.cwl/vdj_type", + "id": "#VDJ_Preprocess_Reads.cwl/VDJ_num_splits/vdj_type" + } + ] + } + ], + "id": "#VDJ_Preprocess_Reads.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "prefix": "--vdj-valid-reads", + "itemSeparator": "," + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#VDJ_RSEC_Reads.cwl/Valid_Reads" + }, + { + "inputBinding": { + "prefix": "--num-splits" + }, + "type": [ + "null", + "int" + ], + "id": "#VDJ_RSEC_Reads.cwl/num_splits" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "*RSEC_Reads_Fastq_*.tar.gz" + }, + "type": [ + { + "items": [ + "null", + "File" + ], + "type": "array" + } + ], + "id": "#VDJ_RSEC_Reads.cwl/RSEC_Reads_Fastq" + } + ], + "baseCommand": "mist_vdj_rsec_reads.py", + "class": "CommandLineTool", + "id": "#VDJ_RSEC_Reads.cwl" + }, + { + "inputs": [ + { + "type": [ + "null", + "Any" + ], + "id": "#VDJ_Settings.cwl/_VDJ_Version" + } + ], + "requirements": [ + { + "class": "InlineJavascriptRequirement" + } + ], + "outputs": [ + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_JGene_Evalue" + }, + { + "type": [ + "null", + "float" + ], + "id": "#VDJ_Settings.cwl/VDJ_VGene_Evalue" + }, + { + "type": [ + "null", + "string" + ], + "id": "#VDJ_Settings.cwl/VDJ_Version" + } + ], + "class": "ExpressionTool", + "expression": "${\n var vdjVersion = null;\n if (!inputs._VDJ_Version) {\n vdjVersion = null;}\n else {\n var _VDJ_Version = inputs._VDJ_Version.toLowerCase();\n if (_VDJ_Version === \"human\" || _VDJ_Version === \"hs\" || _VDJ_Version === \"human vdj - bcr and tcr\") {\n vdjVersion = \"human\";\n } else if (_VDJ_Version === \"humanbcr\" || _VDJ_Version === \"human vdj - bcr only\") {\n vdjVersion = \"humanBCR\";\n } else if (_VDJ_Version === \"humantcr\" || _VDJ_Version === \"human vdj - tcr only\") {\n vdjVersion = \"humanTCR\";\n } else if (_VDJ_Version === \"mouse\" || _VDJ_Version === \"mm\" || _VDJ_Version === 
\"mouse vdj - bcr and tcr\") {\n vdjVersion = \"mouse\";\n } else if (_VDJ_Version === \"mousebcr\" || _VDJ_Version === \"mouse vdj - bcr only\") {\n vdjVersion = \"mouseBCR\";\n } else if (_VDJ_Version === \"mousetcr\" || _VDJ_Version === \"mouse vdj - tcr only\") {\n vdjVersion = \"mouseTCR\";\n } else {\n vdjVersion = inputs._VDJ_Version;\n }\n }\n\n return ({\n VDJ_Version: vdjVersion,\n })\n}", + "id": "#VDJ_Settings.cwl" + }, + { + "inputs": [ + { + "inputBinding": { + "position": 1 + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads_Fastq" + } + ], + "requirements": [ + + ], + "outputs": [ + { + "outputBinding": { + "glob": "cutadapt.log" + }, + "type": [ + "null", + "File" + ], + "id": "#VDJ_Trim_Reads.cwl/Trim_Report" + }, + { + "outputBinding": { + "glob": "*vdjtxt.gz" + }, + "type": [ + "null", + { + "items": "File", + "type": "array" + } + ], + "id": "#VDJ_Trim_Reads.cwl/Valid_Reads" + } + ], + "baseCommand": "VDJ_Trim_Reads.sh", + "class": "CommandLineTool", + "id": "#VDJ_Trim_Reads.cwl" + }, + { + "inputs": [], + "requirements": [ + + ], + "stdout": "output.txt", + "outputs": [ + { + "outputBinding": { + "glob": "output.txt", + "loadContents": true, + "outputEval": "$(self[0].contents)" + }, + "type": "string", + "id": "#Version.cwl/version" + } + ], + "baseCommand": [ + "mist_version.py" + ], + "id": "#Version.cwl", + "class": "CommandLineTool" + } + ], + "$namespaces": { + "sbg": "https://sevenbridges.com#", + "arv": "http://arvados.org/cwl#" + } +} \ No newline at end of file diff --git a/target/nextflow/mapping/bd_rhapsody/setup_logger.py b/target/nextflow/mapping/bd_rhapsody/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/mapping/bd_rhapsody/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + 
logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_count/.config.vsh.yaml b/target/nextflow/mapping/cellranger_count/.config.vsh.yaml new file mode 100644 index 00000000000..589b46ac27b --- /dev/null +++ b/target/nextflow/mapping/cellranger_count/.config.vsh.yaml @@ -0,0 +1,266 @@ +functionality: + name: "cellranger_count" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Samuel D'Souza" + roles: + - "author" + info: + role: "Contributor" + links: + github: "srdsam" + linkedin: "samuel-d-souza-887023150/" + organizations: + - name: "Chan Zuckerberg Biohub" + href: "https://www.czbiohub.org" + role: "Data Engineer" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "The fastq.gz files to align. Can also be a single directory containing\ + \ fastq.gz files." 
+ info: null + example: + - "sample_S1_L001_R1_001.fastq.gz" + - "sample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + description: "The path to Cell Ranger reference tar.gz file. Can also be a directory." + info: null + example: + - "reference.tar.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The folder to store the alignment results." + info: null + example: + - "/path/to/output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "integer" + name: "--expect_cells" + description: "Expected number of recovered cells, used as input to cell calling\ + \ algorithm." 
+ info: null + example: + - 3000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--chemistry" + description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ + \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ + \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ + \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ + - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ + \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ + \ for more information.\n" + info: null + default: + - "auto" + required: false + choices: + - "auto" + - "threeprime" + - "fiveprime" + - "SC3Pv1" + - "SC3Pv2" + - "SC3Pv3" + - "SC3Pv3LT" + - "SC3Pv3HT" + - "SC5P-PE" + - "SC5P-R2" + - "SC-FB" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--secondary_analysis" + description: "Whether or not to run the secondary analysis e.g. clustering." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--generate_bam" + description: "Whether to generate a BAM file." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--include_introns" + description: "Include intronic reads in count (default=true unless --target-panel\ + \ is specified in which case default=false)" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Align fastq files using Cell Ranger count." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + - type: "file" + path: "src/utils/setup_logger.py" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/cellranger:7.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/config.vsh.yaml" + platform: "nextflow" + output: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count/cellranger_count" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/cellranger_count/main.nf b/target/nextflow/mapping/cellranger_count/main.nf new file mode 100644 index 00000000000..f3e52a88a97 --- /dev/null +++ b/target/nextflow/mapping/cellranger_count/main.nf @@ -0,0 +1,2745 @@ +// cellranger_count 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Samuel D'Souza (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "cellranger_count", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Samuel D'Souza", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "srdsam", + "linkedin" : "samuel-d-souza-887023150/" + }, + "organizations" : [ + { + "name" : "Chan Zuckerberg Biohub", + "href" : "https://www.czbiohub.org", + "role" : "Data Engineer" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "The fastq.gz files to 
align. Can also be a single directory containing fastq.gz files.", + "example" : [ + "sample_S1_L001_R1_001.fastq.gz", + "sample_S1_L001_R2_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "description" : "The path to Cell Ranger reference tar.gz file. Can also be a directory.", + "example" : [ + "reference.tar.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "The folder to store the alignment results.", + "example" : [ + "/path/to/output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "integer", + "name" : "--expect_cells", + "description" : "Expected number of recovered cells, used as input to cell calling algorithm.", + "example" : [ + 3000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--chemistry", + "description" : "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2: Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n- SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2 or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", + "default" : [ + "auto" + ], 
+ "required" : false, + "choices" : [ + "auto", + "threeprime", + "fiveprime", + "SC3Pv1", + "SC3Pv2", + "SC3Pv3", + "SC3Pv3LT", + "SC3Pv3HT", + "SC5P-PE", + "SC5P-R2", + "SC-FB" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--secondary_analysis", + "description" : "Whether or not to run the secondary analysis e.g. clustering.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--generate_bam", + "description" : "Whether to generate a BAM file.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--include_introns", + "description" : "Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/" + } + ], + "description" : "Align fastq files using Cell Ranger count.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + 
"set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/cellranger:7.0", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt update && apt upgrade -y" + ] + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count/config.vsh.yaml", + "platform" : "nextflow", + "output" : 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "${VIASH_PAR_REFERENCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reference='&'#" ; else echo "# par_reference="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_EXPECT_CELLS+x} ]; then echo "${VIASH_PAR_EXPECT_CELLS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_expect_cells='&'#" ; else echo "# par_expect_cells="; fi ) +$( if [ ! -z ${VIASH_PAR_CHEMISTRY+x} ]; then echo "${VIASH_PAR_CHEMISTRY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chemistry='&'#" ; else echo "# par_chemistry="; fi ) +$( if [ ! -z ${VIASH_PAR_SECONDARY_ANALYSIS+x} ]; then echo "${VIASH_PAR_SECONDARY_ANALYSIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_secondary_analysis='&'#" ; else echo "# par_secondary_analysis="; fi ) +$( if [ ! -z ${VIASH_PAR_GENERATE_BAM+x} ]; then echo "${VIASH_PAR_GENERATE_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_generate_bam='&'#" ; else echo "# par_generate_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_INCLUDE_INTRONS+x} ]; then echo "${VIASH_PAR_INCLUDE_INTRONS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_include_introns='&'#" ; else echo "# par_include_introns="; fi ) +$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# just to make sure paths are absolute +par_reference=\\`realpath \\$par_reference\\` +par_output=\\`realpath \\$par_output\\` + +# create temporary directory +tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +# process inputs +# for every fastq file found, make a symlink into the tempdir +fastq_dir="\\$tmpdir/fastqs" +mkdir -p "\\$fastq_dir" +IFS=";" +for var in \\$par_input; do + unset IFS + abs_path=\\`realpath \\$var\\` + if [ -d "\\$abs_path" ]; then + find "\\$abs_path" -name *.fastq.gz -exec ln -s {} "\\$fastq_dir" \\\\; + else + ln -s "\\$abs_path" "\\$fastq_dir" + fi +done + +# process reference +if file \\$par_reference | grep -q 'gzip compressed data'; then + echo "Untarring genome" + reference_dir="\\$tmpdir/fastqs" + mkdir -p "\\$reference_dir" + tar -xvf "\\$par_reference" -C "\\$reference_dir" --strip-components=1 + par_reference="\\$reference_dir" +fi + +# cd into tempdir +cd "\\$tmpdir" + +# add additional params +extra_params=( ) + +if [ ! -z "\\$meta_cpus" ]; then + extra_params+=( "--localcores=\\$meta_cpus" ) +fi +if [ ! -z "\\$meta_memory_gb" ]; then + # always keep 2gb for the OS itself + memory_gb=\\`python -c "print(int('\\$meta_memory_gb') - 2)"\\` + extra_params+=( "--localmem=\\$memory_gb" ) +fi +if [ ! -z "\\$par_expect_cells" ]; then + extra_params+=( "--expect-cells=\\$par_expect_cells" ) +fi +if [ ! 
-z "\\$par_chemistry" ]; then + extra_params+=( "--chemistry=\\$par_chemistry" ) +fi +if [ "\\$par_secondary_analysis" == "false" ]; then + extra_params+=( "--nosecondary" ) +fi +if [ "\\$par_generate_bam" == "false" ]; then + extra_params+=( "--no-bam" ) +fi +echo "Running cellranger count" + + +id=myoutput +cellranger count \\\\ + --id "\\$id" \\\\ + --fastqs "\\$fastq_dir" \\\\ + --transcriptome "\\$par_reference" \\\\ + --include-introns "\\$par_include_introns" \\\\ + "\\${extra_params[@]}" \\\\ + --disable-ui \\\\ + +echo "Copying output" +if [ -d "\\$id/outs/" ]; then + if [ ! -d "\\$par_output" ]; then + mkdir -p "\\$par_output" + fi + mv "\\$id/outs/"* "\\$par_output" +fi +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_cellranger_count", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+  //
+  // Example: `{ id, output, state -> state + [counts: state.output] }`
+  // Default: `{ id, output, state -> output }`
+  toState: null,
+
+  // Whether or not to print debug messages
+  // Default: `false`
+  debug: false
+]
+
+// END CUSTOM CODE
+
+/////////////////////////////////////
+// Viash Workflow helper functions //
+/////////////////////////////////////
+
+import java.util.regex.Pattern
+import java.io.BufferedReader
+import java.io.FileReader
+import java.nio.file.Paths
+import java.nio.file.Files
+import groovy.json.JsonSlurper
+import groovy.text.SimpleTemplateEngine
+import org.yaml.snakeyaml.Yaml
+
+// param helpers //
+def paramExists(name) {
+  return params.containsKey(name) && params[name] != ""
+}
+
+def assertParamExists(name, description) {
+  if (!paramExists(name)) {
+    exit 1, "ERROR: Please provide a --${name} parameter ${description}"
+  }
+}
+
+// helper functions for reading params from file //
+def getChild(parent, child) {
+  if (child.contains("://") || Paths.get(child).isAbsolute()) {
+    child
+  } else {
+    def parentAbsolute = Paths.get(parent).toAbsolutePath().toString()
+    parentAbsolute.replaceAll('/[^/]*$', "/") + child
+  }
+}
+
+/**
+ * Read a csv file into a list of maps, one map per data row.
+ *
+ * Lines starting with '#' are skipped. The first remaining line is used as the header.
+ * Fields are split on commas that are not enclosed in double quotes, and surrounding
+ * double quotes are removed. Empty fields are left out of the resulting row map.
+ *
+ * @param file_path A Path, or a value that is passed to file() to obtain one.
+ *
+ * @return A list with one map (header name -> field value) per data row.
+ */
+def readCsv(file_path) {
+  def output = []
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+
+  // todo: allow escaped quotes in string
+  // todo: allow single quotes?
+  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
+  def removeQuote = Pattern.compile('''"(.*)"''')
+
+  // Remove the double quotes around a field, if any. The matcher is a local variable
+  // here; it used to be assigned without `def`, which made it a script-level variable.
+  def unquote = { field ->
+    def m = removeQuote.matcher(field)
+    m.find() ? m.replaceFirst('$1') : field
+  }
+
+  def br = Files.newBufferedReader(inputFile)
+  try {
+    def row = -1
+    def header = null
+    // the header is the first line which is not a comment
+    while (br.ready() && header == null) {
+      def line = br.readLine()
+      row++
+      if (!line.startsWith("#")) {
+        header = splitRegex.split(line, -1).collect{field -> unquote(field)}
+      }
+    }
+    assert header != null: "CSV file should contain a header"
+
+    while (br.ready()) {
+      def line = br.readLine()
+      row++
+      if (line == null) {
+        break
+      }
+
+      if (!line.startsWith("#")) {
+        // empty fields become null so that they can be dropped from the row map below
+        def data = splitRegex.split(line, -1).collect{field ->
+          field == "" ? null : unquote(field)
+        }
+        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
+
+        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
+        output.add(dataMap)
+      }
+    }
+  } finally {
+    // always release the file handle, also when one of the assertions above fails
+    br.close()
+  }
+
+  output
+}
+
+def readJsonBlob(str) {
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parseText(str)
+}
+
+def readJson(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parse(inputFile)
+}
+
+def readYamlBlob(str) {
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(str)
+}
+
+def readYaml(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(inputFile)
+}
+
+// helper functions for reading a viash config in groovy //
+
+// based on how Functionality.scala is implemented
+def processArgument(arg) {
+  arg.multiple = arg.multiple != null ? arg.multiple : false
+  arg.required = arg.required != null ? arg.required : false
+  arg.direction = arg.direction != null ? arg.direction : "input"
+  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":"
+  arg.plainName = arg.name.replaceAll("^-*", "")
+
+  if (arg.type == "file") {
+    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
+    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
+  }
+
+  if (arg.type == "file" && arg.direction == "output") {
+    def mult = arg.multiple ?
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+            'example': 'my_params.yaml',
+            'multiple': false,
+            'hidden': true
+          ],
+        ]
+      ]
+    ]
+  ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+/**
+ * Wrap a string into lines of at most maxLength characters, breaking on whitespace.
+ * Based on io.viash.helpers.Format.wordWrap.
+ *
+ * A single word longer than maxLength is kept on a line of its own.
+ *
+ * @param str The text to wrap.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list of lines. The list is empty if str contains no words.
+ */
+def formatWordWrap(str, maxLength) {
+  def words = str.split("\\s").toList()
+  if (words.isEmpty()) {
+    return []
+  }
+
+  // Start the first line with the first word. Starting from an empty line would
+  // prefix the first word with the separating space.
+  def line = words.head()
+  def lines = []
+  words.drop(1).each { word ->
+    if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  lines.add(line)
+  return lines
+}
+
+/**
+ * Wrap every paragraph (newline separated) of a string into lines of at most
+ * maxLength characters. Based on Format.paragraphWrap.
+ *
+ * @param str The text to wrap.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list of lines; an empty paragraph results in an empty line.
+ */
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    // A paragraph consisting of only whitespace splits into zero words;
+    // treat it as an empty line instead of failing on an empty list.
+    def line = words.isEmpty() ? "" : words.head()
+    words.drop(1).each { word ->
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    outLines.add(line)
+  }
+  return outLines
+}
+
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ?
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ *   their separator. Parameters that are not defined in the config are returned unaltered.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      // String.split() interprets its argument as a regular expression. Quote the
+      // separator so that it is matched literally, also when it is e.g. "|" or ".".
+      def splitValue = { String value -> value.split(Pattern.quote(parameterSettings.multiple_sep)) }
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? splitValue(it) : it }
+      } else if (parValue instanceof String) {
+        parValue = splitValue(parValue)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set tuple
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The plain unique() mutates
+  // ppIds in place, which removed the duplicates from the message below.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares as type "file" with direction
+ * "input" are touched; every other entry is returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ *                  Values that are not Strings are passed through as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map<String, Object> _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is kept as is; this branch used to
+        // return the undefined name 'path' instead of parValue
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns [ file the list was read from (or null), list of parameter maps ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration, used to look up the declared
+ *               type, direction and 'multiple' flag of every argument.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps both lookups local to this closure instead of writing to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so single and multiple values share the casting code below
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    // a file argument without an explicit direction is treated as an input
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  // (the two publish dir spellings are accepted even though they are not config arguments)
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *   ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of tuples: id first, parameter map second. Any further
+ *                      elements are appended to the output tuple unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to initialise
+ *   a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *   csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *   which correspond to the different arguments of this pipeline. A json or a yaml
+ *   file should be a list of maps, each of which has keys corresponding to the
+ *   arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *   When passing a csv, json or yaml, relative path names are relativized to the
+ *   location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // an id from the param_list entry takes precedence over the global --id
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a name clash the right-hand side, i.e. the param_list value, wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll keeps or drops each entry directly, so the closure never returns
+    // null as an "entry", which the previous collectEntries closure did.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      // 'def' keeps the lookup local instead of writing to the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of writing to the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the default value of every argument that declares one
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      declaredDefaults[argument.plainName] = argument.default
+    }
+  }
+
+  // Run the defaults through the same split/cast/validate steps as the inputs
+  def defaultValues = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Split, cast and validate every incoming parameter set
+  def inputSets = applyConfig(params, config)
+
+  // Lay the input values over the defaults; tuple elements past the
+  // second one are appended to the result untouched
+  def mergedSets = inputSets.collect { tuple ->
+    def valuesWithDefaults = defaultValues + tuple[1]
+    [tuple[0], valuesWithDefaults] + tuple.drop(2)
+  }
+  _checkUniqueIds(mergedSets)
+
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events into one list so the ids can be checked across the whole channel
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only has expected keys and has all required keys.
+ *
+ * @param map The value to check; the assertion fails if it is not a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName A name for the map, used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the missing key itself; this message used to interpolate '$key',
+    // which is not defined inside this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_count/nextflow.config b/target/nextflow/mapping/cellranger_count/nextflow.config new file mode 100644 index 
00000000000..447bb8b9239 --- /dev/null +++ b/target/nextflow/mapping/cellranger_count/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'cellranger_count' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Align fastq files using Cell Ranger count.' + author = 'Angela Oliveira Pisco, Samuel D\'Souza, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB 
} + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/cellranger_count/nextflow_params.yaml b/target/nextflow/mapping/cellranger_count/nextflow_params.yaml new file mode 100644 index 00000000000..858ba95c91b --- /dev/null +++ b/target/nextflow/mapping/cellranger_count/nextflow_params.yaml @@ -0,0 +1,17 @@ +# Inputs +input: # please fill in - example: ["sample_S1_L001_R1_001.fastq.gz", "sample_S1_L001_R2_001.fastq.gz"] +reference: # please fill in - example: "reference.tar.gz" + +# Outputs +# output: "$id.$key.output.output" + +# Arguments +# expect_cells: 3000 +chemistry: "auto" +secondary_analysis: false +generate_bam: true +include_introns: true + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/cellranger_count/nextflow_schema.json b/target/nextflow/mapping/cellranger_count/nextflow_schema.json new file mode 100644 index 00000000000..e6f42eec020 --- /dev/null +++ b/target/nextflow/mapping/cellranger_count/nextflow_schema.json @@ -0,0 +1,175 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cellranger_count", +"description": "Align fastq files 
using Cell Ranger count.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `sample_S1_L001_R1_001.fastq.gz;sample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The fastq", + "help_text": "Type: List of `file`, required, example: `sample_S1_L001_R1_001.fastq.gz;sample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The fastq.gz files to align. Can also be a single directory containing fastq.gz files." + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required, example: `reference.tar.gz`. The path to Cell Ranger reference tar", + "help_text": "Type: `file`, required, example: `reference.tar.gz`. The path to Cell Ranger reference tar.gz file. Can also be a directory." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "expect_cells": { + "type": + "integer", + "description": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm", + "help_text": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm." 
+ + } + + + , + "chemistry": { + "type": + "string", + "description": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration", + "help_text": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3\u0027\n- fiveprime: Single Cell 5\u0027\n- SC3Pv1: Single Cell 3\u0027 v1\n- SC3Pv2: Single Cell 3\u0027 v2\n- SC3Pv3: Single Cell 3\u0027 v3\n- SC3Pv3LT: Single Cell 3\u0027 v3 LT\n- SC3Pv3HT: Single Cell 3\u0027 v3 HT\n- SC5P-PE: Single Cell 5\u0027 paired-end\n- SC5P-R2: Single Cell 5\u0027 R2-only\n- SC-FB: Single Cell Antibody-only 3\u0027 v2 or 5\u0027\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", + "enum": ["auto", "threeprime", "fiveprime", "SC3Pv1", "SC3Pv2", "SC3Pv3", "SC3Pv3LT", "SC3Pv3HT", "SC5P-PE", "SC5P-R2", "SC-FB"] + + , + "default": "auto" + } + + + , + "secondary_analysis": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e", + "help_text": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e.g. clustering." + , + "default": "False" + } + + + , + "generate_bam": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Whether to generate a BAM file", + "help_text": "Type: `boolean`, default: `true`. Whether to generate a BAM file." + , + "default": "True" + } + + + , + "include_introns": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", + "help_text": "Type: `boolean`, default: `true`. 
Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)" + , + "default": "True" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml b/target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml new file mode 100644 index 00000000000..4c86d993a7f --- /dev/null +++ b/target/nextflow/mapping/cellranger_count_split/.config.vsh.yaml @@ -0,0 +1,218 @@ +functionality: + name: "cellranger_count_split" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Samuel D'Souza" + roles: + - "author" + info: + role: "Contributor" + links: + github: "srdsam" + linkedin: "samuel-d-souza-887023150/" + organizations: + - name: "Chan Zuckerberg Biohub" + href: "https://www.czbiohub.org" + role: "Data Engineer" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: 
"Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Output directory from a Cell Ranger count run." + info: null + example: + - "input_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--filtered_h5" + info: null + example: + - "filtered_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--metrics_summary" + info: null + example: + - "metrics_summary.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--molecule_info" + info: null + example: + - "molecule_info.h5" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--bam" + info: null + example: + - "possorted_genome_bam.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--bai" + info: null + example: + - "possorted_genome_bam.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--raw_h5" + info: null + example: + - "raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Split 10x Cell Ranger output directory into separate output fields." 
+ info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:jammy" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "apt update && apt upgrade -y" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count_split" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count_split/cellranger_count_split" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + 
git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/cellranger_count_split/main.nf b/target/nextflow/mapping/cellranger_count_split/main.nf new file mode 100644 index 00000000000..93c8774e3f5 --- /dev/null +++ b/target/nextflow/mapping/cellranger_count_split/main.nf @@ -0,0 +1,2633 @@ +// cellranger_count_split 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Samuel D'Souza (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "cellranger_count_split", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Samuel D'Souza", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "srdsam", + "linkedin" : "samuel-d-souza-887023150/" + }, 
+ "organizations" : [ + { + "name" : "Chan Zuckerberg Biohub", + "href" : "https://www.czbiohub.org", + "role" : "Data Engineer" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Output directory from a Cell Ranger count run.", + "example" : [ + "input_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--filtered_h5", + "example" : [ + "filtered_feature_bc_matrix.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--metrics_summary", + "example" : [ + "metrics_summary.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--molecule_info", + "example" : [ + "molecule_info.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--bam", + "example" : [ + "possorted_genome_bam.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", 
+ "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--bai", + "example" : [ + "possorted_genome_bam.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--raw_h5", + "example" : [ + "raw_feature_bc_matrix.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/" + } + ], + "description" : "Split 10x Cell Ranger output directory into separate output fields.", + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:jammy", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "docker", + "run" : [ + "apt update && apt upgrade -y" + ] + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 
128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_count_split/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_count_split", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTERED_H5+x} ]; then echo "${VIASH_PAR_FILTERED_H5}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filtered_h5='&'#" ; else echo "# par_filtered_h5="; fi ) +$( if [ ! -z ${VIASH_PAR_METRICS_SUMMARY+x} ]; then echo "${VIASH_PAR_METRICS_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_metrics_summary='&'#" ; else echo "# par_metrics_summary="; fi ) +$( if [ ! 
-z ${VIASH_PAR_MOLECULE_INFO+x} ]; then echo "${VIASH_PAR_MOLECULE_INFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_molecule_info='&'#" ; else echo "# par_molecule_info="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_RAW_H5+x} ]; then echo "${VIASH_PAR_RAW_H5}" | sed "s#'#'\\"'\\"'#g;s#.*#par_raw_h5='&'#" ; else echo "# par_raw_h5="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +filtered_h5="\\$par_input/filtered_feature_bc_matrix.h5" +if [ -f "\\$filtered_h5" ] && [ ! -z "\\$par_filtered_h5" ]; then + echo "+ cp \\$filtered_h5 \\$par_filtered_h5" + cp "\\$filtered_h5" "\\$par_filtered_h5" +fi + +metrics_summary="\\$par_input/metrics_summary.csv" +if [ -f "\\$metrics_summary" ] && [ ! -z "\\$par_metrics_summary" ]; then + echo "+ cp \\$metrics_summary \\$par_metrics_summary" + cp "\\$metrics_summary" "\\$par_metrics_summary" +fi + +molecule_info="\\$par_input/molecule_info.h5" +if [ -f "\\$molecule_info" ] && [ ! -z "\\$par_molecule_info" ]; then + echo "+ cp \\$molecule_info \\$par_molecule_info" + cp "\\$molecule_info" "\\$par_molecule_info" +fi + +bam="\\$par_input/possorted_genome_bam.bam" +if [ -f "\\$bam" ] && [ ! -z "\\$par_bam" ]; then + echo "cp \\$bam \\$par_bam" + cp "\\$bam" "\\$par_bam" +fi + +raw_h5="\\$par_input/raw_feature_bc_matrix.h5" +if [ -f "\\$raw_h5" ] && [ ! -z "\\$par_raw_h5" ]; then + echo "+ cp \\$raw_h5 \\$par_raw_h5" + cp "\\$raw_h5" "\\$par_raw_h5" +fi + +bai="\\$par_input/possorted_genome_bam.bam.bai" +if [ -f "\\$bai" ] && [ ! 
-z "\\$par_bai" ]; then + echo "+ cp \\$bai \\$par_bai" + cp "\\$bai" "\\$par_bai" +fi +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_cellranger_count_split", + "tag" : "0.12.0" + }, + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
+ // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the generated help text and exit with status 0 when the 'help'
 * parameter exists (as reported by paramExists).
 *
 * @param config A Map of the Viash configuration.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Determine the format of the value passed as 'param_list'.
 *
 * @param params Input parameters from nextflow.
 *
 * @return "none" when no param_list is set; the value of 'param_list_format'
 *         when it is set; otherwise "asis" for non-String values, "csv",
 *         "json" or "yaml" based on the file extension, and "yaml_blob" for
 *         any other String.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else if (params.containsKey("param_list_format") && params.param_list_format != null) {
    // Explicit override: the parser error messages advertise '--param_list_format',
    // so honour it instead of guessing. Callers validate the returned value.
    params.param_list_format
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// Shared by paramsToList, paramsToChannel and viashChannel so that the
// deprecation warning is printed only once.
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: turn nextflow params (optionally with a 'param_list') into a
 * list of argument maps, one per parameter set.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration.
 *
 * @return A list of Maps, each holding the processed arguments of one
 *         parameter set. Every map must contain a unique 'id'.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // each parser returns [ file the paths are relative to (or null), list of maps ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally so the value does not leak into the script binding
        def parData
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              // paths inside a param_list file are resolved relative to that file
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): the elements filtered here are map entries, which are
      // never null themselves, so this looks like a no-op — confirm intent.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) returns a de-duplicated copy; the no-arg unique() would strip
  // the duplicates from ppIds before it is printed in the message below
  def uniqueIds = ppIds.unique(false)
  assert ppIds.size() == uniqueIds.size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: same as paramsToList, but returns the parameter sets as a
 * nextflow Channel.
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration.
 *
 * @return A Channel created from the output of paramsToList.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: same as paramsToChannel, but every event is emitted as
 * [id, arguments].
 *
 * @param params Input parameters from nextflow.
 * @param config A Map of the Viash configuration.
 *
 * @return A Channel of [id, Map] events.
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not found in the config are returned unaltered.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether auto exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ?
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure.
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); '!' binds tighter than instanceof, hence the parentheses + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_count_split/nextflow.config b/target/nextflow/mapping/cellranger_count_split/nextflow.config new file mode 100644 index 
00000000000..3aa03c9d330 --- /dev/null +++ b/target/nextflow/mapping/cellranger_count_split/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'cellranger_count_split' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Split 10x Cell Ranger output directory into separate output fields.' + author = 'Angela Oliveira Pisco, Samuel D\'Souza, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + 
withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml b/target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml new file mode 100644 index 00000000000..fdd7f445ecf --- /dev/null +++ b/target/nextflow/mapping/cellranger_count_split/nextflow_params.yaml @@ -0,0 +1,12 @@ +# Arguments +input: # please fill in - example: "input_dir" +# filtered_h5: "$id.$key.filtered_h5.h5" +# metrics_summary: "$id.$key.metrics_summary.csv" +# molecule_info: "$id.$key.molecule_info.h5" +# bam: "$id.$key.bam.bam" +# bai: "$id.$key.bai.bai" +# raw_h5: "$id.$key.raw_h5.h5" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/cellranger_count_split/nextflow_schema.json b/target/nextflow/mapping/cellranger_count_split/nextflow_schema.json new file mode 100644 index 00000000000..e8746190f71 --- /dev/null +++ b/target/nextflow/mapping/cellranger_count_split/nextflow_schema.json @@ -0,0 +1,136 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cellranger_count_split", +"description": 
"Split 10x Cell Ranger output directory into separate output fields.", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input_dir`. Output directory from a Cell Ranger count run", + "help_text": "Type: `file`, required, example: `input_dir`. Output directory from a Cell Ranger count run." + + } + + + , + "filtered_h5": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.filtered_h5.h5`, example: `filtered_feature_bc_matrix.h5`. ", + "help_text": "Type: `file`, default: `$id.$key.filtered_h5.h5`, example: `filtered_feature_bc_matrix.h5`. " + , + "default": "$id.$key.filtered_h5.h5" + } + + + , + "metrics_summary": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.metrics_summary.csv`, example: `metrics_summary.csv`. ", + "help_text": "Type: `file`, default: `$id.$key.metrics_summary.csv`, example: `metrics_summary.csv`. " + , + "default": "$id.$key.metrics_summary.csv" + } + + + , + "molecule_info": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.molecule_info.h5`, example: `molecule_info.h5`. ", + "help_text": "Type: `file`, default: `$id.$key.molecule_info.h5`, example: `molecule_info.h5`. " + , + "default": "$id.$key.molecule_info.h5" + } + + + , + "bam": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bam.bam`, example: `possorted_genome_bam.bam`. ", + "help_text": "Type: `file`, default: `$id.$key.bam.bam`, example: `possorted_genome_bam.bam`. " + , + "default": "$id.$key.bam.bam" + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bai.bai`, example: `possorted_genome_bam.bam.bai`. ", + "help_text": "Type: `file`, default: `$id.$key.bai.bai`, example: `possorted_genome_bam.bam.bai`. 
" + , + "default": "$id.$key.bai.bai" + } + + + , + "raw_h5": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.raw_h5.h5`, example: `raw_feature_bc_matrix.h5`. ", + "help_text": "Type: `file`, default: `$id.$key.raw_h5.h5`, example: `raw_feature_bc_matrix.h5`. " + , + "default": "$id.$key.raw_h5.h5" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/cellranger_multi/.config.vsh.yaml b/target/nextflow/mapping/cellranger_multi/.config.vsh.yaml new file mode 100644 index 00000000000..e0e7c0a576f --- /dev/null +++ b/target/nextflow/mapping/cellranger_multi/.config.vsh.yaml @@ -0,0 +1,423 @@ +functionality: + name: "cellranger_multi" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - 
name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Input files" + arguments: + - type: "file" + name: "--input" + description: "The FASTQ files to be analyzed. FASTQ files should conform to\ + \ the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample\ + \ Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n" + info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--gex_reference" + description: "Genome refence index built by Cell Ranger mkref." + info: null + example: + - "reference_genome.tar.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--vdj_reference" + description: "VDJ refence index built by Cell Ranger mkref." 
+ info: null + example: + - "reference_vdj.tar.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--vdj_inner_enrichment_primers" + description: "V(D)J Immune Profiling libraries: if inner enrichment primers\ + \ other than those provided \nin the 10x Genomics kits are used, they need\ + \ to be specified here as a\ntext file with one primer per line.\n" + info: null + example: + - "enrichment_primers.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--feature_reference" + description: "Path to the Feature reference CSV file, declaring Feature Barcode\ + \ constructs and associated barcodes. Required only for Antibody Capture or\ + \ CRISPR Guide Capture libraries. See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref\ + \ for more information." + info: null + example: + - "feature_reference.csv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Library arguments" + arguments: + - type: "string" + name: "--library_id" + description: "The Illumina sample name to analyze. This must exactly match the\ + \ 'Sample Name' part of the FASTQ files specified in the `--input` argument." 
+ info: null + example: + - "mysample1" + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--library_type" + description: "The underlying feature type of the library.\nPossible values:\ + \ \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody Capture\"\ + , \"CRISPR Guide Capture\", \"Multiplexing Capture\"\n" + info: null + example: + - "Gene Expression" + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--library_subsample" + description: "Optional. The rate at which reads from the provided FASTQ files\ + \ are sampled. Must be strictly greater than 0 and less than or equal to 1." + info: null + example: + - "0.5" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--library_lanes" + description: "Lanes associated with this sample. Defaults to using all lanes." + info: null + example: + - "1-4" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Gene expression arguments" + description: "Arguments relevant to the analysis of gene expression data." + arguments: + - type: "integer" + name: "--gex_expect_cells" + description: "Expected number of recovered cells, used as input to cell calling\ + \ algorithm." 
+ info: null + example: + - 3000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--gex_chemistry" + description: "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single\ + \ Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2:\ + \ Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell\ + \ 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n\ + - SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2\ + \ or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry-\ + \ for more information.\n" + info: null + default: + - "auto" + required: false + choices: + - "auto" + - "threeprime" + - "fiveprime" + - "SC3Pv1" + - "SC3Pv2" + - "SC3Pv3" + - "SC3Pv3LT" + - "SC3Pv3HT" + - "SC5P-PE" + - "SC5P-R2" + - "SC-FB" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--gex_secondary_analysis" + description: "Whether or not to run the secondary analysis e.g. clustering." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--gex_generate_bam" + description: "Whether to generate a BAM file." + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--gex_include_introns" + description: "Include intronic reads in count (default=true unless --target-panel\ + \ is specified in which case default=false)" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Cell multiplexing parameters" + description: "Arguments related to cell multiplexing." 
+ arguments: + - type: "string" + name: "--cell_multiplex_sample_id" + description: "A name to identify a multiplexed sample. Must be alphanumeric\ + \ with hyphens and/or underscores, and less than 64 characters. Required for\ + \ Cell Multiplexing libraries." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cell_multiplex_oligo_ids" + description: "The Cell Multiplexing oligo IDs used to multiplex this sample.\ + \ If multiple CMOs were used for a sample, separate IDs with a pipe (e.g.,\ + \ CMO301|CMO302). Required for Cell Multiplexing libraries." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cell_multiplex_description" + description: "A description for the sample." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The folder to store the alignment results." + info: null + example: + - "/path/to/output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Executor arguments" + arguments: + - type: "boolean_true" + name: "--dryrun" + description: "If true, the output directory will only contain the CWL input\ + \ files, but the pipeline itself will not be executed." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Align fastq files using Cell Ranger multi." 
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/10x_5k_anticmv/raw/" + dest: "10x_5k_anticmv/raw/" + - type: "file" + path: "resources_test/10x_5k_lung_crispr/raw/" + dest: "10x_5k_lung_crispr/raw/" + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/cellranger:7.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + run: + - "DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/*" + - type: "python" + user: false + packages: + - "pandas" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "veryhighmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: 
"cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_multi" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_multi/cellranger_multi" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/cellranger_multi/main.nf b/target/nextflow/mapping/cellranger_multi/main.nf new file mode 100644 index 00000000000..ac1eb88faf5 --- /dev/null +++ b/target/nextflow/mapping/cellranger_multi/main.nf @@ -0,0 +1,3132 @@ +// cellranger_multi 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) +// * Dries De Maeyer (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "cellranger_multi", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Dries De Maeyer", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input files", + "arguments" : [ + { + "type" : "file", + "name" : 
"--input", + "description" : "The FASTQ files to be analyzed. FASTQ files should conform to the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n", + "example" : [ + "mysample_S1_L001_R1_001.fastq.gz", + "mysample_S1_L001_R2_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--gex_reference", + "description" : "Genome refence index built by Cell Ranger mkref.", + "example" : [ + "reference_genome.tar.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--vdj_reference", + "description" : "VDJ refence index built by Cell Ranger mkref.", + "example" : [ + "reference_vdj.tar.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--vdj_inner_enrichment_primers", + "description" : "V(D)J Immune Profiling libraries: if inner enrichment primers other than those provided \nin the 10x Genomics kits are used, they need to be specified here as a\ntext file with one primer per line.\n", + "example" : [ + "enrichment_primers.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--feature_reference", + "description" : "Path to the Feature reference CSV file, declaring Feature Barcode constructs and associated barcodes. Required only for Antibody Capture or CRISPR Guide Capture libraries. 
See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref for more information.", + "example" : [ + "feature_reference.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Library arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--library_id", + "description" : "The Illumina sample name to analyze. This must exactly match the 'Sample Name' part of the FASTQ files specified in the `--input` argument.", + "example" : [ + "mysample1" + ], + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--library_type", + "description" : "The underlying feature type of the library.\nPossible values: \\"Gene Expression\\", \\"VDJ\\", \\"VDJ-T\\", \\"VDJ-B\\", \\"Antibody Capture\\", \\"CRISPR Guide Capture\\", \\"Multiplexing Capture\\"\n", + "example" : [ + "Gene Expression" + ], + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--library_subsample", + "description" : "Optional. The rate at which reads from the provided FASTQ files are sampled. Must be strictly greater than 0 and less than or equal to 1.", + "example" : [ + "0.5" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--library_lanes", + "description" : "Lanes associated with this sample. 
Defaults to using all lanes.", + "example" : [ + "1-4" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Gene expression arguments", + "description" : "Arguments relevant to the analysis of gene expression data.", + "arguments" : [ + { + "type" : "integer", + "name" : "--gex_expect_cells", + "description" : "Expected number of recovered cells, used as input to cell calling algorithm.", + "example" : [ + 3000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--gex_chemistry", + "description" : "Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3'\n- fiveprime: Single Cell 5'\n- SC3Pv1: Single Cell 3' v1\n- SC3Pv2: Single Cell 3' v2\n- SC3Pv3: Single Cell 3' v3\n- SC3Pv3LT: Single Cell 3' v3 LT\n- SC3Pv3HT: Single Cell 3' v3 HT\n- SC5P-PE: Single Cell 5' paired-end\n- SC5P-R2: Single Cell 5' R2-only\n- SC-FB: Single Cell Antibody-only 3' v2 or 5'\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", + "default" : [ + "auto" + ], + "required" : false, + "choices" : [ + "auto", + "threeprime", + "fiveprime", + "SC3Pv1", + "SC3Pv2", + "SC3Pv3", + "SC3Pv3LT", + "SC3Pv3HT", + "SC5P-PE", + "SC5P-R2", + "SC-FB" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--gex_secondary_analysis", + "description" : "Whether or not to run the secondary analysis e.g. 
clustering.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--gex_generate_bam", + "description" : "Whether to generate a BAM file.", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--gex_include_introns", + "description" : "Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Cell multiplexing parameters", + "description" : "Arguments related to cell multiplexing.", + "arguments" : [ + { + "type" : "string", + "name" : "--cell_multiplex_sample_id", + "description" : "A name to identify a multiplexed sample. Must be alphanumeric with hyphens and/or underscores, and less than 64 characters. Required for Cell Multiplexing libraries.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--cell_multiplex_oligo_ids", + "description" : "The Cell Multiplexing oligo IDs used to multiplex this sample. If multiple CMOs were used for a sample, separate IDs with a pipe (e.g., CMO301|CMO302). 
Required for Cell Multiplexing libraries.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--cell_multiplex_description", + "description" : "A description for the sample.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "The folder to store the alignment results.", + "example" : [ + "/path/to/output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Executor arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--dryrun", + "description" : "If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed.", + "direction" : "input", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Align fastq files using Cell Ranger multi.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/" + }, + { + "type" : "file", + "path" : "resources_test/10x_5k_anticmv/raw/", + "dest" : "10x_5k_anticmv/raw/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/10x_5k_lung_crispr/raw/", + "dest" : "10x_5k_lung_crispr/raw/", + "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/reference_gencodev41_chr1", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/cellranger:7.0", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "docker", + "run" : [ + "DEBIAN_FRONTEND=noninteractive apt update && apt upgrade -y && rm -rf /var/lib/apt/lists/*" + ] + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "pandas" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "veryhighmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : 
"memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/cellranger_multi/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/cellranger_multi", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from __future__ import annotations + +import sys +import re +import subprocess +import tempfile +import pandas as pd +from typing import Optional, Any, Union +import tarfile +from pathlib import Path +import shutil +from itertools import chain + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'gex_reference': $( if [ ! -z ${VIASH_PAR_GEX_REFERENCE+x} ]; then echo "r'${VIASH_PAR_GEX_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'vdj_reference': $( if [ ! -z ${VIASH_PAR_VDJ_REFERENCE+x} ]; then echo "r'${VIASH_PAR_VDJ_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'vdj_inner_enrichment_primers': $( if [ ! -z ${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS+x} ]; then echo "r'${VIASH_PAR_VDJ_INNER_ENRICHMENT_PRIMERS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'feature_reference': $( if [ ! 
-z ${VIASH_PAR_FEATURE_REFERENCE+x} ]; then echo "r'${VIASH_PAR_FEATURE_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'library_id': $( if [ ! -z ${VIASH_PAR_LIBRARY_ID+x} ]; then echo "r'${VIASH_PAR_LIBRARY_ID//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'library_type': $( if [ ! -z ${VIASH_PAR_LIBRARY_TYPE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_TYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'library_subsample': $( if [ ! -z ${VIASH_PAR_LIBRARY_SUBSAMPLE+x} ]; then echo "r'${VIASH_PAR_LIBRARY_SUBSAMPLE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'library_lanes': $( if [ ! -z ${VIASH_PAR_LIBRARY_LANES+x} ]; then echo "r'${VIASH_PAR_LIBRARY_LANES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'gex_expect_cells': $( if [ ! -z ${VIASH_PAR_GEX_EXPECT_CELLS+x} ]; then echo "int(r'${VIASH_PAR_GEX_EXPECT_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'gex_chemistry': $( if [ ! -z ${VIASH_PAR_GEX_CHEMISTRY+x} ]; then echo "r'${VIASH_PAR_GEX_CHEMISTRY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'gex_secondary_analysis': $( if [ ! -z ${VIASH_PAR_GEX_SECONDARY_ANALYSIS+x} ]; then echo "r'${VIASH_PAR_GEX_SECONDARY_ANALYSIS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'gex_generate_bam': $( if [ ! -z ${VIASH_PAR_GEX_GENERATE_BAM+x} ]; then echo "r'${VIASH_PAR_GEX_GENERATE_BAM//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'gex_include_introns': $( if [ ! -z ${VIASH_PAR_GEX_INCLUDE_INTRONS+x} ]; then echo "r'${VIASH_PAR_GEX_INCLUDE_INTRONS//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'cell_multiplex_sample_id': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_SAMPLE_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cell_multiplex_oligo_ids': $( if [ ! 
-z ${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_OLIGO_IDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cell_multiplex_description': $( if [ ! -z ${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION+x} ]; then echo "r'${VIASH_PAR_CELL_MULTIPLEX_DESCRIPTION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'dryrun': $( if [ ! -z ${VIASH_PAR_DRYRUN+x} ]; then echo "r'${VIASH_PAR_DRYRUN//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +fastq_regex = r'([A-Za-z0-9\\\\-_\\\\.]+)_S(\\\\d+)_L(\\\\d+)_[RI](\\\\d+)_(\\\\d+)\\\\.fastq\\\\.gz' +# assert re.match(fastq_regex, "5k_human_GEX_1_subset_S1_L001_R1_001.fastq.gz") is not None + +# Invert some parameters. 
Keep the original ones in the config for compatibility +inverted_params = { + "gex_generate_no_bam": "gex_generate_bam", + "gex_no_secondary_analysis": "gex_secondary_analysis" +} +for inverted_param, param in inverted_params.items(): + par[inverted_param] = not par[param] if par[param] is not None else None + del par[param] + +GEX_CONFIG_KEYS = { + "gex_reference": "reference", + "gex_expect_cells": "expect-cells", + "gex_chemistry": "chemistry", + "gex_no_secondary_analysis": "no-secondary", + "gex_generate_no_bam": "no-bam", + "gex_include_introns": "include-introns" +} +FEATURE_CONFIG_KEYS = {"feature_reference": "reference"} +VDJ_CONFIG_KEYS = {"vdj_reference": "reference", + "vdj_inner_enrichment_primers": "inner-enrichment-primers"} + +REFERENCE_SECTIONS = { + "gene-expression": (GEX_CONFIG_KEYS, "index"), + "feature": (FEATURE_CONFIG_KEYS, "index"), + "vdj": (VDJ_CONFIG_KEYS, "index") +} + +LIBRARY_CONFIG_KEYS = {'library_id': 'fastq_id', + 'library_type': 'feature_types', + 'library_subsample': 'subsample_rate', + 'library_lanes': 'lanes'} +SAMPLE_PARAMS_CONFIG_KEYS = {'cell_multiplex_sample_id': 'sample_id', + 'cell_multiplex_oligo_ids': 'cmo_ids', + 'cell_multiplex_description': 'description'} + + +# These are derived from the dictionaries above +REFERENCES = tuple(reference_param for reference_param, cellranger_param + in chain(GEX_CONFIG_KEYS.items(), FEATURE_CONFIG_KEYS.items(), VDJ_CONFIG_KEYS.items()) + if cellranger_param == "reference") +LIBRARY_PARAMS = tuple(LIBRARY_CONFIG_KEYS.keys()) +SAMPLE_PARAMS = tuple(SAMPLE_PARAMS_CONFIG_KEYS.keys()) + + +def lengths_gt1(dic: dict[str, Optional[list[Any]]]) -> dict[str, int]: + return {key: len(li) for key, li in dic.items() + if li is not None and len(li) > 1} + +def strip_margin(text: str) -> str: + return re.sub('(\\\\n?)[ \\\\t]*\\\\|', '\\\\\\\\1', text) + + +def subset_dict(dictionary: dict[str, str], + keys: Union[dict[str, str], list[str]]) -> dict[str, str]: + if isinstance(keys, (list, tuple)): 
+ keys = {key: key for key in keys} + return {dest_key: dictionary[orig_key] + for orig_key, dest_key in keys.items() + if dictionary[orig_key] is not None} + +def check_subset_dict_equal_length(group_name: str, + dictionary: dict[str, list[str]]) -> None: + lens = lengths_gt1(dictionary) + assert len(set(lens.values())) <= 1, f"The number of values passed to {group_name} "\\\\ + f"arguments must be 0, 1 or all the same. Offenders: {lens}" + +def process_params(par: dict[str, Any]) -> str: + # if par_input is a directory, look for fastq files + par["input"] = [Path(fastq) for fastq in par["input"]] + if len(par["input"]) == 1 and par["input"][0].is_dir(): + logger.info("Detected '--input' as a directory, " + "traversing to see if we can detect any FASTQ files.") + par["input"] = [input_path for input_path in par["input"][0].rglob('*') + if re.match(fastq_regex, input_path.name) ] + + # check input fastq files + for input_path in par["input"]: + assert re.match(fastq_regex, input_path.name) is not None, \\\\ + f"File name of --input '{input_path}' should match regex {fastq_regex}." 
+ + # check lengths of libraries metadata + library_dict = subset_dict(par, LIBRARY_PARAMS) + check_subset_dict_equal_length("Library", library_dict) + # storing for later use + par["libraries"] = library_dict + + cmo_dict = subset_dict(par, SAMPLE_PARAMS) + check_subset_dict_equal_length("Cell multiplexing", cmo_dict) + # storing for later use + par["cmo"] = cmo_dict + + # use absolute paths + par["input"] = [input_path.resolve() for input_path in par["input"]] + for file_path in REFERENCES + ('output', ): + if par[file_path]: + logger.info('Making path %s absolute', par[file_path]) + par[file_path] = Path(par[file_path]).resolve() + return par + + +def generate_csv_category(name: str, args: dict[str, str], orient: str) -> list[str]: + assert orient in ("index", "columns") + if not args: + return [] + title = [ f'[{name}]' ] + # Which index to include in csv section is based on orientation + to_csv_args = {"index": (orient=="index"), "header": (orient=="columns")} + values = [pd.DataFrame.from_dict(args, orient=orient).to_csv(**to_csv_args).strip()] + return title + values + [""] + + +def generate_config(par: dict[str, Any], fastq_dir: str) -> str: + content_list = [] + par["fastqs"] = fastq_dir + libraries = dict(LIBRARY_CONFIG_KEYS, **{"fastqs": "fastqs"}) + #TODO: use the union (|) operator when python is updated to 3.9 + all_sections = dict(REFERENCE_SECTIONS, + **{"libraries": (libraries, "columns")}, + **{"samples": (SAMPLE_PARAMS_CONFIG_KEYS, "columns")}) + for section_name, (section_params, orientation) in all_sections.items(): + reference_pars = subset_dict(par, section_params) + content_list += generate_csv_category(section_name, reference_pars, orient=orientation) + + return '\\\\n'.join(content_list) + +def main(par: dict[str, Any], meta: dict[str, Any]): + logger.info(" Processing params") + par = process_params(par) + logger.info(par) + + # TODO: throw error or else Cell Ranger will + with tempfile.TemporaryDirectory(prefix="cellranger_multi-", + 
dir=meta["temp_dir"]) as temp_dir: + temp_dir_path = Path(temp_dir) + for reference_par_name in REFERENCES: + reference = par[reference_par_name] + logger.info('Looking at %s to check if it needs decompressing', reference) + if reference and Path(reference).is_file() and tarfile.is_tarfile(reference): + extaction_dir_name = Path(reference.stem).stem # Remove two extensions (if they exist) + unpacked_directory = temp_dir_path / extaction_dir_name + logger.info('Extracting %s to %s', reference, unpacked_directory) + + with tarfile.open(reference, 'r') as open_tar: + members = open_tar.getmembers() + root_dirs = [member for member in members if member.isdir() + and member.name != '.' and '/' not in member.name] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path('.')] + open_tar.extractall(unpacked_directory, members=members_to_move) + par[reference_par_name] = unpacked_directory + + # Creating symlinks of fastq files to tempdir + input_symlinks_dir = temp_dir_path / "input_symlinks" + input_symlinks_dir.mkdir() + for fastq in par['input']: + destination = input_symlinks_dir / fastq.name + destination.symlink_to(fastq) + + logger.info(" Creating config file") + config_content = generate_config(par, input_symlinks_dir) + + logger.info(" Creating Cell Ranger argument") + temp_id="run" + proc_pars=["--disable-ui", "--id", temp_id] + + command_line_parameters = { + "--localcores": meta['cpus'], + "--localmem": int(meta['memory_gb']) - 2 if meta['memory_gb'] else None, + } + for param, param_value in command_line_parameters.items(): + if param_value: + proc_pars.append(f"{param}={param_value}") + + ## Run pipeline + if par["dryrun"]: + par['output'].mkdir(parents=True, exist_ok=True) + + # write config file + config_file = par['output'] / 
"config.csv" + with open(config_file, "w") as f: + f.write(config_content) + proc_pars.append(f"--csv={config_file}") + + # display command that would've been used + cmd = ["cellranger multi"] + proc_pars + ["--csv=config.csv"] + logger.info("> " + ' '.join(cmd)) + else: + # write config file to execution directory + config_file = temp_dir_path / "config.csv" + with open(config_file, "w") as f: + f.write(config_content) + proc_pars.append(f"--csv={config_file}") + + # Already copy config file to output directory + par['output'].mkdir(parents=True, exist_ok=True) + with (par['output'] / "config.csv").open('w') as open_config: + open_config.write(config_content) + + # run process + cmd = ["cellranger", "multi"] + proc_pars + logger.info("> " + ' '.join(cmd)) + try: + process_output = subprocess.run( + cmd, + cwd=temp_dir, + check=True, + capture_output=True + ) + except subprocess.CalledProcessError as e: + print(e.output.decode('utf-8'), flush=True) + raise e + else: + # Write stdout output to output folder + with (par["output"] / "cellranger_multi.log").open('w') as open_log: + open_log.write(process_output.stdout.decode('utf-8')) + print(process_output.stdout.decode('utf-8'), flush=True) + + # look for output dir file + tmp_output_dir = temp_dir_path / temp_id / "outs" + expected_files = { + Path("multi"): Path.is_dir, + Path("per_sample_outs"): Path.is_dir, + Path("config.csv"): Path.is_file, + } + for file_path, type_func in expected_files.items(): + output_path = tmp_output_dir / file_path + if not type_func(output_path): + raise ValueError(f"Could not find expected '{output_path}'") + + for output_path in tmp_output_dir.rglob('*'): + if output_path.name != "config.csv": # Already created + shutil.move(str(output_path), par['output']) + +if __name__ == "__main__": + main(par, meta) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // 
fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_cellranger_multi", + "tag" : "0.12.0" + }, + "label" : [ + "veryhighmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. 
+ // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def 
inputFile = file_path !instanceof Path ? file(file_path) : file_path
+
+  // todo: allow escaped quotes in string
+  // todo: allow single quotes?
+  // split on commas that are followed by an even number of double quotes,
+  // i.e. commas that are not inside a quoted field
+  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
+  // captures the content of a double-quoted field
+  def removeQuote = Pattern.compile('''"(.*)"''')
+
+  def br = Files.newBufferedReader(inputFile)
+
+  // the reader is closed in the finally block, also when an assertion fails
+  try {
+    def row = -1
+    def header = null
+    // the first line that does not start with '#' is used as the header
+    while (br.ready() && header == null) {
+      def line = br.readLine()
+      row++
+      if (!line.startsWith("#")) {
+        header = splitRegex.split(line, -1).collect{field ->
+          // 'def' keeps the matcher local to the closure instead of
+          // storing it in the script binding
+          def m = removeQuote.matcher(field)
+          m.find() ? m.replaceFirst('$1') : field
+        }
+      }
+    }
+    assert header != null: "CSV file should contain a header"
+
+    while (br.ready()) {
+      def line = br.readLine()
+      row++
+      if (line == null) {
+        break
+      }
+
+      if (!line.startsWith("#")) {
+        def predata = splitRegex.split(line, -1)
+        def data = predata.collect{field ->
+          // empty fields become null and are dropped from the row map below
+          if (field == "") {
+            return null
+          }
+          def m = removeQuote.matcher(field)
+          if (m.find()) {
+            return m.replaceFirst('$1')
+          } else {
+            return field
+          }
+        }
+        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
+
+        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
+        output.add(dataMap)
+      }
+    }
+  } finally {
+    br.close()
+  }
+
+  output
+}
+
+// parse a JSON document passed as a string
+def readJsonBlob(str) {
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parseText(str)
+}
+
+// parse a JSON file; anything that is not a Path is first passed through file()
+def readJson(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parse(inputFile)
+}
+
+// parse a YAML document passed as a string
+def readYamlBlob(str) {
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(str)
+}
+
+// parse a YAML file; anything that is not a Path is first passed through file()
+def readYaml(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(inputFile)
+}
+
+// helper functions for reading a viash config in groovy //
+
+// based on how Functionality.scala is implemented
+def processArgument(arg) {
+  arg.multiple = arg.multiple != null ?
arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+              'example': 'my_params.yaml',
+              'multiple': false,
+              'hidden': true
+            ],
+          ]
+        ]
+      ]
+    ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+// based on io.viash.helpers.Format.wordWrap
+// Splits 'str' on whitespace and greedily packs the words into lines of at
+// most 'maxLength' characters. Returns the list of lines.
+def formatWordWrap(str, maxLength) {
+  def words = str.split("\\s").toList()
+  def lines = []
+
+  // whitespace-only input splits into zero words: nothing to wrap
+  if (words.isEmpty()) {
+    return lines
+  }
+
+  // seed the first line with the first word (as paragraphWrap does) so that
+  // it does not start with a stray separator space
+  def line = words.remove(0)
+  while(!words.isEmpty()) {
+    def word = words.remove(0)
+    if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  lines.add(line)
+  return lines
+}
+
+// based on Format.paragraphWrap
+// Wraps every newline-separated paragraph of 'str' separately and returns
+// all resulting lines as one list.
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    // a whitespace-only paragraph splits into zero words; keep it as an
+    // empty line instead of failing on an empty list
+    if (words.isEmpty()) {
+      outLines.add("")
+      return
+    }
+
+    def line = words.remove(0)
+    while(!words.isEmpty()) {
+      def word = words.remove(0)
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    outLines.add(line)
+  }
+  return outLines
+}
+
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ?
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) compares against a de-duplicated copy. The no-arg unique() removes the
+  // duplicates from ppIds itself, so the failure message would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares as type "file" with direction "input" are touched;
+ * values that are not Strings are passed through unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps argSettings local to this closure instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is returned as is (previously this referenced the
+        // undefined name 'path', which only exists inside the collect closure above)
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a key clash the value from 'param_list' wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // - '?.': the lookup yields null for any name that has no entry in allArguments
+      // - one-argument get(): Groovy's get(key, default) would insert 'default: null'
+      //   into the argument's config map as a side effect
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameter sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Default value of every argument that declares one, keyed by plain argument name
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Defaults go through the same processing as user-provided values
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+
+  // Process the incoming tuples, then layer their values on top of the defaults
+  def merged = applyConfig(params, config).collect { tuple ->
+    def valuesWithDefaults = defaults + tuple[1]
+    // keep the id first and re-attach any passthrough elements behind the values
+    [tuple[0], valuesWithDefaults] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def', so they are script-level
+  // binding variables rather than locals -- confirm this is intended before changing it.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and contains all required keys.
+ *
+ * @param map The value to check; must be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, used in the assertion messages (may be empty or null).
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must interpolate the closure parameter; 'key' is not defined in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/cellranger_multi/nextflow.config b/target/nextflow/mapping/cellranger_multi/nextflow.config new file mode 100644 index 
00000000000..a27a28f2b6c --- /dev/null +++ b/target/nextflow/mapping/cellranger_multi/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'cellranger_multi' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Align fastq files using Cell Ranger multi.' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt, Dries De Maeyer' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB 
} + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/cellranger_multi/nextflow_params.yaml b/target/nextflow/mapping/cellranger_multi/nextflow_params.yaml new file mode 100644 index 00000000000..b3ef771c035 --- /dev/null +++ b/target/nextflow/mapping/cellranger_multi/nextflow_params.yaml @@ -0,0 +1,34 @@ +# Outputs +# output: "$id.$key.output.output" + +# Input files +input: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L001_R2_001.fastq.gz"] +gex_reference: # please fill in - example: "reference_genome.tar.gz" +# vdj_reference: "reference_vdj.tar.gz" +# vdj_inner_enrichment_primers: "enrichment_primers.txt" +# feature_reference: "feature_reference.csv" + +# Library arguments +library_id: # please fill in - example: ["mysample1"] +library_type: # please fill in - example: ["Gene Expression"] +# library_subsample: ["0.5"] +# library_lanes: ["1-4"] + +# Gene expression arguments +# gex_expect_cells: 3000 +gex_chemistry: "auto" +gex_secondary_analysis: false +gex_generate_bam: false +gex_include_introns: true + +# Cell multiplexing parameters +# cell_multiplex_sample_id: "foo" +# cell_multiplex_oligo_ids: "foo" +# cell_multiplex_description: 
"foo" + +# Executor arguments +dryrun: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/cellranger_multi/nextflow_schema.json b/target/nextflow/mapping/cellranger_multi/nextflow_schema.json new file mode 100644 index 00000000000..ad0f1ffda48 --- /dev/null +++ b/target/nextflow/mapping/cellranger_multi/nextflow_schema.json @@ -0,0 +1,328 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cellranger_multi", +"description": "Align fastq files using Cell Ranger multi.", +"type": "object", +"definitions": { + + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/output`. The folder to store the alignment results." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "input files" : { + "title": "Input files", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed", + "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed. FASTQ files should conform to the naming conventions of bcl2fastq and mkfastq:\n`[Sample Name]_S[Sample Index]_L00[Lane Number]_[Read Type]_001.fastq.gz`\n" + + } + + + , + "gex_reference": { + "type": + "string", + "description": "Type: `file`, required, example: `reference_genome.tar.gz`. 
Genome refence index built by Cell Ranger mkref", + "help_text": "Type: `file`, required, example: `reference_genome.tar.gz`. Genome refence index built by Cell Ranger mkref." + + } + + + , + "vdj_reference": { + "type": + "string", + "description": "Type: `file`, example: `reference_vdj.tar.gz`. VDJ refence index built by Cell Ranger mkref", + "help_text": "Type: `file`, example: `reference_vdj.tar.gz`. VDJ refence index built by Cell Ranger mkref." + + } + + + , + "vdj_inner_enrichment_primers": { + "type": + "string", + "description": "Type: `file`, example: `enrichment_primers.txt`. V(D)J Immune Profiling libraries: if inner enrichment primers other than those provided \nin the 10x Genomics kits are used, they need to be specified here as a\ntext file with one primer per line", + "help_text": "Type: `file`, example: `enrichment_primers.txt`. V(D)J Immune Profiling libraries: if inner enrichment primers other than those provided \nin the 10x Genomics kits are used, they need to be specified here as a\ntext file with one primer per line.\n" + + } + + + , + "feature_reference": { + "type": + "string", + "description": "Type: `file`, example: `feature_reference.csv`. Path to the Feature reference CSV file, declaring Feature Barcode constructs and associated barcodes", + "help_text": "Type: `file`, example: `feature_reference.csv`. Path to the Feature reference CSV file, declaring Feature Barcode constructs and associated barcodes. Required only for Antibody Capture or CRISPR Guide Capture libraries. See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/feature-bc-analysis#feature-ref for more information." + + } + + +} +}, + + + "library arguments" : { + "title": "Library arguments", + "type": "object", + "description": "No description", + "properties": { + + + "library_id": { + "type": + "string", + "description": "Type: List of `string`, required, example: `mysample1`, multiple_sep: `\";\"`. 
The Illumina sample name to analyze", + "help_text": "Type: List of `string`, required, example: `mysample1`, multiple_sep: `\";\"`. The Illumina sample name to analyze. This must exactly match the \u0027Sample Name\u0027 part of the FASTQ files specified in the `--input` argument." + + } + + + , + "library_type": { + "type": + "string", + "description": "Type: List of `string`, required, example: `Gene Expression`, multiple_sep: `\";\"`. The underlying feature type of the library", + "help_text": "Type: List of `string`, required, example: `Gene Expression`, multiple_sep: `\";\"`. The underlying feature type of the library.\nPossible values: \"Gene Expression\", \"VDJ\", \"VDJ-T\", \"VDJ-B\", \"Antibody Capture\", \"CRISPR Guide Capture\", \"Multiplexing Capture\"\n" + + } + + + , + "library_subsample": { + "type": + "string", + "description": "Type: List of `string`, example: `0.5`, multiple_sep: `\";\"`. Optional", + "help_text": "Type: List of `string`, example: `0.5`, multiple_sep: `\";\"`. Optional. The rate at which reads from the provided FASTQ files are sampled. Must be strictly greater than 0 and less than or equal to 1." + + } + + + , + "library_lanes": { + "type": + "string", + "description": "Type: List of `string`, example: `1-4`, multiple_sep: `\";\"`. Lanes associated with this sample", + "help_text": "Type: List of `string`, example: `1-4`, multiple_sep: `\";\"`. Lanes associated with this sample. Defaults to using all lanes." + + } + + +} +}, + + + "gene expression arguments" : { + "title": "Gene expression arguments", + "type": "object", + "description": "Arguments relevant to the analysis of gene expression data.", + "properties": { + + + "gex_expect_cells": { + "type": + "integer", + "description": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm", + "help_text": "Type: `integer`, example: `3000`. Expected number of recovered cells, used as input to cell calling algorithm." 
+ + } + + + , + "gex_chemistry": { + "type": + "string", + "description": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration", + "help_text": "Type: `string`, default: `auto`, choices: ``auto`, `threeprime`, `fiveprime`, `SC3Pv1`, `SC3Pv2`, `SC3Pv3`, `SC3Pv3LT`, `SC3Pv3HT`, `SC5P-PE`, `SC5P-R2`, `SC-FB``. Assay configuration.\n- auto: autodetect mode\n- threeprime: Single Cell 3\u0027\n- fiveprime: Single Cell 5\u0027\n- SC3Pv1: Single Cell 3\u0027 v1\n- SC3Pv2: Single Cell 3\u0027 v2\n- SC3Pv3: Single Cell 3\u0027 v3\n- SC3Pv3LT: Single Cell 3\u0027 v3 LT\n- SC3Pv3HT: Single Cell 3\u0027 v3 HT\n- SC5P-PE: Single Cell 5\u0027 paired-end\n- SC5P-R2: Single Cell 5\u0027 R2-only\n- SC-FB: Single Cell Antibody-only 3\u0027 v2 or 5\u0027\nSee https://kb.10xgenomics.com/hc/en-us/articles/115003764132-How-does-Cell-Ranger-auto-detect-chemistry- for more information.\n", + "enum": ["auto", "threeprime", "fiveprime", "SC3Pv1", "SC3Pv2", "SC3Pv3", "SC3Pv3LT", "SC3Pv3HT", "SC5P-PE", "SC5P-R2", "SC-FB"] + + , + "default": "auto" + } + + + , + "gex_secondary_analysis": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e", + "help_text": "Type: `boolean`, default: `false`. Whether or not to run the secondary analysis e.g. clustering." + , + "default": "False" + } + + + , + "gex_generate_bam": { + "type": + "boolean", + "description": "Type: `boolean`, default: `false`. Whether to generate a BAM file", + "help_text": "Type: `boolean`, default: `false`. Whether to generate a BAM file." + , + "default": "False" + } + + + , + "gex_include_introns": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. 
Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)", + "help_text": "Type: `boolean`, default: `true`. Include intronic reads in count (default=true unless --target-panel is specified in which case default=false)" + , + "default": "True" + } + + +} +}, + + + "cell multiplexing parameters" : { + "title": "Cell multiplexing parameters", + "type": "object", + "description": "Arguments related to cell multiplexing.", + "properties": { + + + "cell_multiplex_sample_id": { + "type": + "string", + "description": "Type: `string`. A name to identify a multiplexed sample", + "help_text": "Type: `string`. A name to identify a multiplexed sample. Must be alphanumeric with hyphens and/or underscores, and less than 64 characters. Required for Cell Multiplexing libraries." + + } + + + , + "cell_multiplex_oligo_ids": { + "type": + "string", + "description": "Type: `string`. The Cell Multiplexing oligo IDs used to multiplex this sample", + "help_text": "Type: `string`. The Cell Multiplexing oligo IDs used to multiplex this sample. If multiple CMOs were used for a sample, separate IDs with a pipe (e.g., CMO301|CMO302). Required for Cell Multiplexing libraries." + + } + + + , + "cell_multiplex_description": { + "type": + "string", + "description": "Type: `string`. A description for the sample", + "help_text": "Type: `string`. A description for the sample." + + } + + +} +}, + + + "executor arguments" : { + "title": "Executor arguments", + "type": "object", + "description": "No description", + "properties": { + + + "dryrun": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed", + "help_text": "Type: `boolean_true`, default: `false`. If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed." 
+ , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/input files" + }, + + { + "$ref": "#/definitions/library arguments" + }, + + { + "$ref": "#/definitions/gene expression arguments" + }, + + { + "$ref": "#/definitions/cell multiplexing parameters" + }, + + { + "$ref": "#/definitions/executor arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/cellranger_multi/setup_logger.py b/target/nextflow/mapping/cellranger_multi/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/mapping/cellranger_multi/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/mapping/htseq_count/.config.vsh.yaml b/target/nextflow/mapping/htseq_count/.config.vsh.yaml new file mode 100644 index 00000000000..1b58e73d30c --- /dev/null +++ b/target/nextflow/mapping/htseq_count/.config.vsh.yaml @@ -0,0 +1,418 @@ +functionality: + name: "htseq_count" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: 
"robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Path to the SAM/BAM files containing the mapped reads." + info: + orig_arg: "samfilenames" + example: + - "mysample1.BAM" + - "mysample2.BAM" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + description: "Path to the GTF file containing the features." + info: + orig_arg: "featurefilename" + example: + - "reference.gtf" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output" + arguments: + - type: "file" + name: "--output" + description: "Filename to output the counts to." + info: + orig_arg: "--counts_output" + example: + - "htseq-count.tsv" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_delimiter" + description: "Column delimiter in output." 
+ info: + orig_arg: "--delimiter" + example: + - "\t" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_sam" + description: "Write out all SAM alignment records into SAM/BAM files (one per\ + \ input file needed), \nannotating each line with its feature assignment (as\ + \ an optional field with tag 'XF'). \nSee the -p option to use BAM instead\ + \ of SAM.\n" + info: + orig_arg: "--samout" + example: + - "mysample1_out.BAM" + - "mysample2_out.BAM" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--output_sam_format" + description: "Format to use with the --output_sam argument." + info: + orig_arg: "--samout-format" + required: false + choices: + - "sam" + - "bam" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "string" + name: "--order" + alternatives: + - "-r" + description: "Sorting order of the input alignment files. Paired-end sequencing data\ + \ must be sorted either by position or\nby read name, and the sorting order\ + \ must be specified. Ignored for single-end data.\n" + info: + orig_arg: "--order" + default: + - "name" + required: false + choices: + - "pos" + - "name" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--stranded" + alternatives: + - "-s" + description: "Whether the data is from a strand-specific assay. 'reverse' means\ + \ 'yes' with reversed strand interpretation." + info: + orig_arg: "--stranded" + default: + - "yes" + required: false + choices: + - "yes" + - "no" + - "reverse" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--minimum_alignment_quality" + alternatives: + - "-a" + - "--minaqual" + description: "Skip all reads with MAPQ alignment quality lower than the given\ + \ minimum value. 
\nMAPQ is the 5th column of a SAM/BAM file and its usage\ + \ depends on the software \nused to map the reads.\n" + info: + orig_arg: "--minaqual" + default: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--type" + alternatives: + - "-t" + description: "Feature type (3rd column in GTF file) to be used, all features\ + \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" + info: + orig_arg: "--type" + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--id_attribute" + alternatives: + - "-i" + description: "GTF attribute to be used as feature ID (default, suitable for\ + \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ + \ within the same GTF attribute will be added\ntogether. The typical way of\ + \ using this option is to count all exonic reads from each gene\nand add the\ + \ exons but other uses are possible as well. You can call this option multiple\n\ + times: in that case, the combination of all attributes separated by colons\ + \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ + \ gene_id -i exon_number.\n" + info: + orig_arg: "--idattr" + example: + - "gene_id" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--additional_attributes" + description: "Additional feature attributes (suitable for Ensembl GTF files:\ + \ gene_name). 
Use multiple times\nfor more than one additional attribute.\ + \ These attributes are only used as annotations in the\noutput, while the\ + \ determination of how the counts are added together is done based on option\ + \ -i.\n" + info: + orig_arg: "--additional-attr" + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--add_chromosome_info" + description: "Store information about the chromosome of each feature as an additional\ + \ attribute\n(e.g. column in the TSV output file).\n" + info: + orig_arg: "--add-chromosome-info" + direction: "input" + dest: "par" + - type: "string" + name: "--mode" + alternatives: + - "-m" + description: "Mode to handle reads overlapping more than one feature." + info: + orig_arg: "--mode" + default: + - "union" + required: false + choices: + - "union" + - "intersection-strict" + - "intersection-nonempty" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--non_unique" + description: "Whether and how to score reads that are not uniquely aligned or\ + \ ambiguously assigned to features." + info: + orig_arg: "--nonunique" + default: + - "none" + required: false + choices: + - "none" + - "all" + - "fraction" + - "random" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--secondary_alignments" + description: "Whether to score secondary alignments (0x100 flag)." + info: + orig_arg: "--secondary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--supplementary_alignments" + description: "Whether to score supplementary alignments (0x800 flag)." 
+ info: + orig_arg: "--supplementary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--counts_output_sparse" + description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." + info: + orig_arg: "--counts-output-sparse" + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Quantify gene expression for subsequent testing for differential expression.\n\ + \nThis script takes one or more alignment files in SAM/BAM format and a feature\ + \ file in GFF format and calculates for each feature the number of reads mapping\ + \ to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "HTSeq" + - "pyyaml" + - "scipy" + - "pandas~=2.0.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: 
"memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count/htseq_count" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/htseq_count/main.nf b/target/nextflow/mapping/htseq_count/main.nf new file mode 100644 index 00000000000..76bba96a6a3 --- /dev/null +++ b/target/nextflow/mapping/htseq_count/main.nf @@ -0,0 +1,2978 @@ +// htseq_count 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (author, maintainer) +// * Angela Oliveira Pisco (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "htseq_count", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Path to the SAM/BAM files containing the mapped reads.", + "info" : { + "orig_arg" : "samfilenames" + }, + "example" : [ + "mysample1.BAM", + "mysample2.BAM" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "description" : 
"Path to the GTF file containing the features.", + "info" : { + "orig_arg" : "featurefilename" + }, + "example" : [ + "reference.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Filename to output the counts to.", + "info" : { + "orig_arg" : "--counts_output" + }, + "example" : [ + "htseq-count.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_delimiter", + "description" : "Column delimiter in output.", + "info" : { + "orig_arg" : "--delimiter" + }, + "example" : [ + "\t" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output_sam", + "description" : "Write out all SAM alignment records into SAM/BAM files (one per input file needed), \nannotating each line with its feature assignment (as an optional field with tag 'XF'). 
\nSee the -p option to use BAM instead of SAM.\n", + "info" : { + "orig_arg" : "--samout" + }, + "example" : [ + "mysample1_out.BAM", + "mysample2_out.BAM" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_sam_format", + "description" : "Format to use with the --output_sam argument.", + "info" : { + "orig_arg" : "--samout-format" + }, + "required" : false, + "choices" : [ + "sam", + "bam" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--order", + "alternatives" : [ + "-r" + ], + "description" : "Sorting order of the input alignment files. Paired-end sequencing data must be sorted either by position or\nby read name, and the sorting order must be specified. Ignored for single-end data.\n", + "info" : { + "orig_arg" : "--order" + }, + "default" : [ + "name" + ], + "required" : false, + "choices" : [ + "pos", + "name" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--stranded", + "alternatives" : [ + "-s" + ], + "description" : "Whether the data is from a strand-specific assay. 'reverse' means 'yes' with reversed strand interpretation.", + "info" : { + "orig_arg" : "--stranded" + }, + "default" : [ + "yes" + ], + "required" : false, + "choices" : [ + "yes", + "no", + "reverse" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--minimum_alignment_quality", + "alternatives" : [ + "-a", + "--minaqual" + ], + "description" : "Skip all reads with MAPQ alignment quality lower than the given minimum value. 
\nMAPQ is the 5th column of a SAM/BAM file and its usage depends on the software \nused to map the reads.\n", + "info" : { + "orig_arg" : "--minaqual" + }, + "default" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--type", + "alternatives" : [ + "-t" + ], + "description" : "Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)", + "info" : { + "orig_arg" : "--type" + }, + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--id_attribute", + "alternatives" : [ + "-i" + ], + "description" : "GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option) within the same GTF attribute will be added\ntogether. The typical way of using this option is to count all exonic reads from each gene\nand add the exons but other uses are possible as well. You can call this option multiple\ntimes: in that case, the combination of all attributes separated by colons (:) will be used\nas a unique identifier, e.g. for exons you might use -i gene_id -i exon_number.\n", + "info" : { + "orig_arg" : "--idattr" + }, + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--additional_attributes", + "description" : "Additional feature attributes (suitable for Ensembl GTF files: gene_name). Use multiple times\nfor more than one additional attribute. 
These attributes are only used as annotations in the\noutput, while the determination of how the counts are added together is done based on option -i.\n", + "info" : { + "orig_arg" : "--additional-attr" + }, + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--add_chromosome_info", + "description" : "Store information about the chromosome of each feature as an additional attribute\n(e.g. column in the TSV output file).\n", + "info" : { + "orig_arg" : "--add-chromosome-info" + }, + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--mode", + "alternatives" : [ + "-m" + ], + "description" : "Mode to handle reads overlapping more than one feature.", + "info" : { + "orig_arg" : "--mode" + }, + "default" : [ + "union" + ], + "required" : false, + "choices" : [ + "union", + "intersection-strict", + "intersection-nonempty" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--non_unique", + "description" : "Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features.", + "info" : { + "orig_arg" : "--nonunique" + }, + "default" : [ + "none" + ], + "required" : false, + "choices" : [ + "none", + "all", + "fraction", + "random" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--secondary_alignments", + "description" : "Whether to score secondary alignments (0x100 flag).", + "info" : { + "orig_arg" : "--secondary-alignments" + }, + "required" : false, + "choices" : [ + "score", + "ignore" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--supplementary_alignments", + "description" : "Whether to score supplementary 
alignments (0x800 flag).", + "info" : { + "orig_arg" : "--supplementary-alignments" + }, + "required" : false, + "choices" : [ + "score", + "ignore" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--counts_output_sparse", + "description" : "Store the counts as a sparse matrix (mtx, h5ad, loom).", + "info" : { + "orig_arg" : "--counts-output-sparse" + }, + "direction" : "input", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/" + } + ], + "description" : "Quantify gene expression for subsequent testing for differential expression.\n\nThis script takes one or more alignment files in SAM/BAM format and a feature file in GFF format and calculates for each feature the number of reads mapping to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + 
"interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "HTSeq", + "pyyaml", + "scipy", + "pandas~=2.0.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import tempfile +import subprocess +from pathlib import 
Path +import tarfile +import gzip +import shutil +import yaml + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_delimiter': $( if [ ! -z ${VIASH_PAR_OUTPUT_DELIMITER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_DELIMITER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_sam': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'output_sam_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_SAM_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_SAM_FORMAT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'order': $( if [ ! -z ${VIASH_PAR_ORDER+x} ]; then echo "r'${VIASH_PAR_ORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'add_chromosome_info': $( if [ ! 
-z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'non_unique': $( if [ ! -z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'supplementary_alignments': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + +# helper function for cheching whether something is a gzip +def is_gz_file(path: Path) -> bool: + with open(path, 'rb') as file: + return file.read(2) == b'\\\\x1f\\\\x8b' + +# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) + + with tarfile.open(par_value, 'r') as open_tar: + members = open_tar.getmembers() + root_dirs = [member + for member in members + if member.isdir() and member.name != '.' 
and '/' not in member.name] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path('.')] + open_tar.extractall(unpacked_path, members=members_to_move) + return unpacked_path + + elif par_value.is_file() and is_gz_file(par_value): + # Remove extension (if it exists) + extaction_file_name = Path(par_value.stem) + unpacked_path = temp_dir_path / extaction_file_name + print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) + + with gzip.open(par_value, 'rb') as f_in: + with open(unpacked_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + return unpacked_path + + else: + return par_value + +def generate_args(par, config): + # fetch arguments from config + arguments = [ + arg + for group in config["functionality"]["argument_groups"] + for arg in group["arguments"] + ] + + cmd_args = [] + + for arg in arguments: + arg_val = par.get(arg["name"].removeprefix("--")) + orig_arg = arg.get("info", {}).get("orig_arg") + if arg_val and orig_arg: + if not arg.get("multiple", False): + arg_val = [arg_val] + + if arg["type"] in ["boolean_true", "boolean_false"]: + # if argument is a boolean_true or boolean_false, simply add the flag + arg_val = [orig_arg] + elif orig_arg.startswith("-"): + # if the orig arg flag is not a positional, + # add the flag in front of each element and flatten + arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] + + cmd_args.extend(arg_val) + + return cmd_args + +######################## +### Main code ### +######################## + +# read config arguments +config = yaml.safe_load(Path(meta["config"]).read_text()) + + +with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir: + + # checking for compressed files, ungzip files if need be + temp_dir_path = 
Path(temp_dir) + reference = Path(par["reference"]) + + print(f'>> Check compression of --reference with value: {reference}', flush=True) + par["reference"] = extract_if_need_be(reference, temp_dir_path) + + print(">> Constructing command", flush=True) + cmd_args = [ "htseq-count" ] + generate_args(par, config) + + # manually process cpus parameter + if 'cpus' in meta and meta['cpus']: + cmd_args.extend(["--nprocesses", str(meta["cpus"])]) + + print(">> Running htseq-count with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + + subprocess.run( + cmd_args, + check=True + ) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_htseq_count", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. 
the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected param_list format.
+  // Every parser returns a pair: [ <file the sets were read from, or null>, <parameter sets> ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is first wrapped in a list (unless it already is a Collection),
+ * cast element by element, and unwrapped again when the matching argument in
+ * the config is not marked as 'multiple'.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; arguments are looked up in
+ *               config.functionality.allArguments by their plainName.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // Declared with 'def' so they stay local to this closure; an undeclared
+    // assignment would create a script-wide variable shared by all invocations.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // only plain Strings are converted; anything else is passed through untouched
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{ value ->
+        // 'as Boolean' applies Groovy truth, under which every non-empty
+        // String is true, so the text "false" must be mapped explicitly.
+        if (value instanceof CharSequence && value.toString().trim().equalsIgnoreCase("false")) {
+          false
+        } else {
+          value as Boolean
+        }
+      }
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): parameters without a config entry (paramSettings == null)
+    // are left wrapped in a list here - confirm that this is intended.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed.
+  // The publish directory parameters are accepted although they are not part of the config.
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of tuples: the first element is the ID, the second
+ *                      the parameter Map; further elements are passed through as is.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedParameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedParameterSets)
+  return processedParameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (values from the param_list take precedence over the regular ones)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll is used so that dropped entries never have to be represented by
+    // a null closure result, and the settings lookup is null-safe because
+    // parameters such as publish_dir have no entry in the config.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      if (paramValue != null) {
+        return true
+      }
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose
+ *         first element is the ID of the event and whose second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of leaking it into the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by the plain argument name
+  def configDefaults = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      configDefaults[argument.plainName] = argument.default
+    }
+  }
+
+  // Run the defaults and the incoming sets through the same config processing
+  def castDefaults = applyConfigToOneParameterSet(configDefaults, config)
+  def castInputSets = applyConfig(params, config)
+
+  // Lay every input set over the defaults; explicitly passed values win.
+  // Tuple elements beyond the second are carried along unchanged.
+  def mergedSets = []
+  for (inputSet in castInputSets) {
+    def valuesWithDefaults = castDefaults + inputSet[1]
+    mergedSets << ([inputSet[0], valuesWithDefaults] + inputSet.drop(2))
+  }
+  _checkUniqueIds(mergedSets)
+
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // 'def' keeps this local to the closure instead of writing to the shared script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!' binds tighter than 'instanceof', so '!out instanceof List' was always false + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/htseq_count/nextflow.config b/target/nextflow/mapping/htseq_count/nextflow.config new file mode 100644 index 
00000000000..c652b269db6 --- /dev/null +++ b/target/nextflow/mapping/htseq_count/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'htseq_count' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Quantify gene expression for subsequent testing for differential expression.\n\nThis script takes one or more alignment files in SAM/BAM format and a feature file in GFF format and calculates for each feature the number of reads mapping to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n' + author = 'Robrecht Cannoodt, Angela Oliveira Pisco' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { 
memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/htseq_count/nextflow_params.yaml b/target/nextflow/mapping/htseq_count/nextflow_params.yaml new file mode 100644 index 00000000000..55b2152e8ff --- /dev/null +++ b/target/nextflow/mapping/htseq_count/nextflow_params.yaml @@ -0,0 +1,27 @@ +# Arguments +order: "name" +stranded: "yes" +minimum_alignment_quality: 10 +# type: "exon" +# id_attribute: ["gene_id"] +# additional_attributes: ["gene_name"] +add_chromosome_info: false +mode: "union" +non_unique: "none" +# secondary_alignments: "foo" +# supplementary_alignments: "foo" +counts_output_sparse: false + +# Input +input: # please fill in - example: ["mysample1.BAM", "mysample2.BAM"] +reference: # please fill in - example: "reference.gtf" + +# Output +# output: "$id.$key.output.tsv" +# output_delimiter: " " +# output_sam: ["$id.$key.output_sam_*.BAM"] +# output_sam_format: "foo" + +# Nextflow input-output arguments 
+publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/htseq_count/nextflow_schema.json b/target/nextflow/mapping/htseq_count/nextflow_schema.json new file mode 100644 index 00000000000..d426d557a63 --- /dev/null +++ b/target/nextflow/mapping/htseq_count/nextflow_schema.json @@ -0,0 +1,291 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "htseq_count", +"description": "Quantify gene expression for subsequent testing for differential expression.\n\nThis script takes one or more alignment files in SAM/BAM format and a feature file in GFF format and calculates for each feature the number of reads mapping to it. \n\nSee http://htseq.readthedocs.io/en/master/count.html for details.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "order": { + "type": + "string", + "description": "Type: `string`, default: `name`, choices: ``pos`, `name``. Sorting order of \u003calignment_file\u003e", + "help_text": "Type: `string`, default: `name`, choices: ``pos`, `name``. Sorting order of \u003calignment_file\u003e. Paired-end sequencing data must be sorted either by position or\nby read name, and the sorting order must be specified. Ignored for single-end data.\n", + "enum": ["pos", "name"] + + , + "default": "name" + } + + + , + "stranded": { + "type": + "string", + "description": "Type: `string`, default: `yes`, choices: ``yes`, `no`, `reverse``. Whether the data is from a strand-specific assay", + "help_text": "Type: `string`, default: `yes`, choices: ``yes`, `no`, `reverse``. Whether the data is from a strand-specific assay. \u0027reverse\u0027 means \u0027yes\u0027 with reversed strand interpretation.", + "enum": ["yes", "no", "reverse"] + + , + "default": "yes" + } + + + , + "minimum_alignment_quality": { + "type": + "integer", + "description": "Type: `integer`, default: `10`. 
Skip all reads with MAPQ alignment quality lower than the given minimum value", + "help_text": "Type: `integer`, default: `10`. Skip all reads with MAPQ alignment quality lower than the given minimum value. \nMAPQ is the 5th column of a SAM/BAM file and its usage depends on the software \nused to map the reads.\n" + , + "default": "10" + } + + + , + "type": { + "type": + "string", + "description": "Type: `string`, example: `exon`. Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)", + "help_text": "Type: `string`, example: `exon`. Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)" + + } + + + , + "id_attribute": { + "type": + "string", + "description": "Type: List of `string`, example: `gene_id`, multiple_sep: `\":\"`. GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id)", + "help_text": "Type: List of `string`, example: `gene_id`, multiple_sep: `\":\"`. GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option) within the same GTF attribute will be added\ntogether. The typical way of using this option is to count all exonic reads from each gene\nand add the exons but other uses are possible as well. You can call this option multiple\ntimes: in that case, the combination of all attributes separated by colons (:) will be used\nas a unique identifier, e.g. for exons you might use -i gene_id -i exon_number.\n" + + } + + + , + "additional_attributes": { + "type": + "string", + "description": "Type: List of `string`, example: `gene_name`, multiple_sep: `\":\"`. Additional feature attributes (suitable for Ensembl GTF files: gene_name)", + "help_text": "Type: List of `string`, example: `gene_name`, multiple_sep: `\":\"`. 
Additional feature attributes (suitable for Ensembl GTF files: gene_name). Use multiple times\nfor more than one additional attribute. These attributes are only used as annotations in the\noutput, while the determination of how the counts are added together is done based on option -i.\n" + + } + + + , + "add_chromosome_info": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Store information about the chromosome of each feature as an additional attribute\n(e", + "help_text": "Type: `boolean_true`, default: `false`. Store information about the chromosome of each feature as an additional attribute\n(e.g. column in the TSV output file).\n" + , + "default": "False" + } + + + , + "mode": { + "type": + "string", + "description": "Type: `string`, default: `union`, choices: ``union`, `intersection-strict`, `intersection-nonempty``. Mode to handle reads overlapping more than one feature", + "help_text": "Type: `string`, default: `union`, choices: ``union`, `intersection-strict`, `intersection-nonempty``. Mode to handle reads overlapping more than one feature.", + "enum": ["union", "intersection-strict", "intersection-nonempty"] + + , + "default": "union" + } + + + , + "non_unique": { + "type": + "string", + "description": "Type: `string`, default: `none`, choices: ``none`, `all`, `fraction`, `random``. Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features", + "help_text": "Type: `string`, default: `none`, choices: ``none`, `all`, `fraction`, `random``. Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features.", + "enum": ["none", "all", "fraction", "random"] + + , + "default": "none" + } + + + , + "secondary_alignments": { + "type": + "string", + "description": "Type: `string`, choices: ``score`, `ignore``. Whether to score secondary alignments (0x100 flag)", + "help_text": "Type: `string`, choices: ``score`, `ignore``. 
Whether to score secondary alignments (0x100 flag).", + "enum": ["score", "ignore"] + + + } + + + , + "supplementary_alignments": { + "type": + "string", + "description": "Type: `string`, choices: ``score`, `ignore``. Whether to score supplementary alignments (0x800 flag)", + "help_text": "Type: `string`, choices: ``score`, `ignore``. Whether to score supplementary alignments (0x800 flag).", + "enum": ["score", "ignore"] + + + } + + + , + "counts_output_sparse": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Store the counts as a sparse matrix (mtx, h5ad, loom)", + "help_text": "Type: `boolean_true`, default: `false`. Store the counts as a sparse matrix (mtx, h5ad, loom)." + , + "default": "False" + } + + +} +}, + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `mysample1.BAM;mysample2.BAM`, multiple_sep: `\";\"`. Path to the SAM/BAM files containing the mapped reads", + "help_text": "Type: List of `file`, required, example: `mysample1.BAM;mysample2.BAM`, multiple_sep: `\";\"`. Path to the SAM/BAM files containing the mapped reads." + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required, example: `reference.gtf`. Path to the GTF file containing the features", + "help_text": "Type: `file`, required, example: `reference.gtf`. Path to the GTF file containing the features." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.tsv`, example: `htseq-count.tsv`. Filename to output the counts to", + "help_text": "Type: `file`, required, default: `$id.$key.output.tsv`, example: `htseq-count.tsv`. Filename to output the counts to." 
+ , + "default": "$id.$key.output.tsv" + } + + + , + "output_delimiter": { + "type": + "string", + "description": "Type: `string`, example: `\t`. Column delimiter in output", + "help_text": "Type: `string`, example: `\t`. Column delimiter in output." + + } + + + , + "output_sam": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.output_sam_*.BAM`, example: `mysample1_out.BAM;mysample2_out.BAM`, multiple_sep: `\";\"`. Write out all SAM alignment records into SAM/BAM files (one per input file needed), \nannotating each line with its feature assignment (as an optional field with tag \u0027XF\u0027)", + "help_text": "Type: List of `file`, default: `$id.$key.output_sam_*.BAM`, example: `mysample1_out.BAM;mysample2_out.BAM`, multiple_sep: `\";\"`. Write out all SAM alignment records into SAM/BAM files (one per input file needed), \nannotating each line with its feature assignment (as an optional field with tag \u0027XF\u0027). \nSee the -p option to use BAM instead of SAM.\n" + , + "default": "$id.$key.output_sam_*.BAM" + } + + + , + "output_sam_format": { + "type": + "string", + "description": "Type: `string`, choices: ``sam`, `bam``. Format to use with the --output_sam argument", + "help_text": "Type: `string`, choices: ``sam`, `bam``. Format to use with the --output_sam argument.", + "enum": ["sam", "bam"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
+ + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml b/target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..7c3a327a49f --- /dev/null +++ b/target/nextflow/mapping/htseq_count_to_h5mu/.config.vsh.yaml @@ -0,0 +1,209 @@ +functionality: + name: "htseq_count_to_h5mu" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input" + arguments: + - type: "string" + name: "--input_id" + description: "The obs index for the counts" + info: null + example: + - "foo" + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--input_counts" + description: "The counts as a TSV file as output by HTSeq." 
+ info: null + example: + - "counts.tsv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + description: "The GTF file." + info: null + example: + - "gencode_v41_star" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Convert the htseq table to a h5mu.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "gtfparse" + - "polars[pyarrow] < 0.16.14" + upgrade: true + entrypoint: [] + cmd: 
null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count_to_h5mu/htseq_count_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/main.nf b/target/nextflow/mapping/htseq_count_to_h5mu/main.nf new file mode 100644 index 00000000000..ccfb32f36c6 --- /dev/null +++ b/target/nextflow/mapping/htseq_count_to_h5mu/main.nf @@ -0,0 +1,2710 @@ +// htseq_count_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Robrecht Cannoodt (author, maintainer) +// * Angela Oliveira Pisco (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "htseq_count_to_h5mu", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "string", + "name" : "--input_id", + "description" : "The obs index for the counts", + "example" : [ + 
"foo" + ], + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input_counts", + "description" : "The counts as a TSV file as output by HTSeq.", + "example" : [ + "counts.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "description" : "The GTF file.", + "example" : [ + "gencode_v41_star" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/" + } + ], + "description" : "Convert the htseq table to a h5mu.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "gtfparse", + "polars[pyarrow] < 0.16.14" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "midcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : 
"cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/htseq_count_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/htseq_count_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import tempfile +from pathlib import Path +import tarfile +import gzip +import shutil +import pandas as pd +import mudata as md +import anndata as ad +import polars as pl +import numpy as np +import gtfparse + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'input_counts': $( if [ ! -z ${VIASH_PAR_INPUT_COUNTS+x} ]; then echo "r'${VIASH_PAR_INPUT_COUNTS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + +# helper function for cheching whether something is a gzip +def is_gz_file(path: Path) -> bool: + with open(path, 'rb') as file: + return file.read(2) == b'\\\\x1f\\\\x8b' + +# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) + + with tarfile.open(par_value, 'r') as open_tar: + members = open_tar.getmembers() + root_dirs = [member + for member in members + if member.isdir() and member.name != '.' 
and '/' not in member.name] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path('.')] + open_tar.extractall(unpacked_path, members=members_to_move) + return unpacked_path + + elif par_value.is_file() and is_gz_file(par_value): + # Remove extension (if it exists) + extaction_file_name = Path(par_value.stem) + unpacked_path = temp_dir_path / extaction_file_name + print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) + + with gzip.open(par_value, 'rb') as f_in: + with open(unpacked_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + return unpacked_path + + else: + return par_value + + +print("> combine counts data", flush=True) +counts_data = [] + +for input_id, input_counts in zip(par["input_id"], par["input_counts"]): + data = pd.read_table(input_counts, index_col=0, names=["gene_ids", input_id], dtype={'gene_ids': 'U', input_id: 'i'}).transpose() + counts_data.append(data) + +# combine all counts +counts_and_qc = pd.concat(counts_data, axis=0) + +print("> split qc", flush=True) +idx = counts_and_qc.columns.str.startswith("_") +qc = counts_and_qc.loc[:,idx] +qc.columns = qc.columns.str.replace("^__", "", regex=True) +counts = counts_and_qc.loc[:,~idx] + +print("> construct var", flush=True) +with tempfile.TemporaryDirectory(prefix="htseq-", dir=meta["temp_dir"]) as temp_dir: + # checking for compressed files, ungzip files if need be + temp_dir_path = Path(temp_dir) + reference = Path(par["reference"]) + + print(f'>> Check compression of --reference with value: {reference}', flush=True) + par["reference"] = extract_if_need_be(reference, temp_dir_path) + + # read_gtf only works on str object, not pathlib.Path + reference = gtfparse.read_gtf(str(par["reference"])) + + +# This is a polars dataframe, not 
pandas +reference_genes = reference.filter((pl.col("feature") == "gene") & + (pl.col("gene_id").is_in(list(counts.columns))))\\\\ + .sort("gene_id") + +var = pd.DataFrame( + data={ + "gene_ids": pd.Index(reference_genes.get_column("gene_id")), + "feature_types": "Gene Expression", + "gene_symbol": reference_genes.get_column("gene_name").to_pandas(), + } +).set_index("gene_ids") + +print("> construct anndata", flush=True) +adata = ad.AnnData( + X=counts, + obsm={"qc_htseq": qc}, + var=var, + dtype=np.int32 +) + +print("> convert to mudata", flush=True) +mdata = md.MuData(adata) + +print("> write to file", flush=True) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_htseq_count_to_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "midcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +}
+
+// helper functions for generating help //
+
+/**
+ * Greedily wrap a single piece of text into lines of at most maxLength characters.
+ * Based on io.viash.helpers.Format.wordWrap.
+ *
+ * The text is split on every single whitespace character, so consecutive
+ * whitespace yields empty "words". A word longer than maxLength is never
+ * broken; it ends up alone on its own line.
+ *
+ * @param str       The text to wrap.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list of lines. The first line starts with the first word
+ *         (no leading separator).
+ */
+def formatWordWrap(str, maxLength) {
+  def lines = []
+  def line = null
+  for (word in str.split("\\s")) {
+    if (line == null) {
+      // seed the first line with the first word so it does not get a leading space
+      line = word
+    } else if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  if (line != null) {
+    lines.add(line)
+  }
+  return lines
+}
+
+/**
+ * Wrap a text paragraph by paragraph (paragraphs are separated by "\n").
+ * Based on Format.paragraphWrap.
+ *
+ * @param str       The text to wrap.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list with the wrapped lines of all paragraphs, in order.
+ */
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  for (par in str.split("\n")) {
+    outLines.addAll(formatWordWrap(par, maxLength))
+  }
+  return outLines
+}
+
+/**
+ * Render the help entry of one argument: its name, a list of named
+ * properties (type, default, example, choices, min, max) and its description.
+ *
+ * @param param A Map describing one argument of the Viash config.
+ *
+ * @return The help text for this argument.
+ */
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    // a missing direction is not an output (elsewhere it is treated as "input")
+    ["output", param.direction?.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  // lists are rendered with the argument's own separator when it has one
+  def joinValues = { value ->
+    value instanceof List ?
+      value.join(param.multiple_sep != null ? param.multiple_sep : ", ") :
+      value.toString()
+  }
+  def dflt = param.default != null ? joinValues(param.default) : null
+  def example = param.example != null ? joinValues(param.example) : null
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  def escapeChoice = { choice ->
+    // Literal replacements on purpose: with replaceAll the backslash in the
+    // replacement string is consumed by the regex engine, so a quote would
+    // be replaced by an unescaped quote.
+    def escaped = choice.replace("\n", "\\n").replace("\"", "\\\"")
+    escaped.contains(",") || escaped != choice ? "\"" + escaped + "\"" : escaped
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    // literal replace: replaceAll("\n", "\\n") would turn a newline into the letter 'n'
+    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+/**
+ * Build the complete help text of a component.
+ * Based on Helper.generateHelp() in Helper.scala.
+ *
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The help text: name and version, description, usage and one
+ *         section per argument group.
+ */
+def generateHelp(config) {
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // a group may list its arguments by name; names that cannot be resolved are dropped
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+/**
+ * Print the help text and exit with status 0 when the 'help' parameter is present.
+ *
+ * @param config A Map of the Viash configuration.
+ */
+def helpMessage(config) {
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+/**
+ * Guess the format of the 'param_list' parameter.
+ *
+ * @param params Input parameters.
+ *
+ * @return "none" when param_list is absent or null, "asis" when it is not a
+ *         String, "csv", "json" or "yaml" depending on the file extension,
+ *         and "yaml_blob" for any other String.
+ */
+def _guessParamListFormat(params) {
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+// shared by paramsToList, paramsToChannel and viashChannel so that only the
+// first of them to be called prints a deprecation warning
+viashChannelDeprecationWarningPrinted = false
+
+/**
+ * Deprecated: turn the nextflow parameters into a list of argument sets.
+ *
+ * Defaults from the config, values found in params and the entries of
+ * 'param_list' are merged (later sources win), split on 'multiple_sep',
+ * cast to their declared type and checked for a unique 'id'.
+ *
+ * @param params Input parameters.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A list of Maps, one per argument set.
+ */
+def paramsToList(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  // each parser returns [ file the list was read from (or null), list of maps ]
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
+  def paramList = multiOptionOut[1]
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        // declared locally so the value does not leak into the script binding
+        def parData
+        // split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              // paths inside a param_list file are relative to that file
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      // NOTE(review): the closure receives map entries, which are never null,
+      // so this filter keeps everything -- confirm whether filtering on the
+      // entry value was intended.
+      .findAll{ par -> par != null }
+  }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  // unique(false) returns a copy, so the message still lists the duplicated ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+/**
+ * Deprecated: same as paramsToList, but returns the argument sets as a channel.
+ *
+ * @param params Input parameters.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A nextflow Channel emitting one Map per argument set.
+ */
+def paramsToChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+/**
+ * Deprecated: same as paramsToChannel, but every event is an [id, argument set] pair.
+ *
+ * @param params Input parameters.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A nextflow Channel emitting [id, Map] pairs.
+ */
+def viashChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ *         their separator. Parameters that are not described in the config are returned unchanged.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(parameterSettings.multiple_sep)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ */
+private void _checkUniqueIds(List parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy; the in-place unique() would
+  // strip the duplicates from ppIds before they are reported in the message
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path. Values that are not Strings, and
+ *         parameters that are not input files, are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local instead of storing it in the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    // a missing direction means "input", as in _castParamTypes
+    def isInputFile = argSettings &&
+      argSettings.type == "file" &&
+      (argSettings.direction != null ? argSettings.direction : "input") == "input"
+    if (isInputFile) {
+      def resolve = { path ->
+        path !instanceof String ? path : file(getChild(relativeTo, path))
+      }
+      // the scalar branch used to return the undefined variable 'path'
+      // for values that are not Strings
+      parValue = parValue instanceof Collection ? parValue.collect(resolve) : resolve(parValue)
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' argument based on settings defined in a viash config
+ * and return the parameter sets it contains.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns [ file the list was read from (or null), list of maps ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID
+  // out of the map and into the first element of the pair
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // both lookups are declared with 'def': an undeclared assignment would
+    // create a variable in the script binding that is shared by every call
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a collection so that single and multiple values are cast the same way
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): a parameter without settings (e.g. publish_dir) is not
+    // unwrapped here and therefore stays a one-element list -- confirm that
+    // callers expect this.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config ('publishDir' and
+ *                    'publish_dir' are accepted as well).
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. Each set is a list whose first
+ *                      element is the id and whose second element is the Map
+ *                      of parameters; extra elements are passed through as is.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll states the filter directly instead of relying on a
+    // collectEntries closure that returns null for dropped entries.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // parameterSettings is null for parameters that are not part of the
+      // config (e.g. publish_dir), hence the safe navigation
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose
+ *         first element is the ID of the event and whose second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config go through the same split/cast steps
+  // as the values supplied by the caller.
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ arg.plainName, arg.default ] }
+  def defaults = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Supplied values override the defaults; elements after the first two
+  // are appended untouched.
+  def withDefaults = applyConfig(params, config).collect { event ->
+    [event[0], defaults + event[1]] + event.drop(2)
+  }
+  _checkUniqueIds(withDefaults)
+
+  return withDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def', so they are
+  // stored in the script binding and shared between calls -- confirm whether
+  // the workflow block below depends on that before making them local.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events so that they can be checked as one list,
+    // then emit them one by one again
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map which only contains expected keys and
+ * contains every required key.
+ *
+ * @param map          The value to check.
+ * @param expectedKeys The keys that are allowed in the map.
+ * @param requiredKeys The keys that must be present in the map.
+ * @param mapName      Name of the map, used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must name the closure parameter; '$key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
// Validate the 'auto' settings of a module and reduce them to the known keys.
//
// auto: map of automation flags; entries whose value is null are ignored.
// Returns a new map that contains only the keys "simplifyInput",
// "simplifyOutput", "transcript" and "publish".
// Fails with an assertion error when one of those keys is absent (or null)
// or when its value is not a Boolean.
// TODO: unit test processAuto
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // 'def' keeps the list local; a bare assignment would create a variable in
  // the script binding that every other function could see and overwrite
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key) : "Error in 'auto' settings: required key '$key' is missing"
    assert auto[key] instanceof Boolean : "Error in 'auto' settings: expected '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
  }

  return auto.subMap(expectedKeys)
}
// Merge the user supplied arguments of a `.run(...)` call with the module
// defaults, validate them, and normalise them for the factories below.
//
// args: overrides for thisDefaultProcessArgs (key, directives, auto, map*,
//       filter, fromState, toState, ...).
// Returns the merged map in which
//   - 'key' is a validated identifier (a Closure key is applied to the
//     functionality name first),
//   - 'directives' went through processDirectives and 'auto' through
//     processAuto,
//   - publishDir entries are added for auto.publish / auto.transcript when the
//     matching params are set,
//   - 'fromState' (when non-empty) and 'toState' are Closures.
// Fails with an assertion error on any argument of an unexpected type.
def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${processArgs['directives'].getClass()}"
  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])

  // check whether auto exists and apply defaults
  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])

  // auto define publish, if so desired
  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
    // The presence of params.publish_dir / params.publishDir can't be asserted
    // at this level thanks to the no_publish profile; the generated process
    // checks task.publishDir instead.
    def publishDir =
      params.containsKey("publish_dir") ? params.publish_dir :
      params.containsKey("publishDir") ? params.publishDir :
      null

    if (publishDir != null) {
      processArgs.directives.publishDir = [[
        path: publishDir,
        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
        mode: "copy"
      ]]
    }
  }

  // auto define transcript, if so desired
  if (processArgs.auto.transcript == true) {
    // Same remark as above: no assertion on the params here because of the
    // no_publish profile.
    def transcriptsDir =
      params.containsKey("transcripts_dir") ? params.transcripts_dir :
      params.containsKey("transcriptsDir") ? params.transcriptsDir :
      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
      null
    if (transcriptsDir != null) {
      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
      // only hidden files are published here, with their first character removed
      def transcriptsPublishDir = [
        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
        mode: "copy"
      ]
      // start from the publishDir entries defined so far (none -> empty list)
      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
    }
  }

  // if this is a stubrun, remove certain directives?
  if (workflow.stubRun) {
    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
  }

  // the optional hooks must be closures when they are set
  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
    if (processArgs.containsKey(nam) && processArgs[nam]) {
      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
    }
  }

  // check fromState
  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
  def fromState = processArgs["fromState"]
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  // note: an empty List or Map is falsy and is therefore left untouched
  if (fromState) {
    // if fromState is a List, convert to map
    if (fromState instanceof List) {
      // check whether fromstate is a list[string]
      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
      fromState = fromState.collectEntries{[it, it]}
    }

    // if fromState is a map, convert to closure
    if (fromState instanceof Map) {
      // check whether fromstate is a map[string, string]
      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
      def fromStateMap = fromState.clone()
      // turn the map into a closure to be used later on
      fromState = { it ->
        def state = it[1]
        assert state instanceof Map : "Error in module '$key': the state is not a Map"
        def data = fromStateMap.collectEntries{newkey, origkey ->
          // check whether all values of fromState are in state
          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
          [newkey, state[origkey]]
        }
        data
      }
    }

    processArgs["fromState"] = fromState
  }

  // check toState
  def toState = processArgs["toState"]

  // default: the second element of the tuple becomes the new state
  if (toState == null) {
    toState = { tup -> tup[1] }
  }

  // toState should be a closure, map[string, string], or list[string]
  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"

  // if toState is a List, convert to map
  if (toState instanceof List) {
    // check whether toState is a list[string]
    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
    toState = toState.collectEntries{[it, it]}
  }

  // if toState is a map, convert to closure
  if (toState instanceof Map) {
    // check whether toState is a map[string, string]
    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
    def toStateMap = toState.clone()
    // turn the map into a closure to be used later on
    toState = { it ->
      def output = it[1]
      def state = it[2]
      assert output instanceof Map : "Error in module '$key': the output is not a Map"
      assert state instanceof Map : "Error in module '$key': the state is not a Map"
      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
        // check whether all values of toState are in output
        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
        [newkey, output[origkey]]
      }
      // previous state, overridden/extended with the selected outputs
      state + extraEntries
    }
  }

  processArgs["toState"] = toState

  // return output
  return processArgs
}
// Generate the source of a Nextflow process for this module, evaluate it as
// a module script and return the resulting process.
//
// processArgs: the map returned by processProcessArgs; 'key', 'directives'
//              and 'auto' are read here.
// Returns the process registered under "<key>_process" (with a numeric
// suffix when that name is already taken in the current ScriptMeta).
def processFactory(Map processArgs) {
  // autodetect process key
  def wfKey = processArgs["key"]
  def procKeyPrefix = "${wfKey}_process"
  def meta = ScriptMeta.current()
  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
  // "<prefix>" -> 0, "<prefix>3" -> 3
  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
  def newNumber = (numbers + [-1]).max() + 1

  def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"

  if (newNumber > 0) {
    // Build ONE message. Passing the advice as a second argument would make
    // it a format argument of a message without '{}' placeholder, i.e. it
    // would never be printed.
    def duplicateKeyMsg =
      "Key for module '${wfKey}' is duplicated.\n" +
      "If you run a component multiple times in the same workflow,\n" +
      "it's recommended you set a unique key for every call,\n" +
      "for example: ${wfKey}.run(key: \"foo\")."
    log.warn(duplicateKeyMsg.toString())
  }

  // subset directives and convert to list of tuples
  def drctv = processArgs.directives

  // TODO: unit test the two commands below
  // convert publish array into tags
  // The variable is declared before the closure is assigned so that the
  // recursive calls for nested Lists and Maps can resolve the name.
  def valueToStr
  valueToStr = { val ->
    // ignore closures
    if (val instanceof CharSequence) {
      // strings of the form '{...}' are emitted unquoted (treated as closures)
      if (!val.matches('^[{].*[}]$')) {
        '"' + val + '"'
      } else {
        val
      }
    } else if (val instanceof List) {
      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
    } else if (val instanceof Map) {
      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
    } else {
      val.inspect()
    }
  }

  // multiple entries allowed: label, publishdir
  def drctvStrs = drctv.collect { key, value ->
    if (key in ["label", "publishDir"]) {
      value.collect{ val ->
        if (val instanceof Map) {
          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
        } else if (val == null) {
          ""
        } else {
          "\n$key " + valueToStr(val)
        }
      }.join()
    } else if (value instanceof Map) {
      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
    } else {
      "\n$key " + valueToStr(value)
    }
  }.join()

  // one path(...) input per file input argument
  def inputPaths = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "input" }
    .collect { ', path(viash_par_' + it.plainName + ')' }
    .join()

  def outputPaths = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" }
    .collect { par ->
      // insert dummy into every output (see nextflow-io/nextflow#2678)
      if (!par.multiple) {
        ', path{[".exitcode", args.' + par.plainName + ']}'
      } else {
        ', path{[".exitcode"] + args.' + par.plainName + '}'
      }
    }
    .join()

  // TODO: move this functionality somewhere else?
  if (processArgs.auto.transcript) {
    outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}'
  } else {
    outputPaths = outputPaths + ', path{[".exitcode"]}'
  }

  // create dirs for output files (based on BashWrapper.createParentFiles)
  def createParentStr = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" && it.create_parent }
    .collect { par ->
      "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
    }
    .join("\n")

  // construct inputFileExports
  def inputFileExports = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
    .collect { par ->
      // local on purpose: without 'def' the value would live in the script binding
      def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")"
      "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
    }

  // NOTE: if using docker, use /tmp instead of tmpDir!
  def tmpDir = java.nio.file.Paths.get(
    System.getenv('NXF_TEMP') ?:
    System.getenv('VIASH_TEMP') ?:
    System.getenv('VIASH_TMPDIR') ?:
    System.getenv('VIASH_TEMPDIR') ?:
    System.getenv('VIASH_TMP') ?:
    System.getenv('TEMP') ?:
    System.getenv('TMPDIR') ?:
    System.getenv('TEMPDIR') ?:
    System.getenv('TMP') ?:
    '/tmp'
  ).toAbsolutePath()

  // construct stub
  def stub = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" }
    .collect { par ->
      "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
    }
    .join("\n")

  // escape script
  def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"')

  // publishdir assert
  def assertStr = processArgs.auto.publish || processArgs.auto.transcript ?
    """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ :
    ""

  // generate process string
  def procStr =
  """nextflow.enable.dsl=2
  |
  |process $procKey {$drctvStrs
  |input:
  | tuple val(id)$inputPaths, val(args), path(resourcesDir)
  |output:
  | tuple val("\$id")$outputPaths, optional: true
  |stub:
  |\"\"\"
  |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; }
  |$stub
  |\"\"\"
  |script:$assertStr
  |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') }
  |def parInject = args
  | .findAll{key, value -> value != null}
  | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""}
  | .join("\\n")
  |\"\"\"
  |# meta exports
  |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}"
  |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}"
  |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}"
  |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME"
  |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
  |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" }
  |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" }
  |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then
  | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 ))
  | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 ))
  | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 ))
  | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 ))
  | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 ))
  |fi
  |
  |# meta synonyms
  |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR"
  |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR"
  |
  |# create output dirs if need be
  |function mkdir_parent {
  | for file in "\\\$@"; do
  | mkdir -p "\\\$(dirname "\\\$file")"
  | done
  |}
  |$createParentStr
  |
  |# argument exports${inputFileExports.join()}
  |\$parInject
  |
  |# process script
  |${escapedScript}
  |\"\"\"
  |}
  |""".stripMargin()

  // TODO: print on debug
  // if (processArgs.debug == true) {
  //   println("######################\n$procStr\n######################")
  // }

  // create runtime process
  def ownerParams = new ScriptBinding.ParamsMap()
  def binding = new ScriptBinding().setParams(ownerParams)
  def module = new IncludeDef.Module(name: procKey)
  def scriptParser = new ScriptParser(session)
    .setModule(true)
    .setBinding(binding)
  scriptParser.scriptPath = ScriptMeta.current().getScriptPath()
  def moduleScript = scriptParser.runScript(procStr)
    .getScript()

  // register module in meta
  meta.addModule(moduleScript, module.name, module.alias)

  // retrieve and return process from meta
  return meta.getProcess(procKey)
}
// Channel step used between the stages of the module workflow.
// When processArgs.debug is set, every tuple is printed with the process key
// and the given debugKey as a label; otherwise tuples pass through unchanged.
def debug(processArgs, debugKey) {
  def tracingRequested = processArgs.debug

  // default case first: identity mapping
  if (!tracingRequested) {
    return map { it }
  }

  return view { "process '${processArgs.key}' $debugKey tuple: $it" }
}
// Build the workflow that wraps this module's process.
//
// args: process arguments as accepted by processProcessArgs.
// Returns a clone of the workflow named after the (validated) key, with two
// extra members on its metaClass:
//   run(runArgs) - creates a new workflow via workflowFactory(runArgs)
//   config       - thisConfig, for later introspection
//
// Channel contract: the input channel emits [id, data, ...passthrough];
// the output channel emits [id, new_state, ...passthrough].
def workflowFactory(Map args) {
  def processArgs = processProcessArgs(args)
  def key = processArgs["key"]
  def meta = ScriptMeta.current()

  def workflowKey = key

  // created on first use inside the workflow body
  def processObj = null

  workflow workflowInstance {
    take:
    input_

    main:
    if (processObj == null) {
      processObj = processFactory(processArgs)
    }

    // stage 1: apply the user hooks and validate the incoming tuple
    mid1_ = input_
      | debug(processArgs, "input")
      | map { tuple ->
        tuple = tuple.clone()

        if (processArgs.map) {
          tuple = processArgs.map(tuple)
        }
        if (processArgs.mapId) {
          tuple[0] = processArgs.mapId(tuple[0])
        }
        if (processArgs.mapData) {
          tuple[1] = processArgs.mapData(tuple[1])
        }
        if (processArgs.mapPassthrough) {
          tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2))
        }

        // check tuple
        assert tuple instanceof List :
          "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
        assert tuple.size() >= 2 :
          "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: tuple.size() == ${tuple.size()}"

        // check id field
        assert tuple[0] instanceof CharSequence :
          "Error in module '${key}': first element of tuple in channel should be a String\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: ${tuple[0]}"

        // match file to input file
        if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
          def inputFiles = thisConfig.functionality.allArguments
            .findAll { it.type == "file" && it.direction == "input" }

          assert inputFiles.size() == 1 :
              "Error in module '${key}' id '${tuple[0]}'.\n" +
              " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
              " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"

          // wrap the anonymous value in a map keyed by the single file input
          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
        }

        // check data field
        assert tuple[1] instanceof Map :
          "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

        // rename keys of data field in tuple
        if (processArgs.renameKeys) {
          assert processArgs.renameKeys instanceof Map :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}"
          assert tuple[1] instanceof Map :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

          // TODO: allow renameKeys to be a function?
          processArgs.renameKeys.each { newKey, oldKey ->
            assert newKey instanceof CharSequence :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
            assert oldKey instanceof CharSequence :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
            assert tuple[1].containsKey(oldKey) :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
            tuple[1].put(newKey, tuple[1][oldKey])
          }
          // drop the old names once all new names are in place
          tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
        }
        tuple
      }

    // stage 2: optional filter
    if (processArgs.filter) {
      mid2_ = mid1_
        | filter{processArgs.filter(it)}
    } else {
      mid2_ = mid1_
    }

    // stage 3: derive the process arguments from the state; passthrough
    // elements are dropped here and re-attached by the join further down
    if (processArgs.fromState) {
      mid3_ = mid2_
        | map{
          def new_data = processArgs["fromState"](it.take(2))
          [it[0], new_data]
        }
    } else {
      mid3_ = mid2_
    }

    out0_ = mid3_
      | debug(processArgs, "processed")
      | map { tuple ->
        def id = tuple[0]
        def data = tuple[1]

        // fetch default params from functionality
        def defaultArgs = thisConfig.functionality.allArguments
          .findAll { it.containsKey("default") }
          .collectEntries { [ it.plainName, it.default ] }

        // fetch overrides in params
        def paramArgs = thisConfig.functionality.allArguments
          .findAll { par ->
            def argKey = key + "__" + par.plainName
            params.containsKey(argKey) && params[argKey] != "viash_no_value"
          }
          .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] }

        // fetch overrides in data
        def dataArgs = thisConfig.functionality.allArguments
          .findAll { data.containsKey(it.plainName) }
          .collectEntries { [ it.plainName, data[it.plainName] ] }

        // combine params (later maps take precedence)
        def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs

        // remove arguments with explicit null values
        combinedArgs.removeAll{it.value == null}

        if (workflow.stubRun) {
          // add id if missing
          combinedArgs = [id: 'stub'] + combinedArgs
        } else {
          // check whether required arguments exist
          thisConfig.functionality.allArguments
            .forEach { par ->
              if (par.required) {
                assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
              }
            }
        }

        // TODO: check whether parameters have the right type

        // process input files separately
        def inputPaths = thisConfig.functionality.allArguments
          .findAll { it.type == "file" && it.direction == "input" }
          .collect { par ->
            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
            def inputFiles = []
            if (val == null) {
              inputFiles = []
            } else if (val instanceof List) {
              inputFiles = val
            } else if (val instanceof Path) {
              inputFiles = [ val ]
            } else {
              inputFiles = []
            }
            if (!workflow.stubRun) {
              // throw error when an input file doesn't exist
              inputFiles.each{ file ->
                assert file.exists() :
                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
                  " Required input file does not exist.\n" +
                  " Path: '$file'.\n" +
                  " Expected input file to exist"
              }
            }
            inputFiles
          }

        // remove input files
        def argsExclInputFiles = thisConfig.functionality.allArguments
          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
          .collectEntries { par ->
            def parName = par.plainName
            def val = combinedArgs[parName]
            if (par.multiple && val instanceof Collection) {
              val = val.join(par.multiple_sep)
            }
            if (par.direction == "output" && par.type == "file") {
              // fill in the '$id' and '$key' placeholders of output file names
              val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
            }
            [parName, val]
          }

        // shape expected by the generated process:
        // [id, <one list per file input>..., args, resourcesDir]
        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
      }
      | processObj
      | map { output ->
        def outputFiles = thisConfig.functionality.allArguments
          .findAll { it.type == "file" && it.direction == "output" }
          .indexed()
          .collectEntries{ index, par ->
            // 'def' keeps the value local to this invocation instead of
            // sharing one binding variable between all channel items
            def out = output[index + 1]
            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
            // The parentheses matter: '!out instanceof List' is parsed as
            // '(!out) instanceof List', which is always false.
            if (!(out instanceof List) || out.size() <= 1) {
              if (par.multiple) {
                out = []
              } else {
                assert !par.required :
                    "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
                    " Required output file is missing"
                out = null
              }
            } else if (out.size() == 2 && !par.multiple) {
              out = out[1]
            } else {
              out = out.drop(1)
            }
            [ par.plainName, out ]
          }

        // drop null outputs
        outputFiles.removeAll{it.value == null}

        // a single output is returned as a bare value when so requested
        if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) {
          outputFiles = outputFiles.values()[0]
        }

        [ output[0], outputFiles ]
      }

    // join the output [id, output] with the previous state [id, state, ...]
    out1_ = out0_.join(mid2_, failOnDuplicate: true)
      // input tuple format: [id, output, prev_state, ...]
      // output tuple format: [id, new_state, ...]
      | map{
        def new_state = processArgs["toState"](it)
        [it[0], new_state] + it.drop(3)
      }
      | debug(processArgs, "output")


    emit:
    out1_
  }

  def wf = workflowInstance.cloneWithName(workflowKey)

  // add factory function
  wf.metaClass.run = { runArgs ->
    workflowFactory(runArgs)
  }
  // add config to module for later introspection
  wf.metaClass.config = thisConfig

  return wf
}
// initialise default workflow
// (an empty argument map: only the module defaults are applied)
myWfInstance = workflowFactory([:])

// add workflow to environment
ScriptMeta.current().addDefinition(myWfInstance)

// anonymous workflow for running this module as a standalone
// Steps: make sure an 'id' argument/param exists, print the help message,
// turn the params into a channel, run the module with auto-publishing and
// print the tuples before and after the module.
workflow {
  // NOTE(review): this is a plain reference to thisConfig, not a copy, so the
  // add(0, idArg) below appears to modify thisConfig's argument list as well
  // - confirm that this is intended.
  def mergedConfig = thisConfig
  // copy of params, so that a default id can be set on it
  def mergedParams = [:] + params

  // add id argument if it's not already in the config
  if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) {
    def idArg = [
      'name': '--id',
      'required': false,
      'type': 'string',
      'description': 'A unique id for every entry.',
      'multiple': false
    ]
    mergedConfig.functionality.arguments.add(0, idArg)
    // the config is processed again after the new argument was inserted
    mergedConfig = processConfig(mergedConfig)
  }
  // default id when none was passed on the command line
  if (!mergedParams.containsKey("id")) {
    mergedParams.id = "run"
  }

  helpMessage(mergedConfig)

  channelFromParams(mergedParams, mergedConfig)
    | preprocessInputs("config": mergedConfig)
    | view { "input: $it" }
    // standalone runs publish their results (auto.publish is switched on)
    | myWfInstance.run(
      auto: [ publish: true ]
    )
    | view { "output: $it" }
}
00000000000..6f5cfc8c813 --- /dev/null +++ b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'htseq_count_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Convert the htseq table to a h5mu.\n' + author = 'Robrecht Cannoodt, Angela Oliveira Pisco' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: 
mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..004665f9c67 --- /dev/null +++ b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_params.yaml @@ -0,0 +1,12 @@ +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Input +input_id: # please fill in - example: ["foo"] +input_counts: # please fill in - example: ["counts.tsv"] +reference: # please fill in - example: "gencode_v41_star" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..290bc0b9f25 --- /dev/null +++ b/target/nextflow/mapping/htseq_count_to_h5mu/nextflow_schema.json @@ -0,0 +1,127 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "htseq_count_to_h5mu", +"description": "Convert the htseq table to a h5mu.\n", +"type": "object", +"definitions": { + + + + "outputs" : { + "title": 
"Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input_id": { + "type": + "string", + "description": "Type: List of `string`, required, example: `foo`, multiple_sep: `\";\"`. The obs index for the counts", + "help_text": "Type: List of `string`, required, example: `foo`, multiple_sep: `\";\"`. The obs index for the counts" + + } + + + , + "input_counts": { + "type": + "string", + "description": "Type: List of `file`, required, example: `counts.tsv`, multiple_sep: `\";\"`. The counts as a TSV file as output by HTSeq", + "help_text": "Type: List of `file`, required, example: `counts.tsv`, multiple_sep: `\";\"`. The counts as a TSV file as output by HTSeq." + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required, example: `gencode_v41_star`. The GTF file", + "help_text": "Type: `file`, required, example: `gencode_v41_star`. The GTF file." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/multi_star/.config.vsh.yaml b/target/nextflow/mapping/multi_star/.config.vsh.yaml new file mode 100644 index 00000000000..2fcfe48d2e0 --- /dev/null +++ b/target/nextflow/mapping/multi_star/.config.vsh.yaml @@ -0,0 +1,3080 @@ +functionality: + name: "multi_star" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input/Output" + arguments: + - type: "string" + name: "--input_id" + description: "The ID of the sample being processed. This vector should have\ + \ the same length as the `--input_r1` argument." + info: null + example: + - "mysample" + - "mysample" + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--input_r1" + description: "Paths to the sequences to be mapped. If using Illumina paired-end\ + \ reads, only the R1 files should be passed." 
+ info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L002_R1_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--input_r2" + description: "Paths to the sequences to be mapped. If using Illumina paired-end\ + \ reads, only the R2 files should be passed." + info: null + example: + - "mysample_S1_L001_R2_001.fastq.gz" + - "mysample_S1_L002_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference_index" + alternatives: + - "--genomeDir" + description: "Path to the reference built by star_build_reference. Corresponds\ + \ to the --genomeDir argument in the STAR command." + info: null + example: + - "/path/to/reference" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--reference_gtf" + description: "Path to the gtf reference file." + info: null + example: + - "genes.gtf" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--outFileNamePrefix" + description: "Path to output directory. Corresponds to the --outFileNamePrefix\ + \ argument in the STAR command." + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Processing arguments" + arguments: + - type: "boolean" + name: "--run_htseq_count" + description: "Whether or not to also run htseq-count after STAR." 
+ info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--run_multiqc" + description: "Whether or not to also run MultiQC at the end." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_success_rate" + description: "Fail when the success rate is below this threshold." + info: null + default: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Run Parameters" + arguments: + - type: "integer" + name: "--runRNGseed" + description: "random number generator seed." + info: + step: "star" + orig_arg: "--runRNGseed" + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome Parameters" + arguments: + - type: "file" + name: "--genomeFastaFiles" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." + info: + step: "star" + orig_arg: "--genomeFastaFiles" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdbFileChrStartEnd" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. Multiple files can be\ + \ supplied and will be concatenated." 
+ info: + step: "star" + orig_arg: "--sjdbFileChrStartEnd" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--sjdbGTFfile" + description: "path to the GTF file with annotations" + info: + step: "star" + orig_arg: "--sjdbGTFfile" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFchrPrefix" + description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSMEBL annotations with UCSC genomes)" + info: + step: "star" + orig_arg: "--sjdbGTFchrPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFfeatureExon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: + step: "star" + orig_arg: "--sjdbGTFfeatureExon" + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentTranscript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentTranscript" + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentGene" + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneName" + description: "GTF attribute name for parent gene name" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentGeneName" + example: + - "gene_name" + required: false + 
direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneType" + description: "GTF attribute name for parent gene type" + info: + step: "star" + orig_arg: "--sjdbGTFtagExonParentGeneType" + example: + - "gene_type" + - "gene_biotype" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--sjdbOverhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: + step: "star" + orig_arg: "--sjdbOverhang" + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--sjdbScore" + description: "extra alignment score for alignments that cross database junctions" + info: + step: "star" + orig_arg: "--sjdbScore" + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbInsertSave" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: + step: "star" + orig_arg: "--sjdbInsertSave" + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variation parameters" + arguments: + - type: "string" + name: "--varVCFfile" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: + step: "star" + orig_arg: "--varVCFfile" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Parameters" + arguments: + - type: "string" + name: "--readFilesType" + description: "format of input read files\n\n- Fastx ... 
FASTA or FASTQ\n\ + - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ + \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ + \ --readFilesCommand samtools view" + info: + step: "star" + orig_arg: "--readFilesType" + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesSAMattrKeep" + description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n\ + - None ... do not keep any tags" + info: + step: "star" + orig_arg: "--readFilesSAMattrKeep" + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--readFilesManifest" + description: "path to the \"manifest\" file with the names of read files. The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ + \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ + \ but not tabs are allowed in file names.\nIf read_group_line does not start\ + \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ + If read_group_line starts with ID:, it can contain several fields separated\ + \ by $tab$, and all fields will be be copied verbatim into SAM @RG header\ + \ line." + info: + step: "star" + orig_arg: "--readFilesManifest" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesPrefix" + description: "prefix for the read files names, i.e. 
it will be added in front\ + \ of the strings in --readFilesIn" + info: + step: "star" + orig_arg: "--readFilesPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesCommand" + description: "command line to execute for each of the input file. This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." + info: + step: "star" + orig_arg: "--readFilesCommand" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readMapNumber" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: + step: "star" + orig_arg: "--readMapNumber" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readMatesLengthsIn" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ + \ mates are the same / not the same. NotEqual is safe in all situations." 
+ info: + step: "star" + orig_arg: "--readMatesLengthsIn" + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readNameSeparator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: + step: "star" + orig_arg: "--readNameSeparator" + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readQualityScoreBase" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: + step: "star" + orig_arg: "--readQualityScoreBase" + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Clipping" + arguments: + - type: "string" + name: "--clipAdapterType" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ + \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ + \ ... no adapter clipping, all other clip* parameters are disregarded" + info: + step: "star" + orig_arg: "--clipAdapterType" + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--clip3pNbases" + description: "number(s) of bases to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." + info: + step: "star" + orig_arg: "--clip3pNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--clip3pAdapterSeq" + description: "adapter sequences to clip from 3p of each mate. 
If one value\ + \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ + \ sequence with the length equal to read length" + info: + step: "star" + orig_arg: "--clip3pAdapterSeq" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "double" + name: "--clip3pAdapterMMp" + description: "max proportion of mismatches for 3p adapter clipping for each\ + \ mate. If one value is given, it will be assumed the same for both mates." + info: + step: "star" + orig_arg: "--clip3pAdapterMMp" + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip3pAfterAdapterNbases" + description: "number of bases to clip from 3p of each mate after the adapter\ + \ clipping. If one value is given, it will be assumed the same for both mates." + info: + step: "star" + orig_arg: "--clip3pAfterAdapterNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip5pNbases" + description: "number(s) of bases to clip from 5p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." 
+ info: + step: "star" + orig_arg: "--clip5pNbases" + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Limits" + arguments: + - type: "long" + name: "--limitGenomeGenerateRAM" + description: "maximum available RAM (bytes) for genome generation" + info: + step: "star" + orig_arg: "--limitGenomeGenerateRAM" + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitIObufferSize" + description: "max available buffers size (bytes) for input/output, per thread" + info: + step: "star" + orig_arg: "--limitIObufferSize" + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "long" + name: "--limitOutSAMoneReadBytes" + description: "max size of the SAM record (bytes) for one read. Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: + step: "star" + orig_arg: "--limitOutSAMoneReadBytes" + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJoneRead" + description: "max number of junctions for one read (including all multi-mappers)" + info: + step: "star" + orig_arg: "--limitOutSJoneRead" + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJcollapsed" + description: "max number of collapsed junctions" + info: + step: "star" + orig_arg: "--limitOutSJcollapsed" + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitBAMsortRAM" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ + \ be set to the genome index size. 0 value can only be used with --genomeLoad\ + \ NoSharedMemory option." 
+ info: + step: "star" + orig_arg: "--limitBAMsortRAM" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitSjdbInsertNsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: + step: "star" + orig_arg: "--limitSjdbInsertNsj" + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitNreadsSoft" + description: "soft limit on the number of reads" + info: + step: "star" + orig_arg: "--limitNreadsSoft" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: general" + arguments: + - type: "string" + name: "--outTmpKeep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... keep all files" + info: + step: "star" + orig_arg: "--outTmpKeep" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outStd" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM" + info: + step: "star" + orig_arg: "--outStd" + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outReadsUnmapped" + description: "output of unmapped and partially mapped (i.e. mapped only one\ + \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ + \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: + step: "star" + orig_arg: "--outReadsUnmapped" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outQSconversionAdd" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: + step: "star" + orig_arg: "--outQSconversionAdd" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outMultimapperOrder" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." + info: + step: "star" + orig_arg: "--outMultimapperOrder" + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--outSAMmode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... 
full SAM but without quality scores" + info: + step: "star" + orig_arg: "--outSAMmode" + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMstrandField" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: + step: "star" + orig_arg: "--outSAMstrandField" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ + \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ + \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ + \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ + \ local alignment score, +1/-1 for matches/mismateches, score* penalties for\ + \ indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n\ + - nM ... number of mismatches. For PE reads, sum over two mates.\n\ + - NM ... edit distance to the reference (number of mismatched + inserted\ + \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ + \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ + \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ + \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ + \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ + \ annotated, 20 is added to its motif value.\n- jI ... 
start and\ + \ end of introns for all junctions (1-based).\n- XS ... alignment\ + \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ + \ string. Standard SAM tag.\n- ch ... marks all segment of all chimeric\ + \ alingments for --chimOutType WithinBAM output.\n- cN ... number\ + \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ + \ ... variant allele\n- vG ... genomic coordinate of the variant\ + \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ + \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ + \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ + \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ + \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ + \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ + \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ + \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ + \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ + \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ + \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ + \ genome generated with --genomeTransformType Diploid .\n- rB ...\ + \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ + \ of the variant." + info: + step: "star" + orig_arg: "--outSAMattributes" + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMattrIHstart" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." 
+ info: + step: "star" + orig_arg: "--outSAMattrIHstart" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMunmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ... output unmapped reads within the main SAM\ + \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ + \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ + \ to its mapped mate. Only affects multi-mapping reads." + info: + step: "star" + orig_arg: "--outSAMunmapped" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMorder" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: + step: "star" + orig_arg: "--outSAMorder" + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMprimaryFlag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ + \ the best score is primary\n- AllBestScore ... all alignments with the best\ + \ score are primary" + info: + step: "star" + orig_arg: "--outSAMprimaryFlag" + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMreadID" + description: "read ID record type\n\n- Standard ... 
first word (until space)\ + \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ + \ read number (index) in the FASTx file" + info: + step: "star" + orig_arg: "--outSAMreadID" + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMmapqUnique" + description: "0 to 255: the MAPQ value for unique mappers" + info: + step: "star" + orig_arg: "--outSAMmapqUnique" + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagOR" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ + \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ + \ are not set otherwise." + info: + step: "star" + orig_arg: "--outSAMflagOR" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagAND" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set\ + \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ + \ are not set otherwise." + info: + step: "star" + orig_arg: "--outSAMflagAND" + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattrRGline" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ + \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ + \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ + \ correspond to different (comma separated) input files in --readFilesIn.\ + \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ + \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + info: + step: "star" + orig_arg: "--outSAMattrRGline" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderHD" + description: "@HD (header) line of the SAM header" + info: + step: "star" + orig_arg: "--outSAMheaderHD" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderPG" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: + step: "star" + orig_arg: "--outSAMheaderPG" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderCommentFile" + description: "path to the file with @CO (comment) lines of the SAM header" + info: + step: "star" + orig_arg: "--outSAMheaderCommentFile" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMfilter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ + \ at the mapping stage." + info: + step: "star" + orig_arg: "--outSAMfilter" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMmultNmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. 
Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ... all alignments (up to\ + \ --outFilterMultimapNmax) will be output" + info: + step: "star" + orig_arg: "--outSAMmultNmax" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMtlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ + - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: + step: "star" + orig_arg: "--outSAMtlen" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMcompression" + description: "-1 to 10 BAM compression level, -1=default compression (6?),\ + \ 0=no compression, 10=maximum compression" + info: + step: "star" + orig_arg: "--outBAMcompression" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingThreadN" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." 
+ info: + step: "star" + orig_arg: "--outBAMsortingThreadN" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingBinsN" + description: ">0: number of genome bins for coordinate-sorting" + info: + step: "star" + orig_arg: "--outBAMsortingBinsN" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "BAM processing" + arguments: + - type: "string" + name: "--bamRemoveDuplicatesType" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ + \ - ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: + step: "star" + orig_arg: "--bamRemoveDuplicatesType" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--bamRemoveDuplicatesMate2basesN" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: + step: "star" + orig_arg: "--bamRemoveDuplicatesMate2basesN" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Wiggle" + arguments: + - type: "string" + name: "--outWigType" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... 
signal from\ + \ only 2nd read" + info: + step: "star" + orig_arg: "--outWigType" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outWigStrand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: + step: "star" + orig_arg: "--outWigStrand" + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigReferencesPrefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: + step: "star" + orig_arg: "--outWigReferencesPrefix" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigNorm" + description: "type of normalization for the signal\n\n- RPM ... reads per\ + \ million of mapped reads\n- None ... no normalization, \"raw\" counts" + info: + step: "star" + orig_arg: "--outWigNorm" + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering" + arguments: + - type: "string" + name: "--outFilterType" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... 
keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: + step: "star" + orig_arg: "--outFilterType" + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapScoreRange" + description: "the score range below the maximum score for multimapping alignments" + info: + step: "star" + orig_arg: "--outFilterMultimapScoreRange" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapNmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than\ + \ this value.\n\nOtherwise no alignments will be output, and the read will\ + \ be counted as \"mapped to too many loci\" in the Log.final.out ." + info: + step: "star" + orig_arg: "--outFilterMultimapNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMismatchNmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: + step: "star" + orig_arg: "--outFilterMismatchNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverLmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." + info: + step: "star" + orig_arg: "--outFilterMismatchNoverLmax" + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverReadLmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." 
+ info: + step: "star" + orig_arg: "--outFilterMismatchNoverReadLmax" + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterScoreMin" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: + step: "star" + orig_arg: "--outFilterScoreMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterScoreMinOverLread" + description: "same as outFilterScoreMin, but normalized to read length (sum\ + \ of mates' lengths for paired-end reads)" + info: + step: "star" + orig_arg: "--outFilterScoreMinOverLread" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMatchNmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: + step: "star" + orig_arg: "--outFilterMatchNmin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMatchNminOverLread" + description: "same as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: + step: "star" + orig_arg: "--outFilterMatchNminOverLread" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronMotifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter\ + \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ... filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. 
The annotated non-canonical\ + \ junctions will be kept." + info: + step: "star" + orig_arg: "--outFilterIntronMotifs" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronStrands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: + step: "star" + orig_arg: "--outFilterIntronStrands" + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--outSJtype" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: + step: "star" + orig_arg: "--outSJtype" + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--outSJfilterReads" + description: "which reads to consider for collapsed splice junctions output\n\ + \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ + \ mapping reads only" + info: + step: "star" + orig_arg: "--outSJfilterReads" + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSJfilterOverhangMin" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + does not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterOverhangMin" + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountUniqueMin" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ + \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ + \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ + \ are satisfied\ndoes not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterCountUniqueMin" + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountTotalMin" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterCountTotalMin" + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterDistToOtherSJmin" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterDistToOtherSJmin" + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterIntronMaxVsReadN" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ + \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: + step: "star" + orig_arg: "--outSJfilterIntronMaxVsReadN" + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Scoring" + arguments: + - type: "integer" + name: "--scoreGap" + description: "splice junction penalty (independent on intron motif)" + info: + step: "star" + orig_arg: "--scoreGap" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapNoncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: + step: "star" + orig_arg: "--scoreGapNoncan" + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapGCAG" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: + step: "star" + orig_arg: "--scoreGapGCAG" + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapATAC" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: + step: "star" + orig_arg: "--scoreGapATAC" + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGenomicLengthLog2scale" + description: "extra score logarithmically scaled with genomic length of the\ + \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" + info: + step: "star" + orig_arg: "--scoreGenomicLengthLog2scale" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelOpen" + description: "deletion open penalty" + info: + step: "star" + orig_arg: "--scoreDelOpen" + example: + - -2 + required: false + direction: 
"input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelBase" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: + step: "star" + orig_arg: "--scoreDelBase" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsOpen" + description: "insertion open penalty" + info: + step: "star" + orig_arg: "--scoreInsOpen" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsBase" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: + step: "star" + orig_arg: "--scoreInsBase" + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreStitchSJshift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: + step: "star" + orig_arg: "--scoreStitchSJshift" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seedSearchStartLmax" + description: "defines the search start point through the read - the read is\ + \ split into pieces no longer than this value" + info: + step: "star" + orig_arg: "--seedSearchStartLmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--seedSearchStartLmaxOverLread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: + step: "star" + orig_arg: "--seedSearchStartLmaxOverLread" + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSearchLmax" + 
description: "defines the maximum length of the seeds, if =0 seed length is\ + \ not limited" + info: + step: "star" + orig_arg: "--seedSearchLmax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMultimapNmax" + description: "only pieces that map fewer than this value are utilized in the\ + \ stitching procedure" + info: + step: "star" + orig_arg: "--seedMultimapNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerReadNmax" + description: "max number of seeds per read" + info: + step: "star" + orig_arg: "--seedPerReadNmax" + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerWindowNmax" + description: "max number of seeds per window" + info: + step: "star" + orig_arg: "--seedPerWindowNmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedNoneLociPerWindow" + description: "max number of one seed loci per window" + info: + step: "star" + orig_arg: "--seedNoneLociPerWindow" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSplitMin" + description: "min length of the seed sequences split by Ns or mate gap" + info: + step: "star" + orig_arg: "--seedSplitMin" + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMapMin" + description: "min length of seeds to be mapped" + info: + step: "star" + orig_arg: "--seedMapMin" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMin" + description: "minimum intron size, 
genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: + step: "star" + orig_arg: "--alignIntronMin" + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMax" + description: "maximum intron size, if 0, max intron size will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: + step: "star" + orig_arg: "--alignIntronMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignMatesGapMax" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: + step: "star" + orig_arg: "--alignMatesGapMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJoverhangMin" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: + step: "star" + orig_arg: "--alignSJoverhangMin" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJstitchMismatchNmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ + \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: + step: "star" + orig_arg: "--alignSJstitchMismatchNmax" + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--alignSJDBoverhangMin" + description: "minimum overhang (i.e. 
block size) for annotated (sjdb) spliced\ + \ alignments" + info: + step: "star" + orig_arg: "--alignSJDBoverhangMin" + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSplicedMateMapLmin" + description: "minimum mapped length for a read mate that is spliced" + info: + step: "star" + orig_arg: "--alignSplicedMateMapLmin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alignSplicedMateMapLminOverLmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: + step: "star" + orig_arg: "--alignSplicedMateMapLminOverLmate" + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignWindowsPerReadNmax" + description: "max number of windows per read" + info: + step: "star" + orig_arg: "--alignWindowsPerReadNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerWindowNmax" + description: "max number of transcripts per window" + info: + step: "star" + orig_arg: "--alignTranscriptsPerWindowNmax" + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerReadNmax" + description: "max number of different alignments per read to consider" + info: + step: "star" + orig_arg: "--alignTranscriptsPerReadNmax" + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsType" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... 
fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: + step: "star" + orig_arg: "--alignEndsType" + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsProtrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ + \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion\ + \ as concordant pairs\n- DiscordantPair ... report alignments\ + \ with non-zero protrusion as discordant pairs" + info: + step: "star" + orig_arg: "--alignEndsProtrude" + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignSoftClipAtReferenceEnds" + description: "allow the soft-clipping of the alignments past the end of the\ + \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ + \ with Cufflinks" + info: + step: "star" + orig_arg: "--alignSoftClipAtReferenceEnds" + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignInsertionFlush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... insertions are flushed to the right" + info: + step: "star" + orig_arg: "--alignInsertionFlush" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Paired-End reads" + arguments: + - type: "integer" + name: "--peOverlapNbasesMin" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. 
Specify >0 value to switch on the \"merging of overlapping\ + \ mates\" algorithm." + info: + step: "star" + orig_arg: "--peOverlapNbasesMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--peOverlapMMp" + description: "maximum proportion of mismatched bases in the overlap area" + info: + step: "star" + orig_arg: "--peOverlapMMp" + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--winAnchorMultimapNmax" + description: "max number of loci anchors are allowed to map to" + info: + step: "star" + orig_arg: "--winAnchorMultimapNmax" + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winBinNbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins." 
+ info: + step: "star" + orig_arg: "--winBinNbits" + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winAnchorDistNbins" + description: "max number of bins between two anchors that allows aggregation\ + \ of anchors into one window" + info: + step: "star" + orig_arg: "--winAnchorDistNbins" + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winFlankNbins" + description: "log2(winFlank), where winFlank is the size of the left and right\ + \ flanking regions for each window" + info: + step: "star" + orig_arg: "--winFlankNbins" + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--winReadCoverageRelativeMin" + description: "minimum relative coverage of the read sequence by the seeds in\ + \ a window, for STARlong algorithm only." + info: + step: "star" + orig_arg: "--winReadCoverageRelativeMin" + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winReadCoverageBasesMin" + description: "minimum number of bases covered by the seeds in a window, for\ + \ STARlong algorithm only." + info: + step: "star" + orig_arg: "--winReadCoverageBasesMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chimOutType" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ + - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ + - WithinBAM HardClip ... 
(default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... soft-clipping in the CIGAR for supplemental chimeric alignments" + info: + step: "star" + orig_arg: "--chimOutType" + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimSegmentMin" + description: "minimum length of chimeric segment length, if ==0, no chimeric\ + \ output" + info: + step: "star" + orig_arg: "--chimSegmentMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreMin" + description: "minimum total (summed) score of the chimeric segments" + info: + step: "star" + orig_arg: "--chimScoreMin" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreDropMax" + description: "max drop (difference) of chimeric score (the sum of scores of\ + \ all chimeric segments) from the read length" + info: + step: "star" + orig_arg: "--chimScoreDropMax" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreSeparation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: + step: "star" + orig_arg: "--chimScoreSeparation" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreJunctionNonGTAG" + description: "penalty for a non-GT/AG chimeric junction" + info: + step: "star" + orig_arg: "--chimScoreJunctionNonGTAG" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimJunctionOverhangMin" + description: "minimum overhang 
for a chimeric junction" + info: + step: "star" + orig_arg: "--chimJunctionOverhangMin" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimSegmentReadGapMax" + description: "maximum gap in the read sequence between chimeric segments" + info: + step: "star" + orig_arg: "--chimSegmentReadGapMax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--chimFilter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: + step: "star" + orig_arg: "--chimFilter" + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimMainSegmentMultNmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: + step: "star" + orig_arg: "--chimMainSegmentMultNmax" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapNmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ + \ old scheme for chimeric detection which only considered unique alignments" + info: + step: "star" + orig_arg: "--chimMultimapNmax" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapScoreRange" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. 
Only works with --chimMultimapNmax > 1" + info: + step: "star" + orig_arg: "--chimMultimapScoreRange" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimNonchimScoreDropMin" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: + step: "star" + orig_arg: "--chimNonchimScoreDropMin" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimOutJunctionFormat" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ + \ no comment lines/headers\n- 1 ... comment lines at the end of the file:\ + \ command line and Nreads: total, unique/multi-mapping" + info: + step: "star" + orig_arg: "--chimOutJunctionFormat" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quantMode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: + step: "star" + orig_arg: "--quantMode" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--quantTranscriptomeBAMcompression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ + \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ + - 10 ... 
maximum compression" + info: + step: "star" + orig_arg: "--quantTranscriptomeBAMcompression" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--quantTranscriptomeBan" + description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ + \ prohibit indels, soft clipping and single-end alignments - compatible with\ + \ RSEM\n- Singleend ... prohibit single-end alignments" + info: + step: "star" + orig_arg: "--quantTranscriptomeBan" + example: + - "IndelSoftclipSingleend" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopassMode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: + step: "star" + orig_arg: "--twopassMode" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--twopass1readsN" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." + info: + step: "star" + orig_arg: "--twopass1readsN" + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "WASP parameters" + arguments: + - type: "string" + name: "--waspOutputMode" + description: "WASP allele-specific output type. This is re-implementation of\ + \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ + \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ + \ .\n\n- SAMtag ... 
add WASP tags to the alignments that pass WASP filtering" + info: + step: "star" + orig_arg: "--waspOutputMode" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "STARsolo (single cell RNA-seq) parameters" + arguments: + - type: "string" + name: "--soloType" + description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ + \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ + \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ + \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ + \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ + \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ + \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ + \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ + \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ + \ UMI sequences, alignments deduplicated according to alignment start and\ + \ end (after extending soft-clipped bases)" + info: + step: "star" + orig_arg: "--soloType" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCBwhitelist" + description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ + \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ + \ all cell barcodes are allowed" + info: + step: "star" + orig_arg: "--soloCBwhitelist" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--soloCBstart" + description: "cell barcode start base" + info: + step: "star" + orig_arg: "--soloCBstart" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloCBlen" + description: "cell barcode length" + info: + step: "star" + orig_arg: "--soloCBlen" + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIstart" + description: "UMI start base" + info: + step: "star" + orig_arg: "--soloUMIstart" + example: + - 17 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIlen" + description: "UMI length" + info: + step: "star" + orig_arg: "--soloUMIlen" + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeReadLength" + description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ + - 0 ... not defined, do not check" + info: + step: "star" + orig_arg: "--soloBarcodeReadLength" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeMate" + description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ + \n- 0 ... barcode sequence is on separate read, which should always be the\ + \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ + \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" + info: + step: "star" + orig_arg: "--soloBarcodeMate" + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBposition" + description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ + \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ + \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ + \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ + \ position with of the CB start(end) with respect to the Anchor Base\nString\ + \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ + \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" + info: + step: "star" + orig_arg: "--soloCBposition" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIposition" + description: "position of the UMI on the barcode read, same as soloCBposition\n\ + \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ + \ 3_9_3_14" + info: + step: "star" + orig_arg: "--soloUMIposition" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloAdapterSequence" + description: "adapter sequence to anchor barcodes. Only one adapter sequence\ + \ is allowed." + info: + step: "star" + orig_arg: "--soloAdapterSequence" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloAdapterMismatchesNmax" + description: "maximum number of mismatches allowed in adapter sequence." 
+ info: + step: "star" + orig_arg: "--soloAdapterMismatchesNmax" + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBmatchWLtype" + description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ + \ ... only exact matches allowed\n- 1MM \ + \ ... only one match in whitelist with 1 mismatched base allowed.\ + \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ + \ ... multiple matches in whitelist with 1 mismatched\ + \ base allowed, posterior probability calculation is used choose one of the\ + \ matches.\nAllowed CBs have to have at least one read with exact match. This\ + \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ + \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ + \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ + \ multimatching to WL is allowed for CBs with N-bases. This option matches\ + \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ + \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ + \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ + \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + info: + step: "star" + orig_arg: "--soloCBmatchWLtype" + example: + - "1MM_multi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeSeq" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ + \ CR UR .\nThis parameter is required when running STARsolo with input from\ + \ SAM." 
+ info: + step: "star" + orig_arg: "--soloInputSAMattrBarcodeSeq" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeQual" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ + \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ + \ to all bases." + info: + step: "star" + orig_arg: "--soloInputSAMattrBarcodeQual" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloStrand" + description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ + \ information\n- Forward ... read strand same as the original RNA molecule\n\ + - Reverse ... read strand opposite to the original RNA molecule" + info: + step: "star" + orig_arg: "--soloStrand" + example: + - "Forward" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloFeatures" + description: "genomic features for which the UMI counts per Cell Barcode are\ + \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ + - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ + \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ + \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ + \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ + \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ + \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ + \ Do not count reads with 100% exonic overlap in the antisense direction." 
+ info: + step: "star" + orig_arg: "--soloFeatures" + example: + - "Gene" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloMultiMappers" + description: "counting method for reads mapping to multiple genes\n\n- Unique\ + \ ... count only reads that map to unique genes\n- Uniform ... uniformly\ + \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ + \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ + \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ + \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ + \ Maximization algorithm" + info: + step: "star" + orig_arg: "--soloMultiMappers" + example: + - "Unique" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIdedup" + description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ + \ ... all UMIs with 1 mismatch distance to each other\ + \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ + \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ + \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ + \ but with more stringent criteria for duplicate UMIs\n- Exact \ + \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ + \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ + \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." + info: + step: "star" + orig_arg: "--soloUMIdedup" + example: + - "1MM_All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIfiltering" + description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ + - - ... basic filtering: remove UMIs with N and homopolymers\ + \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... 
basic + remove\ + \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ + \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ + \ ... basic + remove lower-count UMIs that map to more than one gene, matching\ + \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" + info: + step: "star" + orig_arg: "--soloUMIfiltering" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFileNames" + description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ + \ barcode_sequences cell_feature_count_matrix" + info: + step: "star" + orig_arg: "--soloOutFileNames" + example: + - "Solo.out/" + - "features.tsv" + - "barcodes.tsv" + - "matrix.mtx" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellFilter" + description: "cell filtering type and parameters\n\n- None ... do\ + \ not output filtered cells\n- TopCells ... only report top cells by\ + \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ + \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ + \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ + \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ + \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ + \ in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun\ + \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ + \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ + \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ + \ 0.99 10 45000 90000 500 0.01\ + \ 20000 0.01 10000" + info: + step: "star" + orig_arg: "--soloCellFilter" + example: + - "CellRanger2.2" + - "3000" + - "0.99" + - "10" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFormatFeaturesGeneField3" + description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ + \ is output." + info: + step: "star" + orig_arg: "--soloOutFormatFeaturesGeneField3" + example: + - "Gene Expression" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellReadStats" + description: "Output reads statistics for each CB\n\n- Standard ... standard\ + \ output" + info: + step: "star" + orig_arg: "--soloCellReadStats" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "HTSeq arguments" + arguments: + - type: "string" + name: "--stranded" + alternatives: + - "-s" + description: "Whether the data is from a strand-specific assay. 'reverse' means\ + \ 'yes' with reversed strand interpretation." + info: + step: "htseq" + orig_arg: "--stranded" + default: + - "yes" + required: false + choices: + - "yes" + - "no" + - "reverse" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--minimum_alignment_quality" + alternatives: + - "-a" + - "--minaqual" + description: "Skip all reads with MAPQ alignment quality lower than the given\ + \ minimum value. 
\nMAPQ is the 5th column of a SAM/BAM file and its usage\ + \ depends on the software \nused to map the reads.\n" + info: + step: "htseq" + orig_arg: "--minaqual" + default: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--type" + alternatives: + - "-t" + description: "Feature type (3rd column in GTF file) to be used, all features\ + \ of other type are ignored (default, suitable for Ensembl GTF files: exon)" + info: + step: "htseq" + orig_arg: "--type" + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--id_attribute" + alternatives: + - "-i" + description: "GTF attribute to be used as feature ID (default, suitable for\ + \ Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option)\ + \ within the same GTF attribute will be added\ntogether. The typical way of\ + \ using this option is to count all exonic reads from each gene\nand add the\ + \ exons but other uses are possible as well. You can call this option multiple\n\ + times: in that case, the combination of all attributes separated by colons\ + \ (:) will be used\nas a unique identifier, e.g. for exons you might use -i\ + \ gene_id -i exon_number.\n" + info: + step: "htseq" + orig_arg: "--idattr" + example: + - "gene_id" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--additional_attributes" + description: "Additional feature attributes (suitable for Ensembl GTF files:\ + \ gene_name). 
Use multiple times\nfor more than one additional attribute.\ + \ These attributes are only used as annotations in the\noutput, while the\ + \ determination of how the counts are added together is done based on option\ + \ -i.\n" + info: + step: "htseq" + orig_arg: "--additional-attr" + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--add_chromosome_info" + description: "Store information about the chromosome of each feature as an additional\ + \ attribute\n(e.g. colunm in the TSV output file).\n" + info: + step: "htseq" + orig_arg: "--add-chromosome-info" + direction: "input" + dest: "par" + - type: "string" + name: "--mode" + alternatives: + - "-m" + description: "Mode to handle reads overlapping more than one feature." + info: + step: "htseq" + orig_arg: "--mode" + default: + - "union" + required: false + choices: + - "union" + - "intersection-strict" + - "intersection-nonempty" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--non_unique" + description: "Whether and how to score reads that are not uniquely aligned or\ + \ ambiguously assigned to features." + info: + step: "htseq" + orig_arg: "--nonunique" + default: + - "none" + required: false + choices: + - "none" + - "all" + - "fraction" + - "random" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--secondary_alignments" + description: "Whether to score secondary alignments (0x100 flag)." + info: + step: "htseq" + orig_arg: "--secondary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--supplementary_alignments" + description: "Whether to score supplementary alignments (0x800 flag)." 
+ info: + step: "htseq" + orig_arg: "--supplementary-alignments" + required: false + choices: + - "score" + - "ignore" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--counts_output_sparse" + description: "Store the counts as a sparse matrix (mtx, h5ad, loom)." + info: + step: "htseq" + orig_arg: "--counts-output-sparse" + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Align fastq files using STAR." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "docker" + env: + - "STAR_VERSION 2.7.10b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + - type: "apt" + packages: + - "samtools" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "pyyaml" + - "HTSeq" + - 
"multiprocess" + - "gtfparse<2.0" + - "pandas" + - "multiqc~=1.15.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "pytest" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star/multi_star" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/multi_star/main.nf b/target/nextflow/mapping/multi_star/main.nf new file mode 100644 index 00000000000..311ea13d63f --- /dev/null +++ b/target/nextflow/mapping/multi_star/main.nf @@ -0,0 +1,6497 @@ +// multi_star 0.12.4 +// +// This 
wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "multi_star", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input/Output", + "arguments" : [ + { + "type" : "string", + "name" 
: "--input_id", + "description" : "The ID of the sample being processed. This vector should have the same length as the `--input_r1` argument.", + "example" : [ + "mysample", + "mysample" + ], + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input_r1", + "description" : "Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R1 files should be passed.", + "example" : [ + "mysample_S1_L001_R1_001.fastq.gz", + "mysample_S1_L002_R1_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input_r2", + "description" : "Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R2 files should be passed.", + "example" : [ + "mysample_S1_L001_R2_001.fastq.gz", + "mysample_S1_L002_R2_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference_index", + "alternatives" : [ + "--genomeDir" + ], + "description" : "Path to the reference built by star_build_reference. 
Corresponds to the --genomeDir argument in the STAR command.", + "example" : [ + "/path/to/reference" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference_gtf", + "description" : "Path to the gtf reference file.", + "example" : [ + "genes.gtf" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--outFileNamePrefix" + ], + "description" : "Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command.", + "example" : [ + "/path/to/foo" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Processing arguments", + "arguments" : [ + { + "type" : "boolean", + "name" : "--run_htseq_count", + "description" : "Whether or not to also run htseq-count after STAR.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--run_multiqc", + "description" : "Whether or not to also run MultiQC at the end.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--min_success_rate", + "description" : "Fail when the success rate is below this threshold.", + "default" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Run Parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--runRNGseed", + "description" : 
"random number generator seed.", + "info" : { + "step" : "star", + "orig_arg" : "--runRNGseed" + }, + "example" : [ + 777 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Genome Parameters", + "arguments" : [ + { + "type" : "file", + "name" : "--genomeFastaFiles", + "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", + "info" : { + "step" : "star", + "orig_arg" : "--genomeFastaFiles" + }, + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Splice Junctions Database", + "arguments" : [ + { + "type" : "string", + "name" : "--sjdbFileChrStartEnd", + "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbFileChrStartEnd" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--sjdbGTFfile", + "description" : "path to the GTF file with annotations", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbGTFfile" + }, + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFchrPrefix", + "description" : "prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSEMBL annotations with UCSC genomes)", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbGTFchrPrefix" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFfeatureExon", + "description" : "feature type in GTF file to be used as exons for building transcripts", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbGTFfeatureExon" + }, + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentTranscript", + "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbGTFtagExonParentTranscript" + }, + "example" : [ + "transcript_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGene", + "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbGTFtagExonParentGene" + }, + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneName", + "description" : "GTF attribute name for parent gene name", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbGTFtagExonParentGeneName" + }, + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneType", + "description" : "GTF attribute name for parent gene type", + "info" : { + "step" : "star", + "orig_arg" :
"--sjdbGTFtagExonParentGeneType" + }, + "example" : [ + "gene_type", + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--sjdbOverhang", + "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbOverhang" + }, + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--sjdbScore", + "description" : "extra alignment score for alignments that cross database junctions", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbScore" + }, + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbInsertSave", + "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", + "info" : { + "step" : "star", + "orig_arg" : "--sjdbInsertSave" + }, + "example" : [ + "Basic" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Variation parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--varVCFfile", + "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 
0/1", + "info" : { + "step" : "star", + "orig_arg" : "--varVCFfile" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Read Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--readFilesType", + "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view", + "info" : { + "step" : "star", + "orig_arg" : "--readFilesType" + }, + "example" : [ + "Fastx" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesSAMattrKeep", + "description" : "for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... do not keep any tags", + "info" : { + "step" : "star", + "orig_arg" : "--readFilesSAMattrKeep" + }, + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--readFilesManifest", + "description" : "path to the \\"manifest\\" file with the names of read files. 
The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be copied verbatim into SAM @RG header line.", + "info" : { + "step" : "star", + "orig_arg" : "--readFilesManifest" + }, + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesPrefix", + "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", + "info" : { + "step" : "star", + "orig_arg" : "--readFilesPrefix" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesCommand", + "description" : "command line to execute for each of the input files.
This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", + "info" : { + "step" : "star", + "orig_arg" : "--readFilesCommand" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--readMapNumber", + "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", + "info" : { + "step" : "star", + "orig_arg" : "--readMapNumber" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readMatesLengthsIn", + "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", + "info" : { + "step" : "star", + "orig_arg" : "--readMatesLengthsIn" + }, + "example" : [ + "NotEqual" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readNameSeparator", + "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", + "info" : { + "step" : "star", + "orig_arg" : "--readNameSeparator" + }, + "example" : [ + "/" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--readQualityScoreBase", + "description" : "number to be subtracted from the ASCII code to get Phred quality score", + "info" : { + "step" : "star", + "orig_arg" : "--readQualityScoreBase" + }, + "example" : [ + 33 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Read Clipping", + "arguments" : [ 
+ { + "type" : "string", + "name" : "--clipAdapterType", + "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", + "info" : { + "step" : "star", + "orig_arg" : "--clipAdapterType" + }, + "example" : [ + "Hamming" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip3pNbases", + "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", + "info" : { + "step" : "star", + "orig_arg" : "--clip3pNbases" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--clip3pAdapterSeq", + "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", + "info" : { + "step" : "star", + "orig_arg" : "--clip3pAdapterSeq" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--clip3pAdapterMMp", + "description" : "max proportion of mismatches for 3p adapter clipping for each mate. 
If one value is given, it will be assumed the same for both mates.", + "info" : { + "step" : "star", + "orig_arg" : "--clip3pAdapterMMp" + }, + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip3pAfterAdapterNbases", + "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", + "info" : { + "step" : "star", + "orig_arg" : "--clip3pAfterAdapterNbases" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip5pNbases", + "description" : "number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates.", + "info" : { + "step" : "star", + "orig_arg" : "--clip5pNbases" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Limits", + "arguments" : [ + { + "type" : "long", + "name" : "--limitGenomeGenerateRAM", + "description" : "maximum available RAM (bytes) for genome generation", + "info" : { + "step" : "star", + "orig_arg" : "--limitGenomeGenerateRAM" + }, + "example" : [ + 31000000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitIObufferSize", + "description" : "max available buffers size (bytes) for input/output, per thread", + "info" : { + "step" : "star", + "orig_arg" : "--limitIObufferSize" + }, + "example" : [ + 30000000, + 50000000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitOutSAMoneReadBytes", + "description" 
: "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", + "info" : { + "step" : "star", + "orig_arg" : "--limitOutSAMoneReadBytes" + }, + "example" : [ + 100000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitOutSJoneRead", + "description" : "max number of junctions for one read (including all multi-mappers)", + "info" : { + "step" : "star", + "orig_arg" : "--limitOutSJoneRead" + }, + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitOutSJcollapsed", + "description" : "max number of collapsed junctions", + "info" : { + "step" : "star", + "orig_arg" : "--limitOutSJcollapsed" + }, + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitBAMsortRAM", + "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 
0 value can only be used with --genomeLoad NoSharedMemory option.", + "info" : { + "step" : "star", + "orig_arg" : "--limitBAMsortRAM" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitSjdbInsertNsj", + "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", + "info" : { + "step" : "star", + "orig_arg" : "--limitSjdbInsertNsj" + }, + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitNreadsSoft", + "description" : "soft limit on the number of reads", + "info" : { + "step" : "star", + "orig_arg" : "--limitNreadsSoft" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output: general", + "arguments" : [ + { + "type" : "string", + "name" : "--outTmpKeep", + "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", + "info" : { + "step" : "star", + "orig_arg" : "--outTmpKeep" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outStd", + "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. 
Requires --outSAMtype BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM", + "info" : { + "step" : "star", + "orig_arg" : "--outStd" + }, + "example" : [ + "Log" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outReadsUnmapped", + "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", + "info" : { + "step" : "star", + "orig_arg" : "--outReadsUnmapped" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outQSconversionAdd", + "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", + "info" : { + "step" : "star", + "orig_arg" : "--outQSconversionAdd" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outMultimapperOrder", + "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. 
This option will become default in the future releases.", + "info" : { + "step" : "star", + "orig_arg" : "--outMultimapperOrder" + }, + "example" : [ + "Old_2.4" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output: SAM and BAM", + "arguments" : [ + { + "type" : "string", + "name" : "--outSAMmode", + "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMmode" + }, + "example" : [ + "Full" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMstrandField", + "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMstrandField" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMattributes", + "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the read maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismatches, score* penalties for indels and gaps. For PE reads, total score for two mates. Standard SAM tag.\n- nM ... number of mismatches.
For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --outSAMstrandField.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segments of all chimeric alignments for --chimOutType WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n***Unsupported/undocumented:\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n- rB ... alignment block read/genomic coordinates.\n- vR ...
read coordinate of the variant.", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMattributes" + }, + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMattrIHstart", + "description" : "start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie.", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMattrIHstart" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMunmapped", + "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMunmapped" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMorder", + "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMorder" + }, + "example" : [ + "Paired" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMprimaryFlag", + "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... 
only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMprimaryFlag" + }, + "example" : [ + "OneBestScore" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMreadID", + "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number (index) in the FASTx file", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMreadID" + }, + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMmapqUnique", + "description" : "0 to 255: the MAPQ value for unique mappers", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMmapqUnique" + }, + "example" : [ + 255 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMflagOR", + "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMflagOR" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMflagAND", + "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set by STAR, but before outSAMflagOR.
Can be used to unset specific bits that are not set otherwise.", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMflagAND" + }, + "example" : [ + 65535 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMattrRGline", + "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --outSAMattrRGline ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspond to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMattrRGline" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderHD", + "description" : "@HD (header) line of the SAM header", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMheaderHD" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderPG", + "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMheaderPG" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderCommentFile", + "description" : "path to the file with @CO (comment) lines of the SAM header", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMheaderCommentFile" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { +
"type" : "string", + "name" : "--outSAMfilter", + "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMfilter" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMmultNmax", + "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... all alignments (up to --outFilterMultimapNmax) will be output", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMmultNmax" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMtlen", + "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. 
This is different from 1 for overlapping mates with protruding ends", + "info" : { + "step" : "star", + "orig_arg" : "--outSAMtlen" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMcompression", + "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", + "info" : { + "step" : "star", + "orig_arg" : "--outBAMcompression" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMsortingThreadN", + "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", + "info" : { + "step" : "star", + "orig_arg" : "--outBAMsortingThreadN" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMsortingBinsN", + "description" : ">0: number of genome bins for coordinate-sorting", + "info" : { + "step" : "star", + "orig_arg" : "--outBAMsortingBinsN" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "BAM processing", + "arguments" : [ + { + "type" : "string", + "name" : "--bamRemoveDuplicatesType", + "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... 
mark duplicate unique mappers but not multimappers.", + "info" : { + "step" : "star", + "orig_arg" : "--bamRemoveDuplicatesType" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--bamRemoveDuplicatesMate2basesN", + "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", + "info" : { + "step" : "star", + "orig_arg" : "--bamRemoveDuplicatesMate2basesN" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Wiggle", + "arguments" : [ + { + "type" : "string", + "name" : "--outWigType", + "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", + "info" : { + "step" : "star", + "orig_arg" : "--outWigType" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigStrand", + "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", + "info" : { + "step" : "star", + "orig_arg" : "--outWigStrand" + }, + "example" : [ + "Stranded" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigReferencesPrefix", + "description" : "prefix matching reference names to include in the output wiggle file, e.g. 
\\"chr\\", default \\"-\\" - include all references", + "info" : { + "step" : "star", + "orig_arg" : "--outWigReferencesPrefix" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigNorm", + "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", + "info" : { + "step" : "star", + "orig_arg" : "--outWigNorm" + }, + "example" : [ + "RPM" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Filtering", + "arguments" : [ + { + "type" : "string", + "name" : "--outFilterType", + "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterType" + }, + "example" : [ + "Normal" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapScoreRange", + "description" : "the score range below the maximum score for multimapping alignments", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterMultimapScoreRange" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapNmax", + "description" : "maximum number of loci the read is allowed to map to. 
Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterMultimapNmax" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMismatchNmax", + "description" : "alignment will be output only if it has no more mismatches than this value.", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterMismatchNmax" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverLmax", + "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterMismatchNoverLmax" + }, + "example" : [ + 0.3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverReadLmax", + "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterMismatchNoverReadLmax" + }, + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterScoreMin", + "description" : "alignment will be output only if its score is higher than or equal to this value.", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterScoreMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", +
"multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterScoreMinOverLread", + "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterScoreMinOverLread" + }, + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMatchNmin", + "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterMatchNmin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMatchNminOverLread", + "description" : "same as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterMatchNminOverLread" + }, + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outFilterIntronMotifs", + "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database.
The annotated non-canonical junctions will be kept.", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterIntronMotifs" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outFilterIntronStrands", + "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", + "info" : { + "step" : "star", + "orig_arg" : "--outFilterIntronStrands" + }, + "example" : [ + "RemoveInconsistentStrands" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output splice junctions (SJ.out.tab)", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJtype", + "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... no splice junction output", + "info" : { + "step" : "star", + "orig_arg" : "--outSJtype" + }, + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Filtering: Splice Junctions", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJfilterReads", + "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", + "info" : { + "step" : "star", + "orig_arg" : "--outSJfilterReads" + }, + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterOverhangMin", + "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\ndoes not apply to annotated junctions", + "info" : { + "step" : "star", + "orig_arg" : "--outSJfilterOverhangMin" + }, + "example" : [ + 30, + 12, + 12, + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountUniqueMin", + "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "info" : { + "step" : "star", + "orig_arg" : "--outSJfilterCountUniqueMin" + }, + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountTotalMin", + "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "info" : { + "step" : "star", + "orig_arg" : "--outSJfilterCountTotalMin" + }, + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterDistToOtherSJmin", + "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", + "info" : { + "step" : "star", + "orig_arg" : "--outSJfilterDistToOtherSJmin" + }, + "example" : [ + 10, + 0, + 5, + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterI''' + '''ntronMaxVsReadN", + "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", + "info" : { + "step" : "star", + "orig_arg" : "--outSJfilterIntronMaxVsReadN" + }, + "example" : [ + 50000, + 100000, + 200000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Scoring", + "arguments" : [ + { + "type" : "integer", + "name" : "--scoreGap", + "description" : "splice junction penalty (independent on intron motif)", + "info" : { + "step" : "star", + "orig_arg" : "--scoreGap" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapNoncan", + "description" : "non-canonical junction penalty (in addition to scoreGap)", + "info" : { + "step" : "star", + "orig_arg" : "--scoreGapNoncan" + }, + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapGCAG", + "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", + "info" : { + "step" : "star", + "orig_arg" : "--scoreGapGCAG" + }, + "example" : [ + -4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapATAC", + "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", + "info" : { + "step" : "star", + "orig_arg" : "--scoreGapATAC" + }, + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGenomicLengthLog2scale", + "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", + "info" : { + "step" : "star", + 
"orig_arg" : "--scoreGenomicLengthLog2scale" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreDelOpen", + "description" : "deletion open penalty", + "info" : { + "step" : "star", + "orig_arg" : "--scoreDelOpen" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreDelBase", + "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", + "info" : { + "step" : "star", + "orig_arg" : "--scoreDelBase" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreInsOpen", + "description" : "insertion open penalty", + "info" : { + "step" : "star", + "orig_arg" : "--scoreInsOpen" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreInsBase", + "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", + "info" : { + "step" : "star", + "orig_arg" : "--scoreInsBase" + }, + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreStitchSJshift", + "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", + "info" : { + "step" : "star", + "orig_arg" : "--scoreStitchSJshift" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Alignments and Seeding", + "arguments" : [ + { + "type" : "integer", + "name" : 
"--seedSearchStartLmax", + "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", + "info" : { + "step" : "star", + "orig_arg" : "--seedSearchStartLmax" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--seedSearchStartLmaxOverLread", + "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", + "info" : { + "step" : "star", + "orig_arg" : "--seedSearchStartLmaxOverLread" + }, + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedSearchLmax", + "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", + "info" : { + "step" : "star", + "orig_arg" : "--seedSearchLmax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedMultimapNmax", + "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", + "info" : { + "step" : "star", + "orig_arg" : "--seedMultimapNmax" + }, + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedPerReadNmax", + "description" : "max number of seeds per read", + "info" : { + "step" : "star", + "orig_arg" : "--seedPerReadNmax" + }, + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedPerWindowNmax", + "description" : "max number of seeds per window", + "info" : { + "step" : "star", + 
"orig_arg" : "--seedPerWindowNmax" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedNoneLociPerWindow", + "description" : "max number of one seed loci per window", + "info" : { + "step" : "star", + "orig_arg" : "--seedNoneLociPerWindow" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedSplitMin", + "description" : "min length of the seed sequences split by Ns or mate gap", + "info" : { + "step" : "star", + "orig_arg" : "--seedSplitMin" + }, + "example" : [ + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedMapMin", + "description" : "min length of seeds to be mapped", + "info" : { + "step" : "star", + "orig_arg" : "--seedMapMin" + }, + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignIntronMin", + "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", + "info" : { + "step" : "star", + "orig_arg" : "--alignIntronMin" + }, + "example" : [ + 21 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignIntronMax", + "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", + "info" : { + "step" : "star", + "orig_arg" : "--alignIntronMax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : 
"integer", + "name" : "--alignMatesGapMax", + "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", + "info" : { + "step" : "star", + "orig_arg" : "--alignMatesGapMax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJoverhangMin", + "description" : "minimum overhang (i.e. block size) for spliced alignments", + "info" : { + "step" : "star", + "orig_arg" : "--alignSJoverhangMin" + }, + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJstitchMismatchNmax", + "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", + "info" : { + "step" : "star", + "orig_arg" : "--alignSJstitchMismatchNmax" + }, + "example" : [ + 0, + -1, + 0, + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJDBoverhangMin", + "description" : "minimum overhang (i.e. 
block size) for annotated (sjdb) spliced alignments", + "info" : { + "step" : "star", + "orig_arg" : "--alignSJDBoverhangMin" + }, + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSplicedMateMapLmin", + "description" : "minimum mapped length for a read mate that is spliced", + "info" : { + "step" : "star", + "orig_arg" : "--alignSplicedMateMapLmin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--alignSplicedMateMapLminOverLmate", + "description" : "alignSplicedMateMapLmin normalized to mate length", + "info" : { + "step" : "star", + "orig_arg" : "--alignSplicedMateMapLminOverLmate" + }, + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignWindowsPerReadNmax", + "description" : "max number of windows per read", + "info" : { + "step" : "star", + "orig_arg" : "--alignWindowsPerReadNmax" + }, + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerWindowNmax", + "description" : "max number of transcripts per window", + "info" : { + "step" : "star", + "orig_arg" : "--alignTranscriptsPerWindowNmax" + }, + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerReadNmax", + "description" : "max number of different alignments per read to consider", + "info" : { + "step" : "star", + "orig_arg" : "--alignTranscriptsPerReadNmax" + }, + "example" : [ + 10000 + ], + "required" : false, + 
"direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignEndsType", + "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", + "info" : { + "step" : "star", + "orig_arg" : "--alignEndsType" + }, + "example" : [ + "Local" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignEndsProtrude", + "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", + "info" : { + "step" : "star", + "orig_arg" : "--alignEndsProtrude" + }, + "example" : [ + "0 ConcordantPair" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignSoftClipAtReferenceEnds", + "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... 
prohibit, useful for compatibility with Cufflinks", + "info" : { + "step" : "star", + "orig_arg" : "--alignSoftClipAtReferenceEnds" + }, + "example" : [ + "Yes" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignInsertionFlush", + "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", + "info" : { + "step" : "star", + "orig_arg" : "--alignInsertionFlush" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Paired-End reads", + "arguments" : [ + { + "type" : "integer", + "name" : "--peOverlapNbasesMin", + "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merging of overlapping mates\\" algorithm.", + "info" : { + "step" : "star", + "orig_arg" : "--peOverlapNbasesMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--peOverlapMMp", + "description" : "maximum proportion of mismatched bases in the overlap area", + "info" : { + "step" : "star", + "orig_arg" : "--peOverlapMMp" + }, + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Windows, Anchors, Binning", + "arguments" : [ + { + "type" : "integer", + "name" : "--winAnchorMultimapNmax", + "description" : "max number of loci anchors are allowed to map to", + "info" : { + "step" : "star", + "orig_arg" : "--winAnchorMultimapNmax" + }, + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", +
"name" : "--winBinNbits", + "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", + "info" : { + "step" : "star", + "orig_arg" : "--winBinNbits" + }, + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winAnchorDistNbins", + "description" : "max number of bins between two anchors that allows aggregation of anchors into one window", + "info" : { + "step" : "star", + "orig_arg" : "--winAnchorDistNbins" + }, + "example" : [ + 9 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winFlankNbins", + "description" : "log2(winFlank), where winFlank is the size of the left and right flanking regions for each window", + "info" : { + "step" : "star", + "orig_arg" : "--winFlankNbins" + }, + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--winReadCoverageRelativeMin", + "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", + "info" : { + "step" : "star", + "orig_arg" : "--winReadCoverageRelativeMin" + }, + "example" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winReadCoverageBasesMin", + "description" : "minimum number of bases covered by the seeds in a window, for STARlong algorithm only.", + "info" : { + "step" : "star", + "orig_arg" : "--winReadCoverageBasesMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" :
"Chimeric Alignments", + "arguments" : [ + { + "type" : "string", + "name" : "--chimOutType", + "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... soft-clipping in the CIGAR for supplemental chimeric alignments", + "info" : { + "step" : "star", + "orig_arg" : "--chimOutType" + }, + "example" : [ + "Junctions" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimSegmentMin", + "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", + "info" : { + "step" : "star", + "orig_arg" : "--chimSegmentMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreMin", + "description" : "minimum total (summed) score of the chimeric segments", + "info" : { + "step" : "star", + "orig_arg" : "--chimScoreMin" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreDropMax", + "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", + "info" : { + "step" : "star", + "orig_arg" : "--chimScoreDropMax" + }, + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreSeparation", + "description" : "minimum difference (separation) between the best 
chimeric score and the next one", + "info" : { + "step" : "star", + "orig_arg" : "--chimScoreSeparation" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreJunctionNonGTAG", + "description" : "penalty for a non-GT/AG chimeric junction", + "info" : { + "step" : "star", + "orig_arg" : "--chimScoreJunctionNonGTAG" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimJunctionOverhangMin", + "description" : "minimum overhang for a chimeric junction", + "info" : { + "step" : "star", + "orig_arg" : "--chimJunctionOverhangMin" + }, + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimSegmentReadGapMax", + "description" : "maximum gap in the read sequence between chimeric segments", + "info" : { + "step" : "star", + "orig_arg" : "--chimSegmentReadGapMax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--chimFilter", + "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", + "info" : { + "step" : "star", + "orig_arg" : "--chimFilter" + }, + "example" : [ + "banGenomicN" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMainSegmentMultNmax", + "description" : "maximum number of multi-alignments for the main chimeric segment. 
=1 will prohibit multimapping main segments.", + "info" : { + "step" : "star", + "orig_arg" : "--chimMainSegmentMultNmax" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMultimapNmax", + "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", + "info" : { + "step" : "star", + "orig_arg" : "--chimMultimapNmax" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMultimapScoreRange", + "description" : "the score range for multi-mapping chimeras below the best chimeric score. Only works with --chimMultimapNmax > 1", + "info" : { + "step" : "star", + "orig_arg" : "--chimMultimapScoreRange" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimNonchimScoreDropMin", + "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", + "info" : { + "step" : "star", + "orig_arg" : "--chimNonchimScoreDropMin" + }, + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimOutJunctionFormat", + "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... 
comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", + "info" : { + "step" : "star", + "orig_arg" : "--chimOutJunctionFormat" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Quantification of Annotations", + "arguments" : [ + { + "type" : "string", + "name" : "--quantMode", + "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", + "info" : { + "step" : "star", + "orig_arg" : "--quantMode" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--quantTranscriptomeBAMcompression", + "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", + "info" : { + "step" : "star", + "orig_arg" : "--quantTranscriptomeBAMcompression" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--quantTranscriptomeBan", + "description" : "prohibit various alignment type\n\n- IndelSoftclipSingleend ... prohibit indels, soft clipping and single-end alignments - compatible with RSEM\n- Singleend ... prohibit single-end alignments", + "info" : { + "step" : "star", + "orig_arg" : "--quantTranscriptomeBan" + }, + "example" : [ + "IndelSoftclipSingleend" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "2-pass Mapping", + "arguments" : [ + { + "type" : "string", + "name" : "--twopassMode", + "description" : "2-pass mapping mode.\n\n- None ... 
1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", + "info" : { + "step" : "star", + "orig_arg" : "--twopassMode" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--twopass1readsN", + "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", + "info" : { + "step" : "star", + "orig_arg" : "--twopass1readsN" + }, + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "WASP parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--waspOutputMode", + "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", + "info" : { + "step" : "star", + "orig_arg" : "--waspOutputMode" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "STARsolo (single cell RNA-seq) parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--soloType", + "description" : "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. 
--readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)", + "info" : { + "step" : "star", + "orig_arg" : "--soloType" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBwhitelist", + "description" : "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed", + "info" : { + "step" : "star", + "orig_arg" : "--soloCBwhitelist" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloCBstart", + "description" : "cell barcode start base", + "info" : { + "step" : "star", + "orig_arg" : "--soloCBstart" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloCBlen", + "description" : "cell barcode length", + "info" : { + "step" : "star", + "orig_arg" : "--soloCBlen" + }, + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloUMIstart", + "description" : "UMI start base", + "info" : { + "step" : "star", + "orig_arg" : "--soloUMIstart" + }, + "example" : [ + 17 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloUMIlen", + "description" : "UMI length", + "info" : { + 
"step" : "star", + "orig_arg" : "--soloUMIlen" + }, + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloBarcodeReadLength", + "description" : "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check", + "info" : { + "step" : "star", + "orig_arg" : "--soloBarcodeReadLength" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloBarcodeMate", + "description" : "identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2", + "info" : { + "step" : "star", + "orig_arg" : "--soloBarcodeMate" + }, + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBposition", + "description" : "position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. 
Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8", + "info" : { + "step" : "star", + "orig_arg" : "--soloCBposition" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIposition", + "description" : "position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition 3_9_3_14", + "info" : { + "step" : "star", + "orig_arg" : "--soloUMIposition" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloAdapterSequence", + "description" : "adapter sequence to anchor barcodes. Only one adapter sequence is allowed.", + "info" : { + "step" : "star", + "orig_arg" : "--soloAdapterSequence" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloAdapterMismatchesNmax", + "description" : "maximum number of mismatches allowed in adapter sequence.", + "info" : { + "step" : "star", + "orig_arg" : "--soloAdapterMismatchesNmax" + }, + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBmatchWLtype", + "description" : "matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... 
same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline.", + "info" : { + "step" : "star", + "orig_arg" : "--soloCBmatchWLtype" + }, + "example" : [ + "1MM_multi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloInputSAMattrBarcodeSeq", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR .\nThis parameter is required when running STARsolo with input from SAM.", + "info" : { + "step" : "star", + "orig_arg" : "--soloInputSAMattrBarcodeSeq" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloInputSAMattrBarcodeQual", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned to all bases.", + "info" : { + "step" : "star", + "orig_arg" : "--soloInputSAMattrBarcodeQual" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloStrand", + "description" : "strandedness 
of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule", + "info" : { + "step" : "star", + "orig_arg" : "--soloStrand" + }, + "example" : [ + "Forward" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloFeatures", + "description" : "genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.", + "info" : { + "step" : "star", + "orig_arg" : "--soloFeatures" + }, + "example" : [ + "Gene" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloMultiMappers", + "description" : "counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... 
multi-gene UMIs are distributed using Expectation Maximization algorithm", + "info" : { + "step" : "star", + "orig_arg" : "--soloMultiMappers" + }, + "example" : [ + "Unique" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIdedup", + "description" : "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \\"directional\\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI collapsing.", + "info" : { + "step" : "star", + "orig_arg" : "--soloUMIdedup" + }, + "example" : [ + "1MM_All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIfiltering", + "description" : "type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... 
basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR", + "info" : { + "step" : "star", + "orig_arg" : "--soloUMIfiltering" + }, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloOutFileNames", + "description" : "file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", + "info" : { + "step" : "star", + "orig_arg" : "--soloOutFileNames" + }, + "example" : [ + "Solo.out/", + "features.tsv", + "barcodes.tsv", + "matrix.mtx" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCellFilter", + "description" : "cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000", + "info" : { + "step" : "star", + "orig_arg" : "--soloCellFilter" + }, + "example" : [ + "CellRanger2.2", + "3000", + "0.99", + "10" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloOutFormatFeaturesGeneField3", + "description" : "field 3 in the Gene features.tsv file. If \\"-\\", then no 3rd field is output.", + "info" : { + "step" : "star", + "orig_arg" : "--soloOutFormatFeaturesGeneField3" + }, + "example" : [ + "Gene Expression" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCellReadStats", + "description" : "Output reads statistics for each CB\n\n- Standard ... standard output", + "info" : { + "step" : "star", + "orig_arg" : "--soloCellReadStats" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "HTSeq arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--stranded", + "alternatives" : [ + "-s" + ], + "description" : "Whether the data is from a strand-specific assay. 
'reverse' means 'yes' with reversed strand interpretation.", + "info" : { + "step" : "htseq", + "orig_arg" : "--stranded" + }, + "default" : [ + "yes" + ], + "required" : false, + "choices" : [ + "yes", + "no", + "reverse" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--minimum_alignment_quality", + "alternatives" : [ + "-a", + "--minaqual" + ], + "description" : "Skip all reads with MAPQ alignment quality lower than the given minimum value. \nMAPQ is the 5th column of a SAM/BAM file and its usage depends on the software \nused to map the reads.\n", + "info" : { + "step" : "htseq", + "orig_arg" : "--minaqual" + }, + "default" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--type", + "alternatives" : [ + "-t" + ], + "description" : "Feature type (3rd column in GTF file) to be used, all features of other type are ignored (default, suitable for Ensembl GTF files: exon)", + "info" : { + "step" : "htseq", + "orig_arg" : "--type" + }, + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--id_attribute", + "alternatives" : [ + "-i" + ], + "description" : "GTF attribute to be used as feature ID (default, suitable for Ensembl GTF files: gene_id).\nAll feature of the right type (see -t option) within the same GTF attribute will be added\ntogether. The typical way of using this option is to count all exonic reads from each gene\nand add the exons but other uses are possible as well. You can call this option multiple\ntimes: in that case, the combination of all attributes separated by colons (:) will be used\nas a unique identifier, e.g. 
for exons you might use -i gene_id -i exon_number.\n", + "info" : { + "step" : "htseq", + "orig_arg" : "--idattr" + }, + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--additional_attributes", + "description" : "Additional feature attributes (suitable for Ensembl GTF files: gene_name). Use multiple times\nfor more than one additional attribute. These attributes are only used as annotations in the\noutput, while the determination of how the counts are added together is done based on option -i.\n", + "info" : { + "step" : "htseq", + "orig_arg" : "--additional-attr" + }, + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--add_chromosome_info", + "description" : "Store information about the chromosome of each feature as an additional attribute\n(e.g. 
colunm in the TSV output file).\n", + "info" : { + "step" : "htseq", + "orig_arg" : "--add-chromosome-info" + }, + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--mode", + "alternatives" : [ + "-m" + ], + "description" : "Mode to handle reads overlapping more than one feature.", + "info" : { + "step" : "htseq", + "orig_arg" : "--mode" + }, + "default" : [ + "union" + ], + "required" : false, + "choices" : [ + "union", + "intersection-strict", + "intersection-nonempty" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--non_unique", + "description" : "Whether and how to score reads that are not uniquely aligned or ambiguously assigned to features.", + "info" : { + "step" : "htseq", + "orig_arg" : "--nonunique" + }, + "default" : [ + "none" + ], + "required" : false, + "choices" : [ + "none", + "all", + "fraction", + "random" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--secondary_alignments", + "description" : "Whether to score secondary alignments (0x100 flag).", + "info" : { + "step" : "htseq", + "orig_arg" : "--secondary-alignments" + }, + "required" : false, + "choices" : [ + "score", + "ignore" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--supplementary_alignments", + "description" : "Whether to score supplementary alignments (0x800 flag).", + "info" : { + "step" : "htseq", + "orig_arg" : "--supplementary-alignments" + }, + "required" : false, + "choices" : [ + "score", + "ignore" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--counts_output_sparse", + "description" : "Store the counts as a sparse matrix (mtx, h5ad, loom).", + "info" : { + "step" : "htseq", + "orig_arg" 
: "--counts-output-sparse" + }, + "direction" : "input", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/" + } + ], + "description" : "Align fastq files using STAR.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "docker", + "env" : [ + "STAR_VERSION 2.7.10b", + "PACKAGES gcc g++ make wget zlib1g-''' + '''dev unzip" + ] + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" + ] + }, + { + "type" : "apt", + "packages" : [ + "samtools", + 
"procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "pyyaml", + "HTSeq", + "multiprocess", + "gtfparse<2.0", + "pandas", + "multiqc~=1.15.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "pytest" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : 
"0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from typing import Any, Dict, List, Tuple +import math +import tempfile +import subprocess +import tarfile +import gzip +import shutil +from pathlib import Path +import yaml +import pandas as pd +from multiprocess import Pool +import gtfparse + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'input_r1': $( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "r'${VIASH_PAR_INPUT_R1//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'input_r2': $( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "r'${VIASH_PAR_INPUT_R2//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'reference_index': $( if [ ! -z ${VIASH_PAR_REFERENCE_INDEX+x} ]; then echo "r'${VIASH_PAR_REFERENCE_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'reference_gtf': $( if [ ! -z ${VIASH_PAR_REFERENCE_GTF+x} ]; then echo "r'${VIASH_PAR_REFERENCE_GTF//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'run_htseq_count': $( if [ ! -z ${VIASH_PAR_RUN_HTSEQ_COUNT+x} ]; then echo "r'${VIASH_PAR_RUN_HTSEQ_COUNT//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'run_multiqc': $( if [ ! -z ${VIASH_PAR_RUN_MULTIQC+x} ]; then echo "r'${VIASH_PAR_RUN_MULTIQC//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'min_success_rate': $( if [ ! -z ${VIASH_PAR_MIN_SUCCESS_RATE+x} ]; then echo "float(r'${VIASH_PAR_MIN_SUCCESS_RATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'runRNGseed': $( if [ ! 
-z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbScore': $( if [ ! 
-z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'clipAdapterType': $( if [ ! 
-z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitBAMsortRAM': $( if [ ! 
-z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMunmapped': $( if [ ! 
-z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMmultNmax': $( if [ ! 
-z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterMultimapScoreRange': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterIntronStrands': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapATAC': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerReadNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLmin': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreJunctionNonGTAG': $( if [ ! 
-z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantTranscriptomeBan': $( if [ ! 
-z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBposition': $( if [ ! 
-z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIfiltering': $( if [ ! 
-z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'stranded': $( if [ ! -z ${VIASH_PAR_STRANDED+x} ]; then echo "r'${VIASH_PAR_STRANDED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'minimum_alignment_quality': $( if [ ! -z ${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY+x} ]; then echo "int(r'${VIASH_PAR_MINIMUM_ALIGNMENT_QUALITY//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'type': $( if [ ! -z ${VIASH_PAR_TYPE+x} ]; then echo "r'${VIASH_PAR_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'id_attribute': $( if [ ! -z ${VIASH_PAR_ID_ATTRIBUTE+x} ]; then echo "r'${VIASH_PAR_ID_ATTRIBUTE//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'additional_attributes': $( if [ ! -z ${VIASH_PAR_ADDITIONAL_ATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_ADDITIONAL_ATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'add_chromosome_info': $( if [ ! -z ${VIASH_PAR_ADD_CHROMOSOME_INFO+x} ]; then echo "r'${VIASH_PAR_ADD_CHROMOSOME_INFO//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'mode': $( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "r'${VIASH_PAR_MODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'non_unique': $( if [ ! 
-z ${VIASH_PAR_NON_UNIQUE+x} ]; then echo "r'${VIASH_PAR_NON_UNIQUE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'secondary_alignments': $( if [ ! -z ${VIASH_PAR_SECONDARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SECONDARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'supplementary_alignments': $( if [ ! -z ${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS+x} ]; then echo "r'${VIASH_PAR_SUPPLEMENTARY_ALIGNMENTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'counts_output_sparse': $( if [ ! -z ${VIASH_PAR_COUNTS_OUTPUT_SPARSE+x} ]; then echo "r'${VIASH_PAR_COUNTS_OUTPUT_SPARSE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + + +def fetch_arguments_info(config: Dict[str, Any]) -> Dict[str, Any]: + """Fetch arguments from config""" + arguments = { + arg["name"].removeprefix("-").removeprefix("-"): arg + for group in config["functionality"]["argument_groups"] + for arg in group["arguments"] + } + return arguments + +def process_par( + par: Dict[str, Any], + arguments_info: Dict[str, Any], + gz_args: List[str], + temp_dir: Path +) -> Dict[str, Any]: + """ + Process the Viash par dictionary + + This turns file strings into Path objects and extracting gzipped files if need be. + + Parameters + ---------- + par: The par dictionary created by Viash + arguments_info: The arguments info Dictionary created by \\`fetch_arguments_info\\` + gz_args: A list of argument keys which could be gzip files which need to be decompressed. 
+ temp_dir: A temporary directory in which to ungzip files + """ + new_par = {} + for key, value in par.items(): + arg_info = arguments_info[key] + # turn file arguments into paths + if value and arg_info["type"] == "file": + is_multiple = isinstance(value, list) + + if is_multiple: + value = [Path(val) for val in value] + else: + value = Path(value) + + if key in gz_args: + print(f">> Checking compression of --{key}", flush=True) + # turn value into list if need be + if not is_multiple: + value = [value] + + # extract + value = [extract_if_need_be(path, temp_dir) for path in value] + + # unlist if need be + if not is_multiple: + value = value[0] + + new_par[key] = value + return new_par + +def generate_cmd_arguments(par, arguments_info, step_filter=None, flatten=False): + """ + Generate command-line arguments by fetching the relevant args + + Parameters + ---------- + par: The par dictionary created by Viash + arguments_info: The arguments info Dictionary created by \\`fetch_arguments_info\\` + step_filter: If provided,\\`par\\` will be filtered to only contain arguments for which + argument.info.step == step_filter. + flatten: If \\`False\\`, the command for an argument with multiple values will be + \\`["--key", "value1", "--key", "value2"]\\`, otherwise \\`["--key", "value1", "value2"]\\`. 
+ """ + cmd_args = [] + + for key, arg in arguments_info.items(): + arg_val = par.get(key) + # The info key is always present (changed in viash 0.7.4) + # in the parsed config (None if not specified in source config) + info = arg["info"] or {} + orig_arg = info.get("orig_arg") + step = info.get("step") + if arg_val and orig_arg and (not step_filter or step == step_filter): + if not arg.get("multiple", False): + arg_val = [arg_val] + + if arg["type"] in ["boolean_true", "boolean_false"]: + # if argument is a boolean_true or boolean_false, simply add the flag + arg_val = [orig_arg] + elif orig_arg.startswith("-"): + # if the orig arg flag is not a positional, + # add the flag in front of each element and flatten + if flatten: + arg_val = [str(x) for x in [orig_arg] + arg_val] + else: + arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] + + cmd_args.extend(arg_val) + + return cmd_args + +def is_gz_file(path: Path) -> bool: + """Check whether something is a gzip""" + with open(path, "rb") as file: + return file.read(2) == b"\\\\x1f\\\\x8b" + +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + """if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path""" + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f" Tar detected; extracting {par_value} to {unpacked_path}", flush=True) + + with tarfile.open(par_value, "r") as open_tar: + members = open_tar.getmembers() + root_dirs = [ + member + for member in members + if member.isdir() and member.name != "." 
and "/" not in member.name
+            ]
+            # if there is only one root_dir (and there are files in that directory)
+            # strip that directory name from the destination folder
+            if len(root_dirs) == 1:
+                for mem in members:
+                    mem.path = Path(*Path(mem.path).parts[1:])
+            members_to_move = [mem for mem in members if mem.path != Path(".")]
+            open_tar.extractall(unpacked_path, members=members_to_move)
+        return unpacked_path
+
+    elif par_value.is_file() and is_gz_file(par_value):
+        # Remove extension (if it exists)
+        extaction_file_name = Path(par_value.stem)
+        unpacked_path = temp_dir_path / extaction_file_name
+        print(f" Gzip detected; extracting {par_value} to {unpacked_path}", flush=True)
+
+        with gzip.open(par_value, "rb") as f_in:
+            with open(unpacked_path, "wb") as f_out:
+                shutil.copyfileobj(f_in, f_out)
+        return unpacked_path
+
+    else:
+        return par_value
+
+def load_star_reference(reference_index: str) -> None:
+    """Load star reference index into memory."""
+    subprocess.run(
+        [
+            "STAR",
+            "--genomeLoad", "LoadAndExit",
+            "--genomeDir", str(reference_index),
+        ],
+        check=True
+    )
+
+def unload_star_reference(reference_index: str) -> None:
+    """Remove star reference index from memory."""
+    subprocess.run(
+        [
+            "STAR",
+            "--genomeLoad", "Remove",
+            "--genomeDir", str(reference_index),
+        ],
+        check=True
+    )
+
+def star_and_htseq(
+    group_id: str,
+    r1_files: List[Path],
+    r2_files: List[Path],
+    temp_dir: Path,
+    par: Dict[str, Any],
+    arguments_info: Dict[str, Any],
+    num_threads: int
+) -> Tuple[int, str] :
+    """Run STAR for one group and, if requested, samtools sort, htseq-count and MultiQC.
+
+    After every step the expected output file is checked; the first missing
+    file ends the function with an error tuple instead of an exception.
+
+    Args:
+        group_id: Identifier of the group; used in the output and temp directory names.
+        r1_files: Files forwarded to run_star as r1_files.
+        r2_files: Files forwarded to run_star as r2_files (may be empty).
+        temp_dir: Parent directory in which the per-group STAR temp dir is placed.
+        par: Component parameters. Reads "output", "run_htseq_count" and
+            "run_multiqc" here; the dict is also passed on to the helper steps.
+        arguments_info: Argument metadata passed on to the helper steps.
+        num_threads: Thread count forwarded to run_star.
+
+    Returns:
+        (0, "") when all requested steps produced their output, otherwise
+        (1, message) where the message names the missing file.
+    """
+    star_output = par["output"] / "per" / group_id
+    temp_dir_group = temp_dir / f"star_tmp_{group_id}"
+    unsorted_bam = star_output / "Aligned.out.bam"
+    sorted_bam = star_output / "Aligned.sorted.out.bam"
+    counts_file = star_output / "htseq-count.txt"
+    multiqc_path = star_output / "multiqc_data"
+
+    print(f">> Running STAR for group '{group_id}' with command:", flush=True)
+    star_output.mkdir(parents=True, exist_ok=True)
+    # Only the parent of the per-group temp dir is created here
+    # (presumably STAR creates --outTmpDir itself; confirm against the STAR manual).
+    temp_dir_group.parent.mkdir(parents=True, exist_ok=True)
+    run_star(
+        r1_files=r1_files,
+        r2_files=r2_files,
+        output_dir=star_output,
+        temp_dir=temp_dir_group,
+        par=par,
+        arguments_info=arguments_info,
+        num_threads=num_threads
+    )
+    if not unsorted_bam.exists():
+        return (1, f"Could not find unsorted bam at '{unsorted_bam}'")
+
+    if par["run_htseq_count"]:
+        print(f">> Running samtools sort for group '{group_id}' with command:", flush=True)
+        run_samtools_sort(unsorted_bam, sorted_bam)
+        if not sorted_bam.exists():
+            # report the path that was actually checked (the sorted bam)
+            return (1, f"Could not find sorted bam at '{sorted_bam}'")
+
+        print(f">> Running htseq-count for group '{group_id}' with command:", flush=True)
+        run_htseq_count(sorted_bam, counts_file, par, arguments_info)
+        if not counts_file.exists():
+            return (1, f"Could not find counts at '{counts_file}'")
+
+    if par["run_multiqc"]:
+        run_multiqc(star_output)
+        if not multiqc_path.exists():
+            return (1, f"Could not find MultiQC output at '{multiqc_path}'")
+
+    return (0, "")
+
+def run_star(
+    r1_files: List[Path],
+    r2_files: List[Path],
+    output_dir: Path,
+    temp_dir: Path,
+    par: Dict[str, Any],
+    arguments_info: Dict[str, Any],
+    num_threads: int
+) -> None:
+    """Run star"""
+    # process manual arguments
+    r1_pasted = [",".join([str(r1) for r1 in r1_files])]
+    r2_pasted = [",".join([str(r2) for r2 in r2_files])] if r2_files else []
+    manual_par = {
+        "--genomeDir": [par["reference_index"]],
+        "--genomeLoad": ["LoadAndRemove"],
+        "--runThreadN": [str(num_threads)],
+        "--runMode": ["alignReads"],
+        "--readFilesIn": r1_pasted + r2_pasted,
+        # create a tempdir per group
+        "--outTmpDir": [temp_dir],
+        # make sure there is a trailing /
+        "--outFileNamePrefix": [f"{output_dir}/"],
+        # fix the outSAMtype to return unsorted BAM files
+        "--outSAMtype": ["BAM", "Unsorted"]
+    }
+    manual_cmd = [str(x)
+        for key, values in manual_par.items()
+        for x in [key] + values
+    ]
+
+    # process all passthrough star arguments
+    par_cmd = generate_cmd_arguments(par, arguments_info, "star",
flatten=True) + + # combine into one command and turn into strings + cmd_args = [str(val) for val in ["STAR"] + manual_cmd + par_cmd] + + # run star + subprocess.run(cmd_args, check=True) + +def run_samtools_sort( + unsorted_bam: Path, + sorted_bam: Path +) -> None: + "Run samtools sort" + cmd_args = [ + "samtools", + "sort", + "-o", + sorted_bam, + unsorted_bam, + ] + subprocess.run(cmd_args, check=True) + +def run_htseq_count( + sorted_bam: Path, + counts_file: Path, + par: Dict[str, Any], + arguments_info: Dict[str, Any] +) -> None: + """Run HTSeq count""" + # process manual arguments + manual_cmd = [ + sorted_bam, + par["reference_gtf"] + ] + + # process all passthrough htseq arguments + par_cmd = generate_cmd_arguments(par, arguments_info, "htseq") + + # combine into one command and turn into strings + cmd_args = [str(val) for val in ["htseq-count"] + manual_cmd + par_cmd] + + # run htseq + with open(counts_file, "w", encoding="utf-8") as file: + subprocess.run(cmd_args, check=True, stdout=file) + +def get_feature_info(reference_gtf) -> pd.DataFrame: + ref = gtfparse.read_gtf(reference_gtf) + ref_genes = ref.loc[(ref["feature"] == "gene") | (ref["source"] == "ERCC")] + return pd.DataFrame( + { + "feature_id": ref_genes["gene_id"], + "feature_type": "Gene Expression", + "feature_name": ref_genes["gene_name"] + } + ) + +def run_multiqc(input_dir: Path) -> None: + cmd_args = ["multiqc", str(input_dir), "--outdir", str(input_dir), "--no-report", "--force"] + + # run multiqc + subprocess.run(cmd_args, check=True) + + +######################## +### Main code ### +######################## + +def main(par, meta): + """Main function""" + + # check input arguments + assert len(par["input_id"]) == len(par["input_r1"]), "--input_r1 should have same length as --input_id" + if par["input_r2"]: + assert len(par["input_id"]) == len(par["input_r2"]), "--input_r2 should have same length as --input_id" + + # read config arguments + with open(meta["config"], "r", 
encoding="utf-8") as file: + config = yaml.safe_load(file) + + # fetch all arguments from the config and turn it into a Dict[str, Argument] + arguments_info = fetch_arguments_info(config) + + # temp_dir = "tmp/" + with tempfile.TemporaryDirectory( + prefix=f"{meta['functionality_name']}-", + dir=meta["temp_dir"], + ignore_cleanup_errors=True + ) as temp_dir: + temp_dir = Path(temp_dir) + temp_dir.mkdir(parents=True, exist_ok=True) + + # turn file strings into Paths and decompress gzip if need be + gz_args = ["input_r1", "input_r2", "reference_index", "reference_gtf"] + par = process_par(par, arguments_info, gz_args, temp_dir) + + # make sure input_r2 has same length as input_r1 + if not par["input_r2"]: + par["input_r2"] = [None for _ in par["input_r1"]] + + # group input_files by input_id + print(">> Group by --input_id", flush=True) + grouped_inputs = {} + for group_id, file_r1, file_r2 in zip(par["input_id"], par["input_r1"], par["input_r2"]): + if group_id not in grouped_inputs: + grouped_inputs[group_id] = ([], []) + grouped_inputs[group_id][0].append(file_r1) + if file_r2: + grouped_inputs[group_id][1].append(file_r2) + + # create output dir if need be + par["output"].mkdir(parents=True, exist_ok=True) + + # store features metadata + feature_info = get_feature_info(str(par["reference_gtf"])) + with open(par["output"] / "feature_info.tsv", "w", encoding="utf-8") as file: + feature_info.to_csv(file, sep="\\\\t", index=False) + + # try: + # print(">> Loading genome in memory", flush=True) + # load_star_reference(par["reference_index"]) + + cpus = meta.get("cpus", 1) + num_items = len(grouped_inputs) + pool_size = min(cpus, num_items) + num_threads_per_task = math.ceil(cpus / pool_size) + + with Pool(pool_size) as pool: + outs = pool.starmap( + lambda group_id, files: star_and_htseq( + group_id=group_id, + r1_files=files[0], + r2_files=files[1], + temp_dir=temp_dir, + par=par, + arguments_info=arguments_info, + num_threads=num_threads_per_task + ), + 
grouped_inputs.items() + ) + + num_errored = 0 + for exit, msg in outs: + if exit != 0: + print(f"Error: {msg}") + num_errored += 1 + + pct_succeeded = 1.0 - num_errored / len(outs) + print("------------------") + print(f"Success rate: {math.ceil(pct_succeeded * 100)}%") + + assert pct_succeeded >= par["min_success_rate"], f"Success rate should be at least {math.ceil(par['min_success_rate'] * 100)}%" + +if __name__ == "__main__": + main(par, meta) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_multi_star", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The mutating unique() would strip the
+  // duplicates from ppIds before it is interpolated into the assertion message below.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares as type "file" with direction "input" are touched;
+ * values that are not a String (e.g. already resolved) are passed through unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup result local to the closure instead of leaking it into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is returned as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return the parsed parameter sets.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a List of Maps to a List of [id, Map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; each argument's settings are looked up
+ *   by its plainName.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // Both lookups are declared with 'def' so they stay local to this closure call;
+    // without it they are written to the script binding and shared between invocations.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // wrap scalars so every cast below can work on a list
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local instead of writing it to the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // '?.' because accepted keys such as publish_dir have no entry in allArguments
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *   provides a list of arguments that can be split up into multiple events
+ *   in the output channel. Possible formats of param_lists are: a csv file,
+ *   json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *   have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose first
+ *   element is the ID of the event and whose second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // local variable: an undeclared assignment here would become a script-level global
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *   - Gather default parameters from the Viash config and make
+ *     sure that they are correctly formatted (see applyConfig method).
+ *   - Format the input parameters (also using the applyConfig method).
+ *   - Apply the default parameter to the input parameters.
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map"
+  }
+  // The closure parameter is 'requiredKey'; interpolating '$key' here would fail with a
+  // missing-property error instead of reporting which key is absent.
+  requiredKeys.forEach { requiredKey ->
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+// Validates the 'auto' settings map: after dropping null values, every expected key must be
+// present and hold a Boolean. Returns a map restricted to the expected keys.
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local; an undeclared assignment would become a script-level global
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected auto.$key to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    // " Example: params.transcripts_dir = \"./transcripts/\""
+    // first match wins: an explicit transcripts dir, else a subfolder of the publish dir
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // start from the existing publishDir entries, or from an empty list when none are set
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  // these optional hooks must be closures when they are set
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/multi_star/nextflow.config b/target/nextflow/mapping/multi_star/nextflow.config new file mode 100644 index 00000000000..af23dbf650b 
--- /dev/null +++ b/target/nextflow/mapping/multi_star/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'multi_star' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Align fastq files using STAR.' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb 
{ memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/multi_star/nextflow_params.yaml b/target/nextflow/mapping/multi_star/nextflow_params.yaml new file mode 100644 index 00000000000..0c697bff22a --- /dev/null +++ b/target/nextflow/mapping/multi_star/nextflow_params.yaml @@ -0,0 +1,16 @@ +# Input/Output +input_id: # please fill in - example: ["mysample", "mysample"] +input_r1: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L002_R1_001.fastq.gz"] +# input_r2: ["mysample_S1_L001_R2_001.fastq.gz", "mysample_S1_L002_R2_001.fastq.gz"] +reference_index: # please fill in - example: "/path/to/reference" +reference_gtf: # please fill in - example: "genes.gtf" +# output: "$id.$key.output.output" + +# Processing arguments +run_htseq_count: true +run_multiqc: true +min_success_rate: 0.5 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/multi_star/nextflow_schema.json b/target/nextflow/mapping/multi_star/nextflow_schema.json new file mode 100644 index 00000000000..37ac21f8779 --- /dev/null +++ b/target/nextflow/mapping/multi_star/nextflow_schema.json @@ -0,0 +1,168 @@ +{ +"$schema": 
"http://json-schema.org/draft-07/schema", +"title": "multi_star", +"description": "Align fastq files using STAR.", +"type": "object", +"definitions": { + + + + "input/output" : { + "title": "Input/Output", + "type": "object", + "description": "No description", + "properties": { + + + "input_id": { + "type": + "string", + "description": "Type: List of `string`, required, example: `mysample;mysample`, multiple_sep: `\";\"`. The ID of the sample being processed", + "help_text": "Type: List of `string`, required, example: `mysample;mysample`, multiple_sep: `\";\"`. The ID of the sample being processed. This vector should have the same length as the `--input_r1` argument." + + } + + + , + "input_r1": { + "type": + "string", + "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L002_R1_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped", + "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L002_R1_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R1 files should be passed." + + } + + + , + "input_r2": { + "type": + "string", + "description": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz;mysample_S1_L002_R2_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped", + "help_text": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz;mysample_S1_L002_R2_001.fastq.gz`, multiple_sep: `\";\"`. Paths to the sequences to be mapped. If using Illumina paired-end reads, only the R2 files should be passed." + + } + + + , + "reference_index": { + "type": + "string", + "description": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference", + "help_text": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference. 
Corresponds to the --genomeDir argument in the STAR command." + + } + + + , + "reference_gtf": { + "type": + "string", + "description": "Type: `file`, required, example: `genes.gtf`. Path to the gtf reference file", + "help_text": "Type: `file`, required, example: `genes.gtf`. Path to the gtf reference file." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "processing arguments" : { + "title": "Processing arguments", + "type": "object", + "description": "No description", + "properties": { + + + "run_htseq_count": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Whether or not to also run htseq-count after STAR", + "help_text": "Type: `boolean`, default: `true`. Whether or not to also run htseq-count after STAR." + , + "default": "True" + } + + + , + "run_multiqc": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Whether or not to also run MultiQC at the end", + "help_text": "Type: `boolean`, default: `true`. Whether or not to also run MultiQC at the end." + , + "default": "True" + } + + + , + "min_success_rate": { + "type": + "number", + "description": "Type: `double`, default: `0.5`. Fail when the success rate is below this threshold", + "help_text": "Type: `double`, default: `0.5`. Fail when the success rate is below this threshold." + , + "default": "0.5" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input/output" + }, + + { + "$ref": "#/definitions/processing arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml b/target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml new file mode 100644 index 00000000000..8e486e6e2c0 --- /dev/null +++ b/target/nextflow/mapping/multi_star_to_h5mu/.config.vsh.yaml @@ -0,0 +1,179 @@ +functionality: + name: "multi_star_to_h5mu" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "The directory created by `multi_star`" + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Convert the output of `multi_star` to a h5mu.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq/multi_star" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + 
mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star_to_h5mu" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star_to_h5mu/multi_star_to_h5mu" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/multi_star_to_h5mu/main.nf b/target/nextflow/mapping/multi_star_to_h5mu/main.nf new file mode 100644 index 00000000000..470483407d2 --- /dev/null +++ b/target/nextflow/mapping/multi_star_to_h5mu/main.nf @@ -0,0 +1,2625 @@ +// multi_star_to_h5mu 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (author, maintainer) +// * Angela Oliveira Pisco (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "multi_star_to_h5mu", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "The directory created by `multi_star`", + "example" : [ + "/path/to/foo" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" 
: true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/" + } + ], + "description" : "Convert the output of `multi_star` to a h5mu.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq/multi_star", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midmem", + "midcpu" + ], + "tag" : "$id" + }, + "auto" : 
{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/multi_star_to_h5mu/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/multi_star_to_h5mu", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from pathlib import Path +import pandas as pd +import mudata as md +import anndata as ad +import numpy as np +import json + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +# convert to path +input_dir = Path(par["input"]) + +# read counts information +print("> Read counts data", flush=True) +per_obs_data = [] + +for input_counts in (input_dir / "per").glob("**/htseq-count.txt"): + per_obs_dir = input_counts.parent + input_id = per_obs_dir.name + input_multiqc = per_obs_dir / "multiqc_data" / "multiqc_data.json" + + data = pd.read_table( + input_counts, + index_col=0, + names=["cell_id", input_id], + dtype={"cell_id": "U", input_id: "i"} + ) + data2 = data[~data.index.str.startswith("__")] + + with open(input_multiqc, "r") as file: + qc = json.load(file) + + qc_star = qc.get("report_saved_raw_data", {}).get("multiqc_star", {}).get(input_id) + qc_htseq = qc.get("report_saved_raw_data", {}).get("multiqc_htseq", {}).get("htseq-count") + + per_obs_data.append({ + "counts": data2.transpose(), + "qc_star": pd.DataFrame(qc_star, index=[input_id]), + "qc_htseq": pd.DataFrame(qc_htseq, index=[input_id]) + }) + + +# combine all counts +counts = pd.concat([x["counts"] for x in per_obs_data], axis=0) +qc_star = pd.concat([x["qc_star"] for x in per_obs_data], axis=0) +qc_htseq = pd.concat([x["qc_htseq"] for x in per_obs_data], axis=0) + +# read feature info +feature_info = pd.read_csv(input_dir / "feature_info.tsv", sep="\\\\t", index_col=0) +feature_info_ord = feature_info.loc[counts.columns] + +var = pd.DataFrame( + data={ + "gene_ids": feature_info_ord.index, + "feature_types": "Gene Expression", + "gene_name": feature_info_ord["feature_name"], + } +).set_index("gene_ids") + +print("> construct anndata", flush=True) +adata = ad.AnnData( + X=counts, + obsm={"qc_star": qc_star, "qc_htseq": qc_htseq}, + var=var, + dtype=np.int32 +) + +print("> convert to mudata", flush=True) +mdata = md.MuData(adata) + +print("> write to file", flush=True) +mdata.write_h5mu(par["output"], 
compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_multi_star_to_h5mu", + "tag" : "0.12.0" + }, + "label" : [ + "midmem", + "midcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
+ // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // warn with ONE concatenated message; text passed as a 2nd argument is not part of the logged message + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replace('$id', id).replace('$key', key) // literal replace: '$' or '\' in id/key must not be read as regex group references + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // 'def' keeps this local to the closure instead of the shared script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!out instanceof List' negates 'out' first and is always false + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/multi_star_to_h5mu/nextflow.config b/target/nextflow/mapping/multi_star_to_h5mu/nextflow.config new file mode 100644 index 
00000000000..75b89fe2947 --- /dev/null +++ b/target/nextflow/mapping/multi_star_to_h5mu/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'multi_star_to_h5mu' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Convert the output of `multi_star` to a h5mu.\n' + author = 'Robrecht Cannoodt, Angela Oliveira Pisco' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + 
withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml b/target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml new file mode 100644 index 00000000000..9fcbaf165d7 --- /dev/null +++ b/target/nextflow/mapping/multi_star_to_h5mu/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Arguments +input: # please fill in - example: "/path/to/foo" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json b/target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json new file mode 100644 index 00000000000..a38f5162445 --- /dev/null +++ b/target/nextflow/mapping/multi_star_to_h5mu/nextflow_schema.json @@ -0,0 +1,93 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "multi_star_to_h5mu", +"description": "Convert the output of `multi_star` to a h5mu.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + 
"type": + "string", + "description": "Type: `file`, required, example: `/path/to/foo`. The directory created by `multi_star`", + "help_text": "Type: `file`, required, example: `/path/to/foo`. The directory created by `multi_star`" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/samtools_sort/.config.vsh.yaml b/target/nextflow/mapping/samtools_sort/.config.vsh.yaml new file mode 100644 index 00000000000..1f4dbb4f456 --- /dev/null +++ b/target/nextflow/mapping/samtools_sort/.config.vsh.yaml @@ -0,0 +1,270 @@ +functionality: + name: "samtools_sort" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input" + arguments: + - type: "file" + name: "--input" + description: "Path to the SAM/BAM/CRAM files containing the mapped reads." + info: + orig_arg: "in_sam" + example: + - "input.bam" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output" + arguments: + - type: "file" + name: "--output_bam" + description: "Filename to output the counts to." 
+ info: + orig_arg: "-o" + example: + - "output.bam" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_bai" + description: "BAI-format index for BAM file." + info: null + example: + - "output.bam.bai" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_format" + description: "The output format. By default, samtools tries to select a format\ + \ based on the -o filename extension; if output is to standard output or no\ + \ format can be deduced, bam is selected." + info: + orig_arg: "-O" + example: + - "bam" + required: false + choices: + - "sam" + - "bam" + - "cram" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--compression" + description: "Compression level, from 0 (uncompressed) to 9 (best compression)." + info: + orig_arg: "-l" + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Arguments" + arguments: + - type: "boolean_true" + name: "--minimizer_cluster" + description: "Sort unmapped reads (those in chromosome \"*\") by their sequence\ + \ minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse\ + \ complementing as appropriate. This has the effect of collating some similar\ + \ data together, improving the \ncompressibility of the unmapped sequence.\ + \ The minimiser kmer size is adjusted using the -K option. Note data compressed\ + \ \nin this manner may need to be name collated prior to conversion back to\ + \ fastq.\n\nMapped sequences are sorted by chromosome and position. \n" + info: + orig_arg: "-M" + direction: "input" + dest: "par" + - type: "integer" + name: "--minimizer_kmer" + description: "Sets the kmer size to be used in the -M option." 
+ info: + orig_arg: "-K" + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--sort_by_read_names" + description: "Sort by read names (i.e., the QNAME field) rather than by chromosomal\ + \ coordinates." + info: + orig_arg: "-n" + direction: "input" + dest: "par" + - type: "string" + name: "--sort_by" + description: "Sort first by this value in the alignment tag, then by position\ + \ or name (if also using -n)." + info: + orig_arg: "-t" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--no_pg" + description: "Do not add a @PG line to the header of the output file." + info: + orig_arg: "--no-PG" + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost\ + \ coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate\ + \ `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\ + \nNote that to generate an index file (by specifying `--output_bai`), the default\ + \ coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by\ + \ ` options are incompatible with `--output_bai`. 
\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "samtools" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "pyyaml" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/config.vsh.yaml" + platform: "nextflow" + output: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/samtools_sort" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/samtools_sort/samtools_sort" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/samtools_sort/main.nf b/target/nextflow/mapping/samtools_sort/main.nf new file mode 100644 index 00000000000..678e00c4d7b --- /dev/null +++ b/target/nextflow/mapping/samtools_sort/main.nf @@ -0,0 +1,2740 @@ +// samtools_sort 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (author, maintainer) +// * Angela Oliveira Pisco (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "samtools_sort", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Path to the SAM/BAM/CRAM files containing the mapped reads.", + "info" : { + "orig_arg" : "in_sam" + }, + "example" : [ + "input.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : 
"--output_bam", + "description" : "Filename to output the counts to.", + "info" : { + "orig_arg" : "-o" + }, + "example" : [ + "output.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output_bai", + "description" : "BAI-format index for BAM file.", + "example" : [ + "output.bam.bai" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_format", + "description" : "The output format. By default, samtools tries to select a format based on the -o filename extension; if output is to standard output or no format can be deduced, bam is selected.", + "info" : { + "orig_arg" : "-O" + }, + "example" : [ + "bam" + ], + "required" : false, + "choices" : [ + "sam", + "bam", + "cram" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--compression", + "description" : "Compression level, from 0 (uncompressed) to 9 (best", + "info" : { + "orig_arg" : "-l" + }, + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--minimizer_cluster", + "description" : "Sort unmapped reads (those in chromosome \\"*\\") by their sequence minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse complementing as appropriate. This has the effect of collating some similar data together, improving the \ncompressibility of the unmapped sequence. The minimiser kmer size is adjusted using the -K option. 
Note data compressed \nin this manner may need to be name collated prior to conversion back to fastq.\n\nMapped sequences are sorted by chromosome and position. \n", + "info" : { + "orig_arg" : "-M" + }, + "direction" : "input", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--minimizer_kmer", + "description" : "Sets the kmer size to be used in the -M option.", + "info" : { + "orig_arg" : "-K" + }, + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--sort_by_read_names", + "description" : "Sort by read names (i.e., the QNAME field) rather than by chromosomal coordinates.", + "info" : { + "orig_arg" : "-n" + }, + "direction" : "input", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sort_by", + "description" : "Sort first by this value in the alignment tag, then by position or name (if also using -n).", + "info" : { + "orig_arg" : "-t" + }, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--no_pg", + "description" : "Do not add a @PG line to the header of the output file.", + "info" : { + "orig_arg" : "--no-PG" + }, + "direction" : "input", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/" + } + ], + "description" : "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\nNote that to generate an index file (by specifying `--output_bai`), the default coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by ` options are 
incompatible with `--output_bai`. \n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "samtools", + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "pyyaml" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + 
"mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/samtools_sort/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/samtools_sort", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import tempfile +import subprocess +from pathlib import Path +import yaml + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_bam': $( if [ ! -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_bai': $( if [ ! -z ${VIASH_PAR_OUTPUT_BAI+x} ]; then echo "r'${VIASH_PAR_OUTPUT_BAI//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_format': $( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FORMAT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'compression': $( if [ ! -z ${VIASH_PAR_COMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'minimizer_cluster': $( if [ ! 
-z ${VIASH_PAR_MINIMIZER_CLUSTER+x} ]; then echo "r'${VIASH_PAR_MINIMIZER_CLUSTER//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'minimizer_kmer': $( if [ ! -z ${VIASH_PAR_MINIMIZER_KMER+x} ]; then echo "int(r'${VIASH_PAR_MINIMIZER_KMER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sort_by_read_names': $( if [ ! -z ${VIASH_PAR_SORT_BY_READ_NAMES+x} ]; then echo "r'${VIASH_PAR_SORT_BY_READ_NAMES//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'sort_by': $( if [ ! -z ${VIASH_PAR_SORT_BY+x} ]; then echo "r'${VIASH_PAR_SORT_BY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'no_pg': $( if [ ! -z ${VIASH_PAR_NO_PG+x} ]; then echo "r'${VIASH_PAR_NO_PG//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +def generate_args(par, config): + # fetch arguments from config + arguments = [ + arg + for group in config["functionality"]["argument_groups"] + for arg in group["arguments"] + ] + + cmd_args = [] + + for arg in arguments: + arg_val = par.get(arg["name"].removeprefix("--")) + # The info key is always present (changed in viash 0.7.4) + # in the parsed config (None if not specified in source config) + info = arg["info"] or {} + orig_arg = info.get("orig_arg") + if arg_val and orig_arg: + if not arg.get("multiple", False): + arg_val = [arg_val] + + if arg["type"] in ["boolean_true", "boolean_false"]: + # if argument is a boolean_true or boolean_false, simply add the flag + arg_val = [orig_arg] + elif orig_arg.startswith("-"): + # if the orig arg flag is not a positional, + # add the flag in front of each element and flatten + arg_val = [str(x) for val in arg_val for x in [orig_arg, val]] + + cmd_args.extend(arg_val) + + return cmd_args + +# read config arguments +config = yaml.safe_load(Path(meta["config"]).read_text()) + +print(">> Constructing command", flush=True) +cmd_args = [ "samtools", "sort" ] + generate_args(par, config) + +# manually process cpus parameter +if 'cpus' in meta and meta['cpus']: + cmd_args.extend(["--threads", str(meta["cpus"])]) +# add memory +if 'memory_mb' in meta and meta['memory_mb']: + import math + mem_per_thread = math.ceil(meta['memory_mb'] * .8 / meta['cpus']) + 
cmd_args.extend(["-m", f"{mem_per_thread}M"]) + +with tempfile.TemporaryDirectory(prefix="samtools-", dir=meta["temp_dir"]) as temp_dir: + # add tempdir + cmd_args.extend(["-T", str(temp_dir + "/")]) + + # run command + print(">> Running samtools sort with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + subprocess.run(cmd_args, check=True) + +if par.get("output_bai"): + print(">> Running samtools index with command:", flush=True) + cmd_index_args = ["samtools", "index", "-b", par["output_bam"], par["output_bai"]] + print("+ " + ' '.join([str(x) for x in cmd_index_args]), flush=True) + subprocess.run(cmd_index_args, check=True) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_samtools_sort", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
              'example': 'my_params.yaml',
              'multiple': false,
              'hidden': true
            ],
          ]
        ]
      ]
    ]
  ]

  return processConfig(mergeMap(config, localConfig))
}

// helper functions for generating help //

/**
 * Wrap a string into lines of at most `maxLength` characters, breaking on whitespace.
 * Based on io.viash.helpers.Format.wordWrap.
 *
 * @param str       The text to wrap.
 * @param maxLength Maximum length of a line. A single word longer than this
 *                  is kept on a line of its own.
 *
 * @return A list of lines. The list is empty when `str` contains no words.
 */
def formatWordWrap(str, maxLength) {
  def words = str.split("\\s").toList()
  if (words.isEmpty()) {
    return []
  }

  def lines = []
  // start from the first word so that the first line has no leading space
  def line = words.removeAt(0)
  while (!words.isEmpty()) {
    def word = words.removeAt(0)
    if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  lines.add(line)
  return lines
}

/**
 * Wrap a multi-paragraph string. Every "\n"-separated paragraph is wrapped
 * separately into lines of at most `maxLength` characters.
 * Based on Format.paragraphWrap.
 *
 * @param str       The text to wrap.
 * @param maxLength Maximum length of a line.
 *
 * @return A list with the wrapped lines of all paragraphs, in order.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    // a whitespace-only paragraph splits into zero words; keep it as an empty line
    def line = words.isEmpty() ? "" : words.removeAt(0)
    while (!words.isEmpty()) {
      def word = words.removeAt(0)
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    outLines.add(line)
  }
  return outLines
}

/**
 * Render the help text of one argument: its name, its properties
 * (type, default, example, choices, min, max) and its wrapped description.
 *
 * @param param The argument settings, read as a map-like object
 *              (plainName, type, required, multiple, direction, ...).
 *
 * @return The help text for this argument as a String.
 */
def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    // direction may be unset; only an explicit "output" is flagged
    ["output", param.direction?.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      example = param.example.toString()
    }
  }
  def min = param.min?.toString()
  def max = param.max?.toString()

  // Literal replace() is used on purpose: with replaceAll() the backslash in the
  // replacement string is an escape character and silently disappears.
  def escapeChoice = { choice ->
    def s1 = choice.replace("\n", "\\n")
    def s2 = s1.replace("\"", "\\\"")
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    // show newlines inside a value as a literal backslash-n
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

/**
 * Render the complete help message of a component: name and version,
 * description, usage and one section per argument group.
 * Based on Helper.generateHelp() in Helper.scala
 *
 * @param config The Viash config; only `config.functionality` is read.
 *
 * @return The help message as a String.
 */
def generateHelp(config) {
  def fun = config.functionality

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    // a group lists its arguments either by name or as full argument maps;
    // names that cannot be resolved are dropped
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the help message and stop the run when the 'help' parameter exists.
 *
 * @param config The Viash config of the component.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Guess the format of the 'param_list' parameter.
 *
 * @param params The parameter map.
 *
 * @return "none" when 'param_list' is missing or null, "asis" when it is not a
 *         String, "csv" / "json" / "yaml" based on the file extension, and
 *         "yaml_blob" for any other String.
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// shared by the three deprecated functions below so that only one warning is printed
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: turn the parameters (and an optional 'param_list') into a list
 * of argument maps, applying the defaults, 'multiple' splitting and type
 * casting defined in the Viash config.
 *
 * @param params The parameter map.
 * @param config The Viash config.
 *
 * @return A list of maps, one per parameter set. Each map must contain a unique 'id'.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has been deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // every parser returns [ file the list was read from (or null), list of maps ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally so the value does not leak into the script binding
        def parData
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              // resolve relative to the file the param_list was read from
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          parData = parData.collect{it as Boolean}
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): the closure receives the map entry, which is never null,
      // so this filter looks like a no-op - confirm the intent before changing it.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) leaves ppIds untouched so that the message still shows the duplicates
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: same as paramsToList(), but returns the parameter sets as a channel.
 *
 * @param params The parameter map.
 * @param config The Viash config.
 *
 * @return The result of Channel.fromList() on the processed parameter sets.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has been deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: same as paramsToChannel(), but every event is mapped to [id, parameter map].
 *
 * @param params The parameter map.
 * @param config The Viash config.
 *
 * @return A channel with events of the form [id, parameter map].
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has been deprecated in Viash 0.7.0. " +
      "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the values of arguments with 'multiple: true' have been
 *         split into a list using their separator. Values of parameters that are not
 *         found in the config are returned unaltered.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // declared with 'def' so the list stays local instead of being written to the script binding + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether auto exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // start from the existing publishDir entries, or an empty list when none are set + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // the hint lines are concatenated into one message; a comma here would pass them as a separate argument + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // 'def' keeps this local to the closure; an undeclared name would be shared through the script binding + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // the instanceof test is parenthesised because '!' binds tighter than 'instanceof' + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/samtools_sort/nextflow.config b/target/nextflow/mapping/samtools_sort/nextflow.config new file mode 100644 index 
00000000000..0fd6f643c35 --- /dev/null +++ b/target/nextflow/mapping/samtools_sort/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'samtools_sort' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Sort and (optionally) index alignments.\n\nReads are sorted by leftmost coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\nNote that to generate an index file (by specifying `--output_bai`), the default coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by ` options are incompatible with `--output_bai`. \n' + author = 'Robrecht Cannoodt, Angela Oliveira Pisco' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = 
false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/samtools_sort/nextflow_params.yaml b/target/nextflow/mapping/samtools_sort/nextflow_params.yaml new file mode 100644 index 00000000000..c3bf9c86cc2 --- /dev/null +++ b/target/nextflow/mapping/samtools_sort/nextflow_params.yaml @@ -0,0 +1,19 @@ +# Arguments +minimizer_cluster: false +# minimizer_kmer: 20 +sort_by_read_names: false +# sort_by: "foo" +no_pg: false + +# Input +input: # please fill in - example: "input.bam" + +# Output +# output_bam: "$id.$key.output_bam.bam" +# output_bai: "$id.$key.output_bai.bai" +# output_format: "bam" +# compression: 5 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git 
a/target/nextflow/mapping/samtools_sort/nextflow_schema.json b/target/nextflow/mapping/samtools_sort/nextflow_schema.json new file mode 100644 index 00000000000..3e9b8d6c456 --- /dev/null +++ b/target/nextflow/mapping/samtools_sort/nextflow_schema.json @@ -0,0 +1,195 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "samtools_sort", +"description": "Sort and (optionally) index alignments.\n\nReads are sorted by leftmost coordinates, or by read name when `--sort_by_read_names` is used.\n\nAn appropriate `@HD-SO` sort order header tag will be added or an existing one updated if necessary.\n\nNote that to generate an index file (by specifying `--output_bai`), the default coordinate sort must be used.\nThus the `--sort_by_read_names` and `--sort_by \u003cTAG\u003e` options are incompatible with `--output_bai`. \n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "minimizer_cluster": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort unmapped reads (those in chromosome \"*\") by their sequence minimiser (Schleimer et al", + "help_text": "Type: `boolean_true`, default: `false`. Sort unmapped reads (those in chromosome \"*\") by their sequence minimiser (Schleimer et al., 2003; Roberts et al., 2004), \nalso reverse complementing as appropriate. This has the effect of collating some similar data together, improving the \ncompressibility of the unmapped sequence. The minimiser kmer size is adjusted using the -K option. Note data compressed \nin this manner may need to be name collated prior to conversion back to fastq.\n\nMapped sequences are sorted by chromosome and position. \n" + , + "default": "False" + } + + + , + "minimizer_kmer": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. 
Sets the kmer size to be used in the -M option", + "help_text": "Type: `integer`, example: `20`. Sets the kmer size to be used in the -M option." + + } + + + , + "sort_by_read_names": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort by read names (i", + "help_text": "Type: `boolean_true`, default: `false`. Sort by read names (i.e., the QNAME field) rather than by chromosomal coordinates." + , + "default": "False" + } + + + , + "sort_by": { + "type": + "string", + "description": "Type: `string`. Sort first by this value in the alignment tag, then by position or name (if also using -n)", + "help_text": "Type: `string`. Sort first by this value in the alignment tag, then by position or name (if also using -n)." + + } + + + , + "no_pg": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not add a @PG line to the header of the output file", + "help_text": "Type: `boolean_true`, default: `false`. Do not add a @PG line to the header of the output file." + , + "default": "False" + } + + +} +}, + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.bam`. Path to the SAM/BAM/CRAM files containing the mapped reads", + "help_text": "Type: `file`, required, example: `input.bam`. Path to the SAM/BAM/CRAM files containing the mapped reads." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_bam": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_bam.bam`, example: `output.bam`. Filename to output the counts to", + "help_text": "Type: `file`, required, default: `$id.$key.output_bam.bam`, example: `output.bam`. Filename to output the counts to." 
+ , + "default": "$id.$key.output_bam.bam" + } + + + , + "output_bai": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_bai.bai`, example: `output.bam.bai`. BAI-format index for BAM file", + "help_text": "Type: `file`, default: `$id.$key.output_bai.bai`, example: `output.bam.bai`. BAI-format index for BAM file." + , + "default": "$id.$key.output_bai.bai" + } + + + , + "output_format": { + "type": + "string", + "description": "Type: `string`, example: `bam`, choices: ``sam`, `bam`, `cram``. The output format", + "help_text": "Type: `string`, example: `bam`, choices: ``sam`, `bam`, `cram``. The output format. By default, samtools tries to select a format based on the -o filename extension; if output is to standard output or no format can be deduced, bam is selected.", + "enum": ["sam", "bam", "cram"] + + + } + + + , + "compression": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. Compression level, from 0 (uncompressed) to 9 (best", + "help_text": "Type: `integer`, example: `5`. Compression level, from 0 (uncompressed) to 9 (best" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/star_align/.config.vsh.yaml b/target/nextflow/mapping/star_align/.config.vsh.yaml new file mode 100644 index 00000000000..16d6a77a3a0 --- /dev/null +++ b/target/nextflow/mapping/star_align/.config.vsh.yaml @@ -0,0 +1,2535 @@ +functionality: + name: "star_align" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input/Output" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--readFilesIn" + description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ + \ argument in the STAR command." 
+ info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "--genomeDir" + description: "Path to the reference built by star_build_reference. Corresponds\ + \ to the --genomeDir argument in the STAR command." + info: null + example: + - "/path/to/reference" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--outFileNamePrefix" + description: "Path to output directory. Corresponds to the --outFileNamePrefix\ + \ argument in the STAR command." + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Run Parameters" + arguments: + - type: "integer" + name: "--runRNGseed" + description: "random number generator seed." + info: null + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome Parameters" + arguments: + - type: "string" + name: "--genomeLoad" + description: "mode of shared memory usage for the genome files. Only used with\ + \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ + \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ + \ but remove it after run\n- LoadAndExit ... load genome into shared memory\ + \ and exit, keeping the genome in memory for future runs\n- Remove \ + \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ + \ ... 
do not use shared memory, each job will have its own private copy of\ + \ the genome" + info: null + example: + - "NoSharedMemory" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--genomeFastaFiles" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--genomeFileSizes" + description: "genome files exact sizes in bytes. Typically, this should not\ + \ be defined by the user." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeTransformOutput" + description: "which output to transform back to original genome\n\n- SAM \ + \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ + \ None ... no transformation of the output" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeChrSetMitochondrial" + description: "names of the mitochondrial chromosomes. Presently only used for\ + \ STARsolo statistics output/" + info: null + example: + - "chrM" + - "M" + - "MT" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdbFileChrStartEnd" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. 
Multiple files can be\ + \ supplied and will be concatenated." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--sjdbGTFfile" + description: "path to the GTF file with annotations" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFchrPrefix" + description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSEMBL annotations with UCSC genomes)" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFfeatureExon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: null + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentTranscript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: null + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: null + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneName" + description: "GTF attribute name for parent gene name" + info: null + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneType" + description: "GTF attribute name for parent gene type" + info: null + example: + - "gene_type" + - "gene_biotype" + required: false + direction: 
"input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--sjdbOverhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--sjdbScore" + description: "extra alignment score for alignments that cross database junctions" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbInsertSave" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: null + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variation parameters" + arguments: + - type: "string" + name: "--varVCFfile" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Parameters" + arguments: + - type: "string" + name: "--readFilesType" + description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ + - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand\ + \ samtools view\n- SAM PE ... 
SAM or BAM paired-end reads; for BAM use\ + \ --readFilesCommand samtools view" + info: null + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesSAMattrKeep" + description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMattrKeep RG PL\n\n- All ... keep all tags\n\ + - None ... do not keep any tags" + info: null + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--readFilesManifest" + description: "path to the \"manifest\" file with the names of read files. The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ + \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ + \ but not tabs are allowed in file names.\nIf read_group_line does not start\ + \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ + If read_group_line starts with ID:, it can contain several fields separated\ + \ by $tab$, and all fields will be copied verbatim into SAM @RG header\ + \ line." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesPrefix" + description: "prefix for the read files names, i.e. it will be added in front\ + \ of the strings in --readFilesIn" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesCommand" + description: "command line to execute for each of the input file. This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." 
+ info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readMapNumber" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readMatesLengthsIn" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ + \ mates are the same / not the same. NotEqual is safe in all situations." + info: null + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readNameSeparator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: null + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readQualityScoreBase" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: null + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Clipping" + arguments: + - type: "string" + name: "--clipAdapterType" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ + \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ + \ ... 
no adapter clipping, all other clip* parameters are disregarded" + info: null + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--clip3pNbases" + description: "number(s) of bases to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--clip3pAdapterSeq" + description: "adapter sequences to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ + \ sequence with the length equal to read length" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "double" + name: "--clip3pAdapterMMp" + description: "max proportion of mismatches for 3p adapter clipping for each\ + \ mate. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip3pAfterAdapterNbases" + description: "number of bases to clip from 3p of each mate after the adapter\ + \ clipping. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip5pNbases" + description: "number(s) of bases to clip from 5p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." 
+ info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Limits" + arguments: + - type: "long" + name: "--limitGenomeGenerateRAM" + description: "maximum available RAM (bytes) for genome generation" + info: null + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitIObufferSize" + description: "max available buffers size (bytes) for input/output, per thread" + info: null + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "long" + name: "--limitOutSAMoneReadBytes" + description: "max size of the SAM record (bytes) for one read. Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: null + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJoneRead" + description: "max number of junctions for one read (including all multi-mappers)" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJcollapsed" + description: "max number of collapsed junctions" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitBAMsortRAM" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ + \ be set to the genome index size. 0 value can only be used with --genomeLoad\ + \ NoSharedMemory option." 
+ info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitSjdbInsertNsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitNreadsSoft" + description: "soft limit on the number of reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: general" + arguments: + - type: "string" + name: "--outTmpKeep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... keep all files" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outStd" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" + info: null + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outReadsUnmapped" + description: "output of unmapped and partially mapped (i.e. 
mapped only one\ + \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ + \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outQSconversionAdd" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outMultimapperOrder" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." + info: null + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--outSAMtype" + description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ + \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ + 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ + \ ... sorted by coordinate. This option will allocate extra memory for sorting\ + \ which can be specified by --limitBAMsortRAM." + info: null + example: + - "SAM" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMmode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... 
full SAM but without quality scores" + info: null + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMstrandField" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ + \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ + \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ + \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ + \ local alignment score, +1/-1 for matches/mismatches, score* penalties for\ + \ indels and gaps. For PE reads, total score for two mates. Standard SAM tag.\n\ + - nM ... number of mismatches. For PE reads, sum over two mates.\n\ + - NM ... edit distance to the reference (number of mismatched + inserted\ + \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ + \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ + \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ + \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ + \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ + \ annotated, 20 is added to its motif value.\n- jI ... start and\ + \ end of introns for all junctions (1-based).\n- XS ... 
alignment\ + \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ + \ string. Standard SAM tag.\n- ch ... marks all segments of all chimeric\ + \ alignments for --chimOutType WithinBAM output.\n- cN ... number\ + \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ + \ ... variant allele\n- vG ... genomic coordinate of the variant\ + \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ + \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ + \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ + \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ + \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ + \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ + \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ + \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ + \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ + \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ + \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ + \ genome generated with --genomeTransformType Diploid .\n- rB ...\ + \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ + \ of the variant." + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMattrIHstart" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMunmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ... 
output unmapped reads within the main SAM\ + \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ + \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ + \ to its mapped mate. Only affects multi-mapping reads." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMorder" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: null + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMprimaryFlag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ + \ the best score is primary\n- AllBestScore ... all alignments with the best\ + \ score are primary" + info: null + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMreadID" + description: "read ID record type\n\n- Standard ... 
first word (until space)\ + \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ + \ read number (index) in the FASTx file" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMmapqUnique" + description: "0 to 255: the MAPQ value for unique mappers" + info: null + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagOR" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ + \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ + \ are not set otherwise." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagAND" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set\ + \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ + \ are not set otherwise." + info: null + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattrRGline" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ + \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. 
Any\ + \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ + \ correspond to different (comma separated) input files in --readFilesIn.\ + \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ + \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderHD" + description: "@HD (header) line of the SAM header" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderPG" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderCommentFile" + description: "path to the file with @CO (comment) lines of the SAM header" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMfilter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ + \ at the mapping stage." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMmultNmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ... 
all alignments (up to\ + \ --outFilterMultimapNmax) will be output" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMtlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ + - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMcompression" + description: "-1 to 10 BAM compression level, -1=default compression (6?),\ + \ 0=no compression, 10=maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingThreadN" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingBinsN" + description: ">0: number of genome bins for coordinate-sorting" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "BAM processing" + arguments: + - type: "string" + name: "--bamRemoveDuplicatesType" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ + \ - ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--bamRemoveDuplicatesMate2basesN" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Wiggle" + arguments: + - type: "string" + name: "--outWigType" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ + \ only 2nd read" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outWigStrand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: null + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigReferencesPrefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigNorm" + description: "type of normalization for the signal\n\n- RPM ... reads per\ + \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" + info: null + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering" + arguments: + - type: "string" + name: "--outFilterType" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: null + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapScoreRange" + description: "the score range below the maximum score for multimapping alignments" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapNmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than\ + \ this value.\n\nOtherwise no alignments will be output, and the read will\ + \ be counted as \"mapped to too many loci\" in the Log.final.out ." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMismatchNmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverLmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." 
+ info: null + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverReadLmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterScoreMin" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterScoreMinOverLread" + description: "same as outFilterScoreMin, but normalized to read length (sum\ + \ of mates' lengths for paired-end reads)" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMatchNmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMatchNminOverLread" + description: "same as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronMotifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter\ + \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ... 
filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. The annotated non-canonical\ + \ junctions will be kept." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronStrands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: null + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--outSJtype" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--outSJfilterReads" + description: "which reads to consider for collapsed splice junctions output\n\ + \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ + \ mapping reads only" + info: null + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSJfilterOverhangMin" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + does not apply to annotated junctions" + info: null + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountUniqueMin" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ + \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ + \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ + \ are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountTotalMin" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterDistToOtherSJmin" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: null + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterIntronMaxVsReadN" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ + \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: null + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Scoring" + arguments: + - type: "integer" + name: "--scoreGap" + description: "splice junction penalty (independent on intron motif)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapNoncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapGCAG" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: null + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapATAC" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGenomicLengthLog2scale" + description: "extra score logarithmically scaled with genomic length of the\ + \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelOpen" + description: "deletion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelBase" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: 
"par" + - type: "integer" + name: "--scoreInsOpen" + description: "insertion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsBase" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreStitchSJshift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seedSearchStartLmax" + description: "defines the search start point through the read - the read is\ + \ split into pieces no longer than this value" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--seedSearchStartLmaxOverLread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSearchLmax" + description: "defines the maximum length of the seeds, if =0 seed length is\ + \ not limited" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMultimapNmax" + description: "only pieces that map fewer than this value are utilized in the\ + \ stitching procedure" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerReadNmax" + description: "max 
number of seeds per read" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerWindowNmax" + description: "max number of seeds per window" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedNoneLociPerWindow" + description: "max number of one seed loci per window" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSplitMin" + description: "min length of the seed sequences split by Ns or mate gap" + info: null + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMapMin" + description: "min length of seeds to be mapped" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMin" + description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: null + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMax" + description: "maximum intron size, if 0, max intron size will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignMatesGapMax" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: 
"--alignSJoverhangMin" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJstitchMismatchNmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ + \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: null + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--alignSJDBoverhangMin" + description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ + \ alignments" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSplicedMateMapLmin" + description: "minimum mapped length for a read mate that is spliced" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alignSplicedMateMapLminOverLmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignWindowsPerReadNmax" + description: "max number of windows per read" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerWindowNmax" + description: "max number of transcripts per window" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerReadNmax" + description: "max number of different alignments per read 
to consider" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsType" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: null + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsProtrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ + \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion\ + \ as concordant pairs\n- DiscordantPair ... report alignments\ + \ with non-zero protrusion as discordant pairs" + info: null + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignSoftClipAtReferenceEnds" + description: "allow the soft-clipping of the alignments past the end of the\ + \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ + \ with Cufflinks" + info: null + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignInsertionFlush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... 
insertions are flushed to the right" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Paired-End reads" + arguments: + - type: "integer" + name: "--peOverlapNbasesMin" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. Specify >0 value to switch on the \"merging of overlapping\ + \ mates\" algorithm." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--peOverlapMMp" + description: "maximum proportion of mismatched bases in the overlap area" + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--winAnchorMultimapNmax" + description: "max number of loci anchors are allowed to map to" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winBinNbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins." 
+ info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winAnchorDistNbins" + description: "max number of bins between two anchors that allows aggregation\ + \ of anchors into one window" + info: null + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winFlankNbins" + description: "log2(winFlank), where winFlank is the size of the left and right\ + \ flanking regions for each window" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--winReadCoverageRelativeMin" + description: "minimum relative coverage of the read sequence by the seeds in\ + \ a window, for STARlong algorithm only." + info: null + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winReadCoverageBasesMin" + description: "minimum number of bases covered by the seeds in a window, for\ + \ STARlong algorithm only." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chimOutType" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ + - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ + - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" + info: null + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimSegmentMin" + description: "minimum length of chimeric segment length, if ==0, no chimeric\ + \ output" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreMin" + description: "minimum total (summed) score of the chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreDropMax" + description: "max drop (difference) of chimeric score (the sum of scores of\ + \ all chimeric segments) from the read length" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreSeparation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreJunctionNonGTAG" + description: "penalty for a non-GT/AG chimeric junction" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimJunctionOverhangMin" + description: "minimum overhang for a chimeric junction" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimSegmentReadGapMax" + description: "maximum gap in the read sequence between chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - 
type: "string" + name: "--chimFilter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: null + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimMainSegmentMultNmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapNmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ + \ old scheme for chimeric detection which only considered unique alignments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapScoreRange" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. Only works with --chimMultimapNmax > 1" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimNonchimScoreDropMin" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimOutJunctionFormat" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ + \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ + \ command line and Nreads: total, unique/multi-mapping" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quantMode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--quantTranscriptomeBAMcompression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ + \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ + - 10 ... maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--quantTranscriptomeBan" + description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ + \ prohibit indels, soft clipping and single-end alignments - compatible with\ + \ RSEM\n- Singleend ... prohibit single-end alignments" + info: null + example: + - "IndelSoftclipSingleend" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopassMode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--twopass1readsN" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." 
+ info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "WASP parameters" + arguments: + - type: "string" + name: "--waspOutputMode" + description: "WASP allele-specific output type. This is re-implementation of\ + \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ + \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ + \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "STARsolo (single cell RNA-seq) parameters" + arguments: + - type: "string" + name: "--soloType" + description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ + \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ + \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ + \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ + \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ + \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ + \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ + \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ + \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ + \ UMI sequences, alignments deduplicated according to alignment start and\ + \ end (after extending soft-clipped bases)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCBwhitelist" + description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ + \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ + \ all cell barcodes are allowed" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--soloCBstart" + description: "cell barcode start base" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloCBlen" + description: "cell barcode length" + info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIstart" + description: "UMI start base" + info: null + example: + - 17 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIlen" + description: "UMI length" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeReadLength" + description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ + - 0 ... not defined, do not check" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeMate" + description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ + \n- 0 ... barcode sequence is on separate read, which should always be the\ + \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ + \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBposition" + description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ + \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ + \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ + \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ + \ position of the CB start(end) with respect to the Anchor Base\nStrings\ + \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ + \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIposition" + description: "position of the UMI on the barcode read, same as soloCBposition\n\ + \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloUMIposition\ + \ 3_9_3_14" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloAdapterSequence" + description: "adapter sequence to anchor barcodes. Only one adapter sequence\ + \ is allowed." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloAdapterMismatchesNmax" + description: "maximum number of mismatches allowed in adapter sequence." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBmatchWLtype" + description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ + \ ... 
only one match in whitelist with 1 mismatched base allowed.\ + \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ + \ ... multiple matches in whitelist with 1 mismatched\ + \ base allowed, posterior probability calculation is used to choose one of the\ + \ matches.\nAllowed CBs have to have at least one read with exact match. This\ + \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ + \ ... same as 1MM_multi, but pseudocounts of 1 are added to all whitelist\ + \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ + \ multimatching to WL is allowed for CBs with N-bases. This option matches\ + \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ + \ up to edit distance of 3 for each of the barcodes. May include one deletion\ + \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ + \ passlist barcodes are not allowed. Similar to ParseBio Split-seq pipeline." + info: null + example: + - "1MM_multi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeSeq" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ + \ CR UR .\nThis parameter is required when running STARsolo with input from\ + \ SAM." 
+ info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeQual" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ + \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ + \ to all bases." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloStrand" + description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ + \ information\n- Forward ... read strand same as the original RNA molecule\n\ + - Reverse ... read strand opposite to the original RNA molecule" + info: null + example: + - "Forward" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloFeatures" + description: "genomic features for which the UMI counts per Cell Barcode are\ + \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ + - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ + \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ + \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ + \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ + \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ + \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ + \ Do not count reads with 100% exonic overlap in the antisense direction." + info: null + example: + - "Gene" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloMultiMappers" + description: "counting method for reads mapping to multiple genes\n\n- Unique\ + \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ + \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ + \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ + \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ + \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ + \ Maximization algorithm" + info: null + example: + - "Unique" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIdedup" + description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ + \ ... all UMIs with 1 mismatch distance to each other\ + \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ + \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ + \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ + \ but with more stringent criteria for duplicate UMIs\n- Exact \ + \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ + \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ + \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." + info: null + example: + - "1MM_All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIfiltering" + description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ + - - ... basic filtering: remove UMIs with N and homopolymers\ + \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ + \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ + \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ + \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ + \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFileNames" + description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ + \ barcode_sequences cell_feature_count_matrix" + info: null + example: + - "Solo.out/" + - "features.tsv" + - "barcodes.tsv" + - "matrix.mtx" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellFilter" + description: "cell filtering type and parameters\n\n- None ... do\ + \ not output filtered cells\n- TopCells ... only report top cells by\ + \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ + \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ + \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ + \ ratio for UMI count\nThe hardcoded values are from CellRanger: nExpectedCells=3000;\ + \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ + \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ + \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ + \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ + \ FDR simN\nThe hardcoded values are from CellRanger: 3000 \ + \ 0.99 10 45000 90000 500 0.01\ + \ 20000 0.01 10000" + info: null + example: + - "CellRanger2.2" + - "3000" + - "0.99" + - "10" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFormatFeaturesGeneField3" + description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ + \ is output." 
+ info: null + example: + - "Gene Expression" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellReadStats" + description: "Output reads statistics for each CB\n\n- Standard ... standard\ + \ output" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Align fastq files using STAR." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "docker" + env: + - "STAR_VERSION 2.7.10b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + 
label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align/star_align" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/star_align/main.nf b/target/nextflow/mapping/star_align/main.nf new file mode 100644 index 00000000000..055bb223f25 --- /dev/null +++ b/target/nextflow/mapping/star_align/main.nf @@ -0,0 +1,5287 @@ +// star_align 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "star_align", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input/Output", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "--readFilesIn" + ], + "description" : "The FASTQ files to be analyzed. 
Corresponds to the --readFilesIn argument in the STAR command.", + "example" : [ + "mysample_S1_L001_R1_001.fastq.gz", + "mysample_S1_L001_R2_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "alternatives" : [ + "--genomeDir" + ], + "description" : "Path to the reference built by star_build_reference. Corresponds to the --genomeDir argument in the STAR command.", + "example" : [ + "/path/to/reference" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--outFileNamePrefix" + ], + "description" : "Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command.", + "example" : [ + "/path/to/foo" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Run Parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--runRNGseed", + "description" : "random number generator seed.", + "example" : [ + 777 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Genome Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--genomeLoad", + "description" : "mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... 
do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome", + "example" : [ + "NoSharedMemory" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--genomeFastaFiles", + "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--genomeFileSizes", + "description" : "genome files exact sizes in bytes. Typically, this should not be defined by the user.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--genomeTransformOutput", + "description" : "which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- None ... no transformation of the output", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--genomeChrSetMitochondrial", + "description" : "names of the mitochondrial chromosomes. 
Presently only used for STARsolo statistics output.", + "example" : [ + "chrM", + "M", + "MT" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Splice Junctions Database", + "arguments" : [ + { + "type" : "string", + "name" : "--sjdbFileChrStartEnd", + "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--sjdbGTFfile", + "description" : "path to the GTF file with annotations", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFchrPrefix", + "description" : "prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSEMBL annotations with UCSC genomes)", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFfeatureExon", + "description" : "feature type in GTF file to be used as exons for building transcripts", + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentTranscript", + "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", + "example" : [ + "transcript_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGene", + "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneName", + "description" : "GTF attribute name for parent gene name", + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneType", + "description" : "GTF attribute name for parent gene type", + "example" : [ + "gene_type", + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--sjdbOverhang", + "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + 
"dest" : "par" + }, + { + "type" : "integer", + "name" : "--sjdbScore", + "description" : "extra alignment score for alignments that cross database junctions", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbInsertSave", + "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", + "example" : [ + "Basic" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Variation parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--varVCFfile", + "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Read Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--readFilesType", + "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view", + "example" : [ + "Fastx" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesSAMattrKeep", + "description" : "for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... 
do not keep any tags", + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--readFilesManifest", + "description" : "path to the \\"manifest\\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs, are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be copied verbatim into SAM @RG header line.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesPrefix", + "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesCommand", + "description" : "command line to execute for each of the input files. 
This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--readMapNumber", + "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readMatesLengthsIn", + "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", + "example" : [ + "NotEqual" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readNameSeparator", + "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", + "example" : [ + "/" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--readQualityScoreBase", + "description" : "number to be subtracted from the ASCII code to get Phred quality score", + "example" : [ + 33 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Read Clipping", + "arguments" : [ + { + "type" : "string", + "name" : "--clipAdapterType", + "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. 
Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", + "example" : [ + "Hamming" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip3pNbases", + "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--clip3pAdapterSeq", + "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--clip3pAdapterMMp", + "description" : "max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip3pAfterAdapterNbases", + "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip5pNbases", + "description" : "number(s) of bases to clip from 5p of each mate. 
If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Limits", + "arguments" : [ + { + "type" : "long", + "name" : "--limitGenomeGenerateRAM", + "description" : "maximum available RAM (bytes) for genome generation", + "example" : [ + 31000000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitIObufferSize", + "description" : "max available buffers size (bytes) for input/output, per thread", + "example" : [ + 30000000, + 50000000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitOutSAMoneReadBytes", + "description" : "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", + "example" : [ + 100000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitOutSJoneRead", + "description" : "max number of junctions for one read (including all multi-mappers)", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitOutSJcollapsed", + "description" : "max number of collapsed junctions", + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitBAMsortRAM", + "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 
0 value can only be used with --genomeLoad NoSharedMemory option.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitSjdbInsertNsj", + "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitNreadsSoft", + "description" : "soft limit on the number of reads", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output: general", + "arguments" : [ + { + "type" : "string", + "name" : "--outTmpKeep", + "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outStd", + "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM", + "example" : [ + "Log" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outReadsUnmapped", + "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outQSconversionAdd", + "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outMultimapperOrder", + "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases.", + "example" : [ + "Old_2.4" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output: SAM and BAM", + "arguments" : [ + { + "type" : "string", + "name" : "--outSAMtype", + "description" : "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. 
This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM.", + "example" : [ + "SAM" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMmode", + "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", + "example" : [ + "Full" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMstrandField", + "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMattributes", + "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). 
Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --outSAMstrandField.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n***Unsupported/undocumented:\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant.", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMattrIHstart", + "description" : "start value for the IH attribute. 
0 may be required by some downstream software, such as Cufflinks or StringTie.", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMunmapped", + "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMorder", + "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", + "example" : [ + "Paired" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMprimaryFlag", + "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", + "example" : [ + "OneBestScore" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMreadID", + "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... 
read number (index) in the FASTx file", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMmapqUnique", + "description" : "0 to 255: the MAPQ value for unique mappers", + "example" : [ + 255 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMflagOR", + "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMflagAND", + "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.", + "example" : [ + 65535 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMattrRGline", + "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --outSAMattrRGline ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. 
Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderHD", + "description" : "@HD (header) line of the SAM header", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderPG", + "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderCommentFile", + "description" : "path to the file with @CO (comment) lines of the SAM header", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMfilter", + "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMmultNmax", + "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... 
all alignments (up to --outFilterMultimapNmax) will be output", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMtlen", + "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMcompression", + "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMsortingThreadN", + "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMsortingBinsN", + "description" : ">0: number of genome bins for coordinate-sorting", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "BAM processing", + "arguments" : [ + { + "type" : "string", + "name" : "--bamRemoveDuplicatesType", + "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... 
no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--bamRemoveDuplicatesMate2basesN", + "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Wiggle", + "arguments" : [ + { + "type" : "string", + "name" : "--outWigType", + "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigStrand", + "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", + "example" : [ + "Stranded" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigReferencesPrefix", + "description" : "prefix matching reference names to include in the output wiggle file, e.g. 
\\"chr\\", default \\"-\\" - include all references", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigNorm", + "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", + "example" : [ + "RPM" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Filtering", + "arguments" : [ + { + "type" : "string", + "name" : "--outFilterType", + "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", + "example" : [ + "Normal" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapScoreRange", + "description" : "the score range below the maximum score for multimapping alignments", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapNmax", + "description" : "maximum number of loci the read is allowed to map to. 
Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMismatchNmax", + "description" : "alignment will be output only if it has no more mismatches than this value.", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverLmax", + "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", + "example" : [ + 0.3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverReadLmax", + "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterScoreMin", + "description" : "alignment will be output only if its score is higher than or equal to this value.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterScoreMinOverLread", + "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { 
+ "type" : "integer", + "name" : "--outFilterMatchNmin", + "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMatchNminOverLread", + "description" : "sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outFilterIntronMotifs", + "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outFilterIntronStrands", + "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", + "example" : [ + "RemoveInconsistentStrands" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output splice junctions (SJ.out.tab)", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJtype", + "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... 
no splice junction output", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Filtering: Splice Junctions", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJfilterReads", + "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterOverhangMin", + "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions", + "example" : [ + 30, + 12, + 12, + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountUniqueMin", + "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountTotalMin", + "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterDistToOtherSJmin", + "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", + "example" : [ + 10, + 0, + 5, + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterIntronMaxVsReadN", + "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", + "example" : [ + 50000, + 100000, + 200000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Scoring", + "arguments" : [ + { + "type" : "integer", + "name" : "--scoreGap", + "description" : "splice junction penalty (independent on intron motif)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapNoncan", + "description" : "non-canonical junction penalty (in addition to scoreGap)", + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapGCAG", + "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", + "example" : [ + -4 + ], + "required" : false, + "direction" : 
"input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapATAC", + "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGenomicLengthLog2scale", + "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreDelOpen", + "description" : "deletion open penalty", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreDelBase", + "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreInsOpen", + "description" : "insertion open penalty", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreInsBase", + "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreStitchSJshift", + "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : 
false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Alignments and Seeding", + "arguments" : [ + { + "type" : "integer", + "name" : "--seedSearchStartLmax", + "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--seedSearchStartLmaxOverLread", + "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedSearchLmax", + "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedMultimapNmax", + "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedPerReadNmax", + "description" : "max number of seeds per read", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedPerWindowNmax", + "description" : "max number of seeds per window", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedNoneLociPerWindow", + "description" : "max number of one seed loci per 
window", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedSplitMin", + "description" : "min length of the seed sequences split by Ns or mate gap", + "example" : [ + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedMapMin", + "description" : "min length of seeds to be mapped", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignIntronMin", + "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", + "example" : [ + 21 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignIntronMax", + "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignMatesGapMax", + "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJoverhangMin", + "description" : "minimum overhang (i.e. 
block size) for spliced alignments", + "exampl''' + '''e" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJstitchMismatchNmax", + "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", + "example" : [ + 0, + -1, + 0, + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJDBoverhangMin", + "description" : "minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments", + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSplicedMateMapLmin", + "description" : "minimum mapped length for a read mate that is spliced", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--alignSplicedMateMapLminOverLmate", + "description" : "alignSplicedMateMapLmin normalized to mate length", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignWindowsPerReadNmax", + "description" : "max number of windows per read", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerWindowNmax", + "description" : "max number of transcripts per window", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : 
":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerReadNmax", + "description" : "max number of different alignments per read to consider", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignEndsType", + "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", + "example" : [ + "Local" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignEndsProtrude", + "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", + "example" : [ + "0 ConcordantPair" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignSoftClipAtReferenceEnds", + "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... 
prohibit, useful for compatibility with Cufflinks", + "example" : [ + "Yes" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignInsertionFlush", + "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Paired-End reads", + "arguments" : [ + { + "type" : "integer", + "name" : "--peOverlapNbasesMin", + "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merginf of overlapping mates\\" algorithm.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--peOverlapMMp", + "description" : "maximum proportion of mismatched bases in the overlap area", + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Windows, Anchors, Binning", + "arguments" : [ + { + "type" : "integer", + "name" : "--winAnchorMultimapNmax", + "description" : "max number of loci anchors are allowed to map to", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winBinNbits", + "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winAnchorDistNbins", + 
"description" : "max number of bins between two anchors that allows aggregation of anchors into one window", + "example" : [ + 9 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winFlankNbins", + "description" : "log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--winReadCoverageRelativeMin", + "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", + "example" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winReadCoverageBasesMin", + "description" : "minimum number of bases covered by the seeds in a window , for STARlong algorithm only.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Chimeric Alignments", + "arguments" : [ + { + "type" : "string", + "name" : "--chimOutType", + "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental chimeric alignments", + "example" : [ + "Junctions" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimSegmentMin", + "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreMin", + "description" : "minimum total (summed) score of the chimeric segments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreDropMax", + "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreSeparation", + "description" : "minimum difference (separation) between the best chimeric score and the next one", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreJunctionNonGTAG", + "description" : "penalty for a non-GT/AG chimeric junction", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimJunctionOverhangMin", + "description" : "minimum overhang for a chimeric junction", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : 
"--chimSegmentReadGapMax", + "description" : "maximum gap in the read sequence between chimeric segments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--chimFilter", + "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", + "example" : [ + "banGenomicN" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMainSegmentMultNmax", + "description" : "maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMultimapNmax", + "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMultimapScoreRange", + "description" : "the score range for multi-mapping chimeras below the best chimeric score. 
Only works with --chimMultimapNmax > 1", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimNonchimScoreDropMin", + "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimOutJunctionFormat", + "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Quantification of Annotations", + "arguments" : [ + { + "type" : "string", + "name" : "--quantMode", + "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--quantTranscriptomeBAMcompression", + "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--quantTranscriptomeBan", + "description" : "prohibit various alignment type\n\n- IndelSoftclipSingleend ... 
prohibit indels, soft clipping and single-end alignments - compatible with RSEM\n- Singleend ... prohibit single-end alignments", + "example" : [ + "IndelSoftclipSingleend" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "2-pass Mapping", + "arguments" : [ + { + "type" : "string", + "name" : "--twopassMode", + "description" : "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--twopass1readsN", + "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "WASP parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--waspOutputMode", + "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "STARsolo (single cell RNA-seq) parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--soloType", + "description" : "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... 
multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBwhitelist", + "description" : "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloCBstart", + "description" : "cell barcode start base", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloCBlen", + "description" : "cell barcode length", + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloUMIstart", + "description" : "UMI start base", + "example" : [ + 17 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloUMIlen", + "description" : "UMI length", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + 
"dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloBarcodeReadLength", + "description" : "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloBarcodeMate", + "description" : "identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBposition", + "description" : "position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIposition", + "description" : "position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. 
Protocols, 2017):\n--soloCBposition 3_9_3_14", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloAdapterSequence", + "description" : "adapter sequence to anchor barcodes. Only one adapter sequence is allowed.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloAdapterMismatchesNmax", + "description" : "maximum number of mismatches allowed in adapter sequence.", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBmatchWLtype", + "description" : "matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. 
Similar to ParseBio Split-seq pipeline.", + "example" : [ + "1MM_multi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloInputSAMattrBarcodeSeq", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR .\nThis parameter is required when running STARsolo with input from SAM.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloInputSAMattrBarcodeQual", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned to all bases.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloStrand", + "description" : "strandedness of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule", + "example" : [ + "Forward" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloFeatures", + "description" : "genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... 
full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.", + "example" : [ + "Gene" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloMultiMappers", + "description" : "counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm", + "example" : [ + "Unique" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIdedup", + "description" : "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \\"directional\\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... 
CellRanger2-4 algorithm for 1MM UMI collapsing.", + "example" : [ + "1MM_All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIfiltering", + "description" : "type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloOutFileNames", + "description" : "file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", + "example" : [ + "Solo.out/", + "features.tsv", + "barcodes.tsv", + "matrix.mtx" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCellFilter", + "description" : "cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000", + "example" : [ + "CellRanger2.2", + "3000", + "0.99", + "10" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloOutFormatFeaturesGeneField3", + "description" : "field 3 in the Gene features.tsv file. If \\"-\\", then no 3rd field is output.", + "example" : [ + "Gene Expression" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCellReadStats", + "description" : "Output reads statistics for each CB\n\n- Standard ... 
standard output", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Align fastq files using STAR.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "docker", + "env" : [ + "STAR_VERSION 2.7.10b", + "PACKAGES gcc g++ make wget zlib1g-dev unzip" + ] + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic 
CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" + ] + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN 
+import re +import tempfile +import subprocess +from pathlib import Path +import tarfile +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genomeFileSizes': $( if [ ! -z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'genomeTransformOutput': $( if [ ! -z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFfile': $( if [ ! -z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFchrPrefix': $( if [ ! 
-z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesType': $( if [ ! -z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesSAMattrKeep': $( if [ ! 
-z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAdapterSeq': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'clip3pAdapterMMp': $( if [ ! 
-z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitNreadsSoft': $( if [ ! -z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outTmpKeep': $( if [ ! 
-z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMorder': $( if [ ! -z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMprimaryFlag': $( if [ ! 
-z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMtlen': $( if [ ! -z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMcompression': $( if [ ! 
-z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterIntronStrands': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterReads': $( if [ ! 
-z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapATAC': $( if [ ! -z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGenomicLengthLog2scale': $( if [ ! 
-z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerReadNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerWindowNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLmin': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLminOverLmate': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorMultimapNmax': $( if [ ! -z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winBinNbits': $( if [ ! 
-z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreJunctionNonGTAG': $( if [ ! -z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimJunctionOverhangMin': $( if [ ! 
-z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantTranscriptomeBan': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopassMode': $( if [ ! 
-z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBposition': $( if [ ! -z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIposition': $( if [ ! 
-z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIfiltering': $( if [ ! -z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloOutFileNames': $( if [ ! 
-z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + +# regex for matching R[12] fastq(gz) files +# examples: +# - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz +# - tinygex_S1_L001_I1_001.fastq.gz +fastqgz_regex = r'(.+)_(R\\\\d+)(_\\\\d+)?\\\\.fastq(\\\\.gz)?' + +# helper function for cheching whether something is a gzip +def is_gz_file(path: Path) -> bool: + with open(path, 'rb') as file: + return file.read(2) == b'\\\\x1f\\\\x8b' + +# look for fastq files in a directory +def search_fastqs(path: Path) -> list[Path]: + if path.is_dir(): + print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True) + value_paths = [file for file in path.iterdir() if re.match(fastqgz_regex, file.name) ] + return value_paths + else: + return [path] + +# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) + + with tarfile.open(par_value, 'r') as open_tar: + members = open_tar.getmembers() + root_dirs = [member + for member in members + if 
member.isdir() and member.name != '.' and '/' not in member.name] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path('.')] + open_tar.extractall(unpacked_path, members=members_to_move) + return unpacked_path + + elif par_value.is_file() and is_gz_file(par_value): + # Remove extension (if it exists) + extaction_file_name = Path(par_value.stem) + unpacked_path = temp_dir_path / extaction_file_name + print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) + + with gzip.open(par_value, 'rb') as f_in: + with open(unpacked_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + return unpacked_path + + else: + return par_value + +######################## +### Main code ### +######################## + +# rename keys and convert path strings to Path +# note: only list file arguments here. 
if non-file arguments also need to be renamed, +# the \\`processPar()\\` generator needs to be adapted +to_rename = {'input': 'readFilesIn', 'reference': 'genomeDir', 'output': 'outFileNamePrefix'} + +def process_par(orig_par, to_rename): + for key, value in orig_par.items(): + # rename the key in par based on the \\`to_rename\\` dict + if key in to_rename.keys(): + new_key = to_rename[key] + + # also turn value into a Path + if isinstance(value, list): + new_value = [Path(val) for val in value] + else: + new_value = Path(value) + else: + new_key = key + new_value = value + yield new_key, new_value +par = dict(process_par(par, to_rename)) + +# create output dir if need be +par["outFileNamePrefix"].mkdir(parents=True, exist_ok=True) + +with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir: + print(">> Check whether input files are directories", flush=True) + new_read_files_in = [] + for path in par["readFilesIn"]: + new_read_files_in.extend(search_fastqs(path)) + par["readFilesIn"] = new_read_files_in + print("", flush=True) + + # checking for compressed files, ungzip files if need be + temp_dir_path = Path(temp_dir) + for par_name in ["genomeDir", "readFilesIn"]: + par_values = par[par_name] + if par_values: + # turn value into list + is_multiple = isinstance(par_values, list) + if not is_multiple: + par_values = [ par_values ] + + # output list + new_values = [] + for par_value in par_values: + print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) + new_value = extract_if_need_be(par_value, temp_dir_path) + new_values.append(new_value) + + # unlist if need be + if not is_multiple: + new_values = new_values[0] + + # replace value + par[par_name] = new_values + # end ungzipping + print("", flush=True) + + print("Grouping R1/R2 input files into pairs", flush=True) + input_grouped = {} + for path in par['readFilesIn']: + key = re.search(fastqgz_regex, path.name).group(2) + if key not 
in input_grouped: + input_grouped[key] = [] + input_grouped[key].append(str(path)) + par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ] + print("", flush=True) + + print(">> Constructing command", flush=True) + par["runMode"] = "alignReads" + par["outTmpDir"] = temp_dir_path / "run" + if 'cpus' in meta and meta['cpus']: + par["runThreadN"] = meta["cpus"] + # make sure there is a trailing / + par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/" + + cmd_args = [ "STAR" ] + for name, value in par.items(): + if value is not None: + if isinstance(value, list): + cmd_args.extend(["--" + name] + [str(x) for x in value]) + else: + cmd_args.extend(["--" + name, str(value)]) + print("", flush=True) + + print(">> Running STAR with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + print("", flush=True) + + subprocess.run( + cmd_args, + check=True + ) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_star_align", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. 
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the input file paths in a parameter set relative to a given path.
+ *
+ * Only entries whose argument in the Viash config has type "file" and direction "input"
+ * are touched; every other entry is returned as is.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String; values that are
+ *                  not a String (inside a Collection or on their own) are passed through unchanged.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameter values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared with `def` so the lookup stays local instead of leaking into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a non-String scalar (including null) is returned untouched; the previous code referred
+        // to `path` here, which only exists inside the Collection branch above
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format.
+  // Every parser returns a pair [paramListFile, paramSets]: the first element is the
+  // value passed to --param_list for the csv/json/yaml formats and null otherwise.
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; the argument definitions are
+ *               looked up in config.functionality.allArguments by plainName.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // declared with 'def' so that they stay local to this closure instead of
+    // being written to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // always work on a list; it is simplified again below for single-value arguments
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    // only input files are converted; a missing direction is treated as "input"
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+      parType == "boolean_true" ||
+      parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config ('publishDir' and
+ *                    'publish_dir' are accepted as well).
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *   ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; the first element of each set is its ID,
+ *                      the second element the parameter Map. Any further elements are
+ *                      appended to the output unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to initialise
+ *   a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *   csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *   which correspond to the different arguments of this pipeline. A json or a yaml
+ *   file should be a list of maps, each of which has keys corresponding to the
+ *   arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *   When passing a csv, json or yaml, relative path names are relativized to the
+ *   location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // the ID of the parameter set takes precedence over the global 'id' parameter
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on duplicate keys the value from 'param_list' wins (right-hand side of '+')
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local to this closure instead of writing to the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // parameterSettings is null for parameters that are not declared in the config
+      // (e.g. publish_dir), hence the safe navigation
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Default value of every argument that declares one, keyed by plain argument name
+  def defaultArgs = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      defaultArgs[argument.plainName] = argument.default
+    }
+  }
+
+  // The defaults go through the same split/cast/validation step as the inputs
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Process the inputs, then lay them over the defaults so that provided values win
+  def parsedArgs = applyConfig(params, config).collect { tuple ->
+    def parValuesWithDefault = parsedDefaultValues + tuple[1]
+    // keep the id in front and re-attach any passthrough elements untouched
+    [tuple[0], parValuesWithDefault] + tuple.drop(2)
+  }
+  _checkUniqueIds(parsedArgs)
+
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // all events are gathered first because the ID uniqueness check needs the full list
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map with only expected keys and all required keys.
+ *
+ * @param map The value to check; the assertion fails if it is not a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name used in the error messages; may be null or empty.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing ('key' is not defined in this closure)
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"]
+      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
+      // registry prefix, in order of precedence: the OVERRIDE_CONTAINER_REGISTRY environment
+      // variable, params.override_container_registry, the registry in the map, no prefix
+      def part1 =
+        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
+        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
+        m.registry ? m.registry + "/" :
+        ""
+      def part2 = m.image
+      def part3 = m.tag ? ":" + m.tag : ":latest"
+      drctv["container"] = part1 + part2 + part3
+    }
+  }
+
+  /* DIRECTIVE containerOptions
+    accepted examples:
+    - "--foo bar"
+    - ["--foo bar", "-f b"]
+  */
+  if (drctv.containsKey("containerOptions")) {
+    if (drctv["containerOptions"] instanceof List) {
+      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
+    }
+    assert drctv["containerOptions"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cpus
+    accepted examples:
+    - 1
+    - 10
+  */
+  if (drctv.containsKey("cpus")) {
+    assert drctv["cpus"] instanceof Integer
+  }
+
+  /* DIRECTIVE disk
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("disk")) {
+    assert drctv["disk"] instanceof CharSequence
+    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE echo
+    accepted examples:
+    - true
+    - false
+  */
+  if (drctv.containsKey("echo")) {
+    assert drctv["echo"] instanceof Boolean
+  }
+
+  /* DIRECTIVE errorStrategy
+    accepted examples:
+    - "terminate"
+    - "finish"
+    - "ignore"
+    - "retry"
+  */
+  if (drctv.containsKey("errorStrategy")) {
+    assert drctv["errorStrategy"] instanceof CharSequence
+    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
+  }
+
+  /* DIRECTIVE executor
+    accepted examples:
+    - "local"
+    - "sge"
+  */
+  if (drctv.containsKey("executor")) {
+    assert drctv["executor"] instanceof CharSequence
+    // NOTE(review): hard-coded list of executor names; executors not listed here are rejected
+    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
+  }
+
+  /* DIRECTIVE machineType
+    accepted examples:
+    - "n1-highmem-8"
+  */
+  if (drctv.containsKey("machineType")) {
+    assert drctv["machineType"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE maxErrors
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxErrors")) {
+    assert drctv["maxErrors"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxForks
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxForks")) {
+    assert drctv["maxForks"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxRetries
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxRetries")) {
+    assert drctv["maxRetries"] instanceof Integer
+  }
+
+  /* DIRECTIVE memory
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("memory")) {
+    assert drctv["memory"] instanceof CharSequence
+    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE module
+    accepted examples:
+    - "ncbi-blast/2.2.27"
+    - "ncbi-blast/2.2.27:t_coffee/10.0"
+    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
+  */
+  if (drctv.containsKey("module")) {
+    if (drctv["module"] instanceof List) {
+      drctv["module"] = drctv["module"].join(":")
+    }
+    assert drctv["module"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE penv
+    accepted examples:
+    - "smp"
+  */
+  if (drctv.containsKey("penv")) {
+    assert drctv["penv"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE pod
+    accepted examples:
+    - [ label: "key", value: "val" ]
+    - [ annotation: "key", value: "val" ]
+    - [ env: "key", value: "val" ]
+    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
+  */
+  if (drctv.containsKey("pod")) {
+    // a single map is wrapped so that the directive is always a list of maps
+    if (drctv["pod"] instanceof Map) {
+      drctv["pod"] = [ drctv["pod"] ]
+    }
+    assert drctv["pod"] instanceof List
+    drctv["pod"].forEach { pod ->
+      assert pod instanceof Map
+      // TODO: should more checks be added?
+      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
+      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
+    }
+  }
+
+  /* DIRECTIVE publishDir
+    accepted examples:
+    - []
+    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
+    - "/path/to/dir"
+      is transformed to [[ path: "/path/to/dir" ]]
+    - [ path: "/path/to/dir", mode: "cache" ]
+      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
+  */
+  // TODO: should we also look at params["publishDir"]?
+  if (drctv.containsKey("publishDir")) {
+    def pblsh = drctv["publishDir"]
+
+    // check different options
+    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence
+
+    // turn into list if not already so
+    // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work.
+    // (likely operator precedence: '!' binds tighter than 'instanceof', so that
+    // expression tests '(!pblsh) instanceof List')
+    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]
+
+    // check elements of publishDir
+    pblsh = pblsh.collect{ elem ->
+      // turn into map if not already so
+      elem = elem instanceof CharSequence ? [ path: elem ] : elem
+
+      // check types and keys
+      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
+      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")
+
+      // check elements in map
+      assert elem.containsKey("path")
+      assert elem["path"] instanceof CharSequence
+      if (elem.containsKey("mode")) {
+        assert elem["mode"] instanceof CharSequence
+        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
+      }
+      if (elem.containsKey("overwrite")) {
+        assert elem["overwrite"] instanceof Boolean
+      }
+      if (elem.containsKey("pattern")) {
+        assert elem["pattern"] instanceof CharSequence
+      }
+      if (elem.containsKey("saveAs")) {
+        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
+      }
+      if (elem.containsKey("enabled")) {
+        assert elem["enabled"] instanceof Boolean
+      }
+
+      // return final result
+      elem
+    }
+    // store final directive
+    drctv["publishDir"] = pblsh
+  }
+
+  /* DIRECTIVE queue
+    accepted examples:
+    - "long"
+    - "short,long"
+    - ["short", "long"]
+  */
+  if (drctv.containsKey("queue")) {
+    if (drctv["queue"] instanceof List) {
+      drctv["queue"] = drctv["queue"].join(",")
+    }
+    assert drctv["queue"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE label
+    accepted examples:
+    - "big_mem"
+    - "big_cpu"
+    - ["big_mem", "big_cpu"]
+  */
+  if (drctv.containsKey("label")) {
+    // a single label is wrapped so that the directive is always a list
+    if (drctv["label"] instanceof CharSequence) {
+      drctv["label"] = [ drctv["label"] ]
+    }
+    assert drctv["label"] instanceof List
+    drctv["label"].forEach { label ->
+      assert label instanceof CharSequence
+      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
+      // ^ does not allow closures
+    }
+  }
+
+  /* DIRECTIVE scratch
+    accepted examples:
+    - true
+    - "/path/to/scratch"
+    - '$MY_PATH_TO_SCRATCH'
+    - "ram-disk"
+  */
+  if (drctv.containsKey("scratch")) {
+    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE storeDir
+    accepted examples:
+    - "/path/to/storeDir"
+  */
+  if (drctv.containsKey("storeDir")) {
+    assert drctv["storeDir"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE stageInMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageInMode")) {
+    assert drctv["stageInMode"] instanceof CharSequence
+    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
+  }
+
+  /* DIRECTIVE stageOutMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageOutMode")) {
+    assert drctv["stageOutMode"] instanceof CharSequence
+    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
+  }
+
+  /* DIRECTIVE tag
+    accepted examples:
+    - "foo"
+    - '$id'
+  */
+  if (drctv.containsKey("tag")) {
+    assert drctv["tag"] instanceof CharSequence
+  }
+
+  /*
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+// Validate the 'auto' settings: after dropping null values, every expected key must be
+// present and hold a Boolean. Returns a map restricted to the expected keys.
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local instead of writing it to the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+// Merge the provided process arguments with thisDefaultProcessArgs and validate them:
+// - 'key' must be (or evaluate to) an identifier-like string
+// - 'directives' and 'auto' are merged with their defaults and checked
+// - a publishDir is derived from params when auto.publish / auto.transcript is enabled
+// - 'fromState' and 'toState' given as a List or Map are converted into closures
+// Returns the processed argument map.
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${processArgs['directives'].getClass()}"
+  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])
+
+  // check whether auto exists and apply defaults
+  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
+  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
+  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])
+
+  // auto define publish, if so desired (only when no publishDir directive was set)
+  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
+    //   " Example: params.publish_dir = \"./output/\""
+    def publishDir =
+      params.containsKey("publish_dir") ? params.publish_dir :
+      params.containsKey("publishDir") ? params.publishDir :
+      null
+
+    if (publishDir != null) {
+      processArgs.directives.publishDir = [[
+        path: publishDir,
+        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      // mirror image of the publish rule above: only hidden files are kept here
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // append to the publishDir entries that are already defined, if any
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  // the optional hooks must be closures when they are set
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
+    }
+  }
+
+  // check fromState
+  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
+  def fromState = processArgs["fromState"]
+  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
+    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
+  if (fromState) {
+    // if fromState is a List, convert to map
+    if (fromState instanceof List) {
+      // check whether fromstate is a list[string]
+      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
+      fromState = fromState.collectEntries{[it, it]}
+    }
+
+    // if fromState is a map, convert to closure
+    if (fromState instanceof Map) {
+      // check whether fromstate is a map[string, string]
+      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
+      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
+      def fromStateMap = fromState.clone()
+      // turn the map into a closure to be used later on
+      fromState = { it ->
+        def state = it[1]
+        assert state instanceof Map : "Error in module '$key': the state is not a Map"
+        def data = fromStateMap.collectEntries{newkey, origkey ->
+          // check whether all values of fromState are in state
+          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
+          [newkey, state[origkey]]
+        }
+        data
+      }
+    }
+
+    processArgs["fromState"] = fromState
+  }
+
+  // check toState
+  def toState = processArgs["toState"]
+
+  // without a toState, the second element of the tuple is used as the new state
+  if (toState == null) {
+    toState = { tup -> tup[1] }
+  }
+
+  // toState should be a closure, map[string, string], or list[string]
+  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
+    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"
+
+  // if toState is a List, convert to map
+  if (toState instanceof List) {
+    // check whether toState is a list[string]
+    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
+    toState = toState.collectEntries{[it, it]}
+  }
+
+  // if toState is a map, convert to closure
+  if (toState instanceof Map) {
+    // check whether toState is a map[string, string]
+    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
+    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
+    def toStateMap = toState.clone()
+    // turn the map into a closure to be used later on
+    toState = { it ->
+      def output = it[1]
+      def state = it[2]
+      assert output instanceof Map : "Error in module '$key': the output is not a Map"
+      assert state instanceof Map : "Error in module '$key': the state is not a Map"
+      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
+        // check whether all values of toState are in output
+        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
+        [newkey, output[origkey]]
+      }
+      // the selected output entries override existing state entries with the same key
+      state + extraEntries
+    }
+  }
+
+  processArgs["toState"] = toState
+
+  // return output
+  return processArgs
+}
+
+def processFactory(Map processArgs) {
+  // autodetect process key
+  def wfKey = processArgs["key"]
+  def procKeyPrefix = "${wfKey}_process"
+  def meta = ScriptMeta.current()
+  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
+  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
+  def newNumber = (numbers + [-1]).max() + 1
+
+  def procKey =
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // the message parts are joined with '+'; a ',' would pass the remainder to the logger as an unused format argument + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" // 'def' keeps this local instead of writing to the script binding + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // 'def' keeps 'out' local to this closure call instead of sharing it through the script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parentheses required: '!out instanceof List' negates 'out' first and is always false + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/star_align/nextflow.config b/target/nextflow/mapping/star_align/nextflow.config new file mode 100644 index 00000000000..4d7fd9f8386 
--- /dev/null +++ b/target/nextflow/mapping/star_align/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'star_align' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Align fastq files using STAR.' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb 
{ memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/star_align/nextflow_params.yaml b/target/nextflow/mapping/star_align/nextflow_params.yaml new file mode 100644 index 00000000000..7c77e19eb4a --- /dev/null +++ b/target/nextflow/mapping/star_align/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Input/Output +input: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L001_R2_001.fastq.gz"] +reference: # please fill in - example: "/path/to/reference" +# output: "$id.$key.output.output" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/star_align/nextflow_schema.json b/target/nextflow/mapping/star_align/nextflow_schema.json new file mode 100644 index 00000000000..5dba8c5ef4b --- /dev/null +++ b/target/nextflow/mapping/star_align/nextflow_schema.json @@ -0,0 +1,91 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "star_align", +"description": "Align fastq files using STAR.", +"type": "object", +"definitions": { + + + + "input/output" : { + "title": "Input/Output", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": 
"Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed", + "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed. Corresponds to the --readFilesIn argument in the STAR command." + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference", + "help_text": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference. Corresponds to the --genomeDir argument in the STAR command." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --outFileNamePrefix argument in the STAR command." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/star_align/setup_logger.py b/target/nextflow/mapping/star_align/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/mapping/star_align/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/mapping/star_align_v273a/.config.vsh.yaml b/target/nextflow/mapping/star_align_v273a/.config.vsh.yaml new file mode 100644 index 00000000000..f115181214f --- /dev/null +++ b/target/nextflow/mapping/star_align_v273a/.config.vsh.yaml @@ -0,0 +1,2535 @@ +functionality: + name: "star_align_v273a" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - 
name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + argument_groups: + - name: "Input/Output" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--readFilesIn" + description: "The FASTQ files to be analyzed. Corresponds to the --readFilesIn\ + \ in the STAR command." + info: null + example: + - "mysample_S1_L001_R1_001.fastq.gz" + - "mysample_S1_L001_R2_001.fastq.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--reference" + alternatives: + - "--genomeDir" + description: "Path to the reference built by star_build_reference. Corresponds\ + \ to the --genomeDir in the STAR command." + info: null + example: + - "/path/to/reference" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--outFileNamePrefix" + description: "Path to output directory. Corresponds to the --outFileNamePrefix\ + \ in the STAR command." + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Run Parameters" + arguments: + - type: "integer" + name: "--runRNGseed" + description: "random number generator seed." + info: null + example: + - 777 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome Parameters" + arguments: + - type: "string" + name: "--genomeLoad" + description: "mode of shared memory usage for the genome files. Only used with\ + \ --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and\ + \ keep it in memory after run\n- LoadAndRemove ... load genome into shared\ + \ but remove it after run\n- LoadAndExit ... 
load genome into shared memory\ + \ and exit, keeping the genome in memory for future runs\n- Remove \ + \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\ + \ ... do not use shared memory, each job will have its own private copy of\ + \ the genome" + info: null + example: + - "NoSharedMemory" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--genomeFastaFiles" + description: "path(s) to the fasta files with the genome sequences, separated\ + \ by spaces. These files should be plain text FASTA files, they *cannot* be\ + \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\ + \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\ + \ sequences to the genome (e.g. spike-ins)." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--genomeFileSizes" + description: "genome files exact sizes in bytes. Typically, this should not\ + \ be defined by the user." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeTransformOutput" + description: "which output to transform back to original genome\n\n- SAM \ + \ ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n-\ + \ None ... no transformation of the output" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--genomeChrSetMitochondrial" + description: "names of the mitochondrial chromosomes. 
Presently only used for\ + \ STARsolo statistics output." + info: null + example: + - "chrM" + - "M" + - "MT" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Splice Junctions Database" + arguments: + - type: "string" + name: "--sjdbFileChrStartEnd" + description: "path to the files with genomic coordinates (chr start \ + \ end strand) for the splice junction introns. Multiple files can be\ + \ supplied and will be concatenated." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--sjdbGTFfile" + description: "path to the GTF file with annotations" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFchrPrefix" + description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\ + \ ENSEMBL annotations with UCSC genomes)" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFfeatureExon" + description: "feature type in GTF file to be used as exons for building transcripts" + info: null + example: + - "exon" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentTranscript" + description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\ + \ works for GTF files)" + info: null + example: + - "transcript_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGene" + description: "GTF attribute name for parent gene ID (default \"gene_id\" works\ + \ for GTF files)" + info: null + example: + - "gene_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: 
"--sjdbGTFtagExonParentGeneName" + description: "GTF attribute name for parent gene name" + info: null + example: + - "gene_name" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--sjdbGTFtagExonParentGeneType" + description: "GTF attribute name for parent gene type" + info: null + example: + - "gene_type" + - "gene_biotype" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--sjdbOverhang" + description: "length of the donor/acceptor sequence on each side of the junctions,\ + \ ideally = (mate_length - 1)" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--sjdbScore" + description: "extra alignment score for alignments that cross database junctions" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--sjdbInsertSave" + description: "which files to save when sjdb junctions are inserted on the fly\ + \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\ + - All ... all files including big Genome, SA and SAindex - this will create\ + \ a complete genome directory" + info: null + example: + - "Basic" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Variation parameters" + arguments: + - type: "string" + name: "--varVCFfile" + description: "path to the VCF file that contains variation data. The 10th column\ + \ should contain the genotype information, e.g. 0/1" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Parameters" + arguments: + - type: "string" + name: "--readFilesType" + description: "format of input read files\n\n- Fastx ... FASTA or FASTQ\n\ + - SAM SE ... 
SAM or BAM single-end reads; for BAM use --readFilesCommand\ + \ samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use\ + \ --readFilesCommand samtools view" + info: null + example: + - "Fastx" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesSAMattrKeep" + description: "for --readFilesType SAM SE/PE, which SAM tags to keep in the output\ + \ BAM, e.g.: --readFilesSAMattrKeep RG PL\n\n- All ... keep all tags\n\ + - None ... do not keep any tags" + info: null + example: + - "All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "file" + name: "--readFilesManifest" + description: "path to the \"manifest\" file with the names of read files. The\ + \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\ + \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end\ + \ reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces,\ + \ but not tabs are allowed in file names.\nIf read_group_line does not start\ + \ with ID:, it can only contain one ID field, and ID: will be added to it.\n\ + If read_group_line starts with ID:, it can contain several fields separated\ + \ by $tab$, and all fields will be copied verbatim into SAM @RG header\ + \ line." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesPrefix" + description: "prefix for the read files names, i.e. it will be added in front\ + \ of the strings in --readFilesIn" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readFilesCommand" + description: "command line to execute for each of the input file.
This command\ + \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\ + \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readMapNumber" + description: "number of reads to map from the beginning of the file\n\n-1: map\ + \ all reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readMatesLengthsIn" + description: "Equal/NotEqual - lengths of names,sequences,qualities for both\ + \ mates are the same / not the same. NotEqual is safe in all situations." + info: null + example: + - "NotEqual" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--readNameSeparator" + description: "character(s) separating the part of the read names that will be\ + \ trimmed in output (read name after space is always trimmed)" + info: null + example: + - "/" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--readQualityScoreBase" + description: "number to be subtracted from the ASCII code to get Phred quality\ + \ score" + info: null + example: + - 33 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Read Clipping" + arguments: + - type: "string" + name: "--clipAdapterType" + description: "adapter clipping type\n\n- Hamming ... adapter clipping based\ + \ on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\ + - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\ + \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None\ + \ ... 
no adapter clipping, all other clip* parameters are disregarded" + info: null + example: + - "Hamming" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--clip3pNbases" + description: "number(s) of bases to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--clip3pAdapterSeq" + description: "adapter sequences to clip from 3p of each mate. If one value\ + \ is given, it will be assumed the same for both mates.\n\n- polyA ... polyA\ + \ sequence with the length equal to read length" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "double" + name: "--clip3pAdapterMMp" + description: "max proportion of mismatches for 3p adapter clipping for each\ + \ mate. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip3pAfterAdapterNbases" + description: "number of bases to clip from 3p of each mate after the adapter\ + \ clipping. If one value is given, it will be assumed the same for both mates." + info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--clip5pNbases" + description: "number(s) of bases to clip from 5p of each mate. If one value\ + \ is given, it will be assumed the same for both mates." 
+ info: null + example: + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Limits" + arguments: + - type: "long" + name: "--limitGenomeGenerateRAM" + description: "maximum available RAM (bytes) for genome generation" + info: null + example: + - 31000000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitIObufferSize" + description: "max available buffers size (bytes) for input/output, per thread" + info: null + example: + - 30000000 + - 50000000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "long" + name: "--limitOutSAMoneReadBytes" + description: "max size of the SAM record (bytes) for one read. Recommended value:\ + \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax" + info: null + example: + - 100000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJoneRead" + description: "max number of junctions for one read (including all multi-mappers)" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitOutSJcollapsed" + description: "max number of collapsed junctions" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "long" + name: "--limitBAMsortRAM" + description: "maximum available RAM (bytes) for sorting BAM. If =0, it will\ + \ be set to the genome index size. 0 value can only be used with --genomeLoad\ + \ NoSharedMemory option." 
+ info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitSjdbInsertNsj" + description: "maximum number of junctions to be inserted to the genome on the\ + \ fly at the mapping stage, including those from annotations and those detected\ + \ in the 1st step of the 2-pass run" + info: null + example: + - 1000000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--limitNreadsSoft" + description: "soft limit on the number of reads" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: general" + arguments: + - type: "string" + name: "--outTmpKeep" + description: "whether to keep the temporary files after STAR runs is finished\n\ + \n- None ... remove all temporary files\n- All ... keep all files" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outStd" + description: "which output will be directed to stdout (standard out)\n\n- Log\ + \ ... log messages\n- SAM ... alignments\ + \ in SAM format (which normally are output to Aligned.out.sam file), normal\ + \ standard output will go into Log.std.out\n- BAM_Unsorted ... alignments\ + \ in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate\ + \ ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype\ + \ BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome\ + \ in BAM format, unsorted. Requires --quantMode TranscriptomeSAM" + info: null + example: + - "Log" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outReadsUnmapped" + description: "output of unmapped and partially mapped (i.e. 
mapped only one\ + \ mate of a paired end read) reads in separate file(s).\n\n- None ... no\ + \ output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outQSconversionAdd" + description: "add this number to the quality score (e.g. to convert from Illumina\ + \ to Sanger, use -31)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outMultimapperOrder" + description: "order of multimapping alignments in the output files\n\n- Old_2.4\ + \ ... quasi-random order used before 2.5.0\n- Random \ + \ ... random order of alignments for each multi-mapper. Read mates (pairs)\ + \ are always adjacent, all alignment for each read stay together. This option\ + \ will become default in the future releases." + info: null + example: + - "Old_2.4" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output: SAM and BAM" + arguments: + - type: "string" + name: "--outSAMtype" + description: "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without\ + \ sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n\ + 2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate\ + \ ... sorted by coordinate. This option will allocate extra memory for sorting\ + \ which can be specified by --limitBAMsortRAM." + info: null + example: + - "SAM" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMmode" + description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\ + \ SAM output\n- NoQS ... 
full SAM but without quality scores" + info: null + example: + - "Full" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMstrandField" + description: "Cufflinks-like strand field flag\n\n- None ... not used\n\ + - intronMotif ... strand derived from the intron motif. This option changes\ + \ the output alignments: reads with inconsistent and/or non-canonical introns\ + \ are filtered out." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattributes" + description: "a string of desired SAM attributes, in the order desired for the\ + \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\ + - None ... no attributes\n- Standard ... NH HI AS nM\n- All \ + \ ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ...\ + \ number of loci the reads maps to: =1 for unique mappers, >1 for multimappers.\ + \ Standard SAM tag.\n- HI ... multiple alignment index, starts with\ + \ --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ...\ + \ local alignment score, +1/-1 for matches/mismatches, score* penalties for\ + \ indels and gaps. For PE reads, total score for two mates. Standard SAM tag.\n\ + - nM ... number of mismatches. For PE reads, sum over two mates.\n\ + - NM ... edit distance to the reference (number of mismatched + inserted\ + \ + deleted bases) for each mate. Standard SAM tag.\n- MD ... string\ + \ encoding mismatched and deleted reference bases (see standard SAM specifications).\ + \ Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e.\ + \ N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5:\ + \ AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is\ + \ annotated, 20 is added to its motif value.\n- jI ... start and\ + \ end of introns for all junctions (1-based).\n- XS ...
alignment\ + \ strand according to --outSAMstrandField.\n- MC ... mate's CIGAR\ + \ string. Standard SAM tag.\n- ch ... marks all segments of all chimeric\ + \ alignments for --chimOutType WithinBAM output.\n- cN ... number\ + \ of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA \ + \ ... variant allele\n- vG ... genomic coordinate of the variant\ + \ overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering;\ + \ 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode\ + \ SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\ + \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ...\ + \ gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs\ + \ and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected\ + \ cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM\ + \ SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS \ + \ ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ \ + \ ... quality of the entire barcode.\n***Unsupported/undocumented:\n-\ + \ ha ... haplotype (1/2) when mapping to the diploid genome. Requires\ + \ genome generated with --genomeTransformType Diploid .\n- rB ...\ + \ alignment block read/genomic coordinates.\n- vR ... read coordinate\ + \ of the variant." + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMattrIHstart" + description: "start value for the IH attribute. 0 may be required by some downstream\ + \ software, such as Cufflinks or StringTie." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMunmapped" + description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\ + \ ... no output\n- Within ...
output unmapped reads within the main SAM\ + \ file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped\ + \ mate for each alignment, and, in case of unsorted output, keep it adjacent\ + \ to its mapped mate. Only affects multi-mapping reads." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMorder" + description: "type of sorting for the SAM output\n\nPaired: one mate after the\ + \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\ + \ other for all paired alignments, the order is kept the same as in the input\ + \ FASTQ files" + info: null + example: + - "Paired" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMprimaryFlag" + description: "which alignments are considered primary - all others will be marked\ + \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with\ + \ the best score is primary\n- AllBestScore ... all alignments with the best\ + \ score are primary" + info: null + example: + - "OneBestScore" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMreadID" + description: "read ID record type\n\n- Standard ... 
first word (until space)\ + \ from the FASTx read ID line, removing /1,/2 from the end\n- Number ...\ + \ read number (index) in the FASTx file" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMmapqUnique" + description: "0 to 255: the MAPQ value for unique mappers" + info: null + example: + - 255 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagOR" + description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\ + \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set\ + \ by STAR, and after outSAMflagAND. Can be used to set specific bits that\ + \ are not set otherwise." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMflagAND" + description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\ + \ FLAG=FLAG & outSAMflagAND. This is applied after all flags have been set\ + \ by STAR, but before outSAMflagOR. Can be used to unset specific bits that\ + \ are not set otherwise." + info: null + example: + - 65535 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMattrRGline" + description: "SAM/BAM read group line. The first word contains the read group\ + \ identifier and must start with \"ID:\", e.g. --outSAMattrRGline ID:xxx CN:yy\ + \ \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment.
Any\ + \ spaces in the tag values have to be double quoted.\nComma separated RG lines\ + \ correspond to different (comma separated) input files in --readFilesIn.\ + \ Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx\ + \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderHD" + description: "@HD (header) line of the SAM header" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderPG" + description: "extra @PG (software) line of the SAM header (in addition to STAR)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outSAMheaderCommentFile" + description: "path to the file with @CO (comment) lines of the SAM header" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outSAMfilter" + description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\ + \ ... only keep the reads for which all alignments are to the extra reference\ + \ sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences\ + \ ... keep all alignments to the extra reference sequences added with --genomeFastaFiles\ + \ at the mapping stage." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSAMmultNmax" + description: "max number of multiple alignments for a read that will be output\ + \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\ + \ scoring alignment will be output first\n\n- -1 ...
all alignments (up to\ + \ --outFilterMultimapNmax) will be output" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSAMtlen" + description: "calculation method for the TLEN field in the SAM/BAM files\n\n\ + - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\ + \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\ + \ base of any mate. (+)sign for the mate with the leftmost base. This is different\ + \ from 1 for overlapping mates with protruding ends" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMcompression" + description: "-1 to 10 BAM compression level, -1=default compression (6?),\ + \ 0=no compression, 10=maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingThreadN" + description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outBAMsortingBinsN" + description: ">0: number of genome bins for coordinate-sorting" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "BAM processing" + arguments: + - type: "string" + name: "--bamRemoveDuplicatesType" + description: "mark duplicates in the BAM file, for now only works with (i) sorted\ + \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n-\ + \ - ... no duplicate removal/marking\n- UniqueIdentical\ + \ ... mark all multimappers, and duplicate unique mappers. 
The coordinates,\ + \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate\ + \ unique mappers but not multimappers." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--bamRemoveDuplicatesMate2basesN" + description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\ + \ for RAMPAGE)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Wiggle" + arguments: + - type: "string" + name: "--outWigType" + description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\ + . Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n\ + - None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle\ + \ ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of\ + \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from\ + \ only 2nd read" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--outWigStrand" + description: "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate\ + \ strands, str1 and str2\n- Unstranded ... collapsed strands" + info: null + example: + - "Stranded" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigReferencesPrefix" + description: "prefix matching reference names to include in the output wiggle\ + \ file, e.g. \"chr\", default \"-\" - include all references" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outWigNorm" + description: "type of normalization for the signal\n\n- RPM ... reads per\ + \ million of mapped reads\n- None ... 
no normalization, \"raw\" counts" + info: null + example: + - "RPM" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering" + arguments: + - type: "string" + name: "--outFilterType" + description: "type of filtering\n\n- Normal ... standard filtering using only\ + \ current alignment\n- BySJout ... keep only those reads that contain junctions\ + \ that passed filtering into SJ.out.tab" + info: null + example: + - "Normal" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapScoreRange" + description: "the score range below the maximum score for multimapping alignments" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMultimapNmax" + description: "maximum number of loci the read is allowed to map to. Alignments\ + \ (all of them) will be output only if the read maps to no more loci than\ + \ this value.\n\nOtherwise no alignments will be output, and the read will\ + \ be counted as \"mapped to too many loci\" in the Log.final.out ." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMismatchNmax" + description: "alignment will be output only if it has no more mismatches than\ + \ this value." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverLmax" + description: "alignment will be output only if its ratio of mismatches to *mapped*\ + \ length is less than or equal to this value." 
+ info: null + example: + - 0.3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMismatchNoverReadLmax" + description: "alignment will be output only if its ratio of mismatches to *read*\ + \ length is less than or equal to this value." + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterScoreMin" + description: "alignment will be output only if its score is higher than or equal\ + \ to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterScoreMinOverLread" + description: "same as outFilterScoreMin, but normalized to read length (sum\ + \ of mates' lengths for paired-end reads)" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outFilterMatchNmin" + description: "alignment will be output only if the number of matched bases is\ + \ higher than or equal to this value." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--outFilterMatchNminOverLread" + description: "same as outFilterMatchNmin, but normalized to the read length (sum\ + \ of mates' lengths for paired-end reads)." + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronMotifs" + description: "filter alignment using their motifs\n\n- None \ + \ ... no filtering\n- RemoveNoncanonical ... filter\ + \ out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\ + \ ...
filter out alignments that contain non-canonical unannotated junctions\ + \ when using annotated splice junctions database. The annotated non-canonical\ + \ junctions will be kept." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--outFilterIntronStrands" + description: "filter alignments\n\n- RemoveInconsistentStrands ... remove\ + \ alignments that have junctions with inconsistent strands\n- None \ + \ ... no filtering" + info: null + example: + - "RemoveInconsistentStrands" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output splice junctions (SJ.out.tab)" + arguments: + - type: "string" + name: "--outSJtype" + description: "type of splice junction output\n\n- Standard ... standard SJ.out.tab\ + \ output\n- None ... no splice junction output" + info: null + example: + - "Standard" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Output Filtering: Splice Junctions" + arguments: + - type: "string" + name: "--outSJfilterReads" + description: "which reads to consider for collapsed splice junctions output\n\ + \n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely\ + \ mapping reads only" + info: null + example: + - "All" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--outSJfilterOverhangMin" + description: "minimum overhang length for splice junctions on both sides for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\n\ + does not apply to annotated junctions" + info: null + example: + - 30 + - 12 + - 12 + - 12 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountUniqueMin" + description: "minimum uniquely mapping read count per junction for: (1) non-canonical\ + \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC\ + \ and GT/AT motif. -1 means no output for that motif\n\nJunctions are output\ + \ if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions\ + \ are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterCountTotalMin" + description: "minimum total (multi-mapping+unique) read count per junction for:\ + \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC\ + \ motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\n\ + Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\ + \ conditions are satisfied\ndoes not apply to annotated junctions" + info: null + example: + - 3 + - 1 + - 1 + - 1 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterDistToOtherSJmin" + description: "minimum allowed distance to other junctions' donor/acceptor\n\n\ + does not apply to annotated junctions" + info: null + example: + - 10 + - 0 + - 5 + - 10 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--outSJfilterIntronMaxVsReadN" + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ + \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by\ + \ 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax\n\ + does not apply to annotated junctions" + info: null + example: + - 50000 + - 100000 + - 200000 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - name: "Scoring" + arguments: + - type: "integer" + name: "--scoreGap" + description: "splice junction penalty (independent on intron motif)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapNoncan" + description: "non-canonical junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapGCAG" + description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)" + info: null + example: + - -4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGapATAC" + description: "AT/AC and GT/AT junction penalty (in addition to scoreGap)" + info: null + example: + - -8 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreGenomicLengthLog2scale" + description: "extra score logarithmically scaled with genomic length of the\ + \ alignment: scoreGenomicLengthLog2scale*log2(genomicLength)" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelOpen" + description: "deletion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreDelBase" + description: "deletion extension penalty per base (in addition to scoreDelOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: 
"par" + - type: "integer" + name: "--scoreInsOpen" + description: "insertion open penalty" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreInsBase" + description: "insertion extension penalty per base (in addition to scoreInsOpen)" + info: null + example: + - -2 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--scoreStitchSJshift" + description: "maximum score reduction while searching for SJ boundaries in the\ + \ stitching step" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Alignments and Seeding" + arguments: + - type: "integer" + name: "--seedSearchStartLmax" + description: "defines the search start point through the read - the read is\ + \ split into pieces no longer than this value" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--seedSearchStartLmaxOverLread" + description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\ + \ for paired-end reads)" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSearchLmax" + description: "defines the maximum length of the seeds, if =0 seed length is\ + \ not limited" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMultimapNmax" + description: "only pieces that map fewer than this value are utilized in the\ + \ stitching procedure" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerReadNmax" + description: "max 
number of seeds per read" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedPerWindowNmax" + description: "max number of seeds per window" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedNoneLociPerWindow" + description: "max number of one seed loci per window" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedSplitMin" + description: "min length of the seed sequences split by Ns or mate gap" + info: null + example: + - 12 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seedMapMin" + description: "min length of seeds to be mapped" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMin" + description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\ + \ otherwise it is considered Deletion" + info: null + example: + - 21 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignIntronMax" + description: "maximum intron size, if 0, max intron size will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignMatesGapMax" + description: "maximum gap between two mates, if 0, max intron gap will be determined\ + \ by (2^winBinNbits)*winAnchorDistNbins" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: 
"--alignSJoverhangMin" + description: "minimum overhang (i.e. block size) for spliced alignments" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSJstitchMismatchNmax" + description: "maximum number of mismatches for stitching of the splice junctions\ + \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif,\ + \ (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif." + info: null + example: + - 0 + - -1 + - 0 + - 0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--alignSJDBoverhangMin" + description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\ + \ alignments" + info: null + example: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignSplicedMateMapLmin" + description: "minimum mapped length for a read mate that is spliced" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--alignSplicedMateMapLminOverLmate" + description: "alignSplicedMateMapLmin normalized to mate length" + info: null + example: + - 0.66 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignWindowsPerReadNmax" + description: "max number of windows per read" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerWindowNmax" + description: "max number of transcripts per window" + info: null + example: + - 100 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--alignTranscriptsPerReadNmax" + description: "max number of different alignments per read 
to consider" + info: null + example: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsType" + description: "type of read ends alignment\n\n- Local ... standard\ + \ local alignment with soft-clipping allowed\n- EndToEnd ... force\ + \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully\ + \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\ + \ ... fully extend only the 5p of the both read1 and read2, all other ends:\ + \ local alignment" + info: null + example: + - "Local" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignEndsProtrude" + description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\ + \ mate downstream of the start (end) of the -strand mate\n\n1st word: int:\ + \ maximum number of protrusion bases allowed\n2nd word: string:\n- \ + \ ConcordantPair ... report alignments with non-zero protrusion\ + \ as concordant pairs\n- DiscordantPair ... report alignments\ + \ with non-zero protrusion as discordant pairs" + info: null + example: + - "0 ConcordantPair" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignSoftClipAtReferenceEnds" + description: "allow the soft-clipping of the alignments past the end of the\ + \ chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility\ + \ with Cufflinks" + info: null + example: + - "Yes" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--alignInsertionFlush" + description: "how to flush ambiguous insertion positions\n\n- None ... insertions\ + \ are not flushed\n- Right ... 
insertions are flushed to the right" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Paired-End reads" + arguments: + - type: "integer" + name: "--peOverlapNbasesMin" + description: "minimum number of overlapping bases to trigger mates merging and\ + \ realignment. Specify >0 value to switch on the \"merginf of overlapping\ + \ mates\" algorithm." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--peOverlapMMp" + description: "maximum proportion of mismatched bases in the overlap area" + info: null + example: + - 0.01 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Windows, Anchors, Binning" + arguments: + - type: "integer" + name: "--winAnchorMultimapNmax" + description: "max number of loci anchors are allowed to map to" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winBinNbits" + description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\ + \ each window will occupy an integer number of bins." 
+ info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winAnchorDistNbins" + description: "max number of bins between two anchors that allows aggregation\ + \ of anchors into one window" + info: null + example: + - 9 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winFlankNbins" + description: "log2(winFlank), where win Flank is the size of the left and right\ + \ flanking regions for each window" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--winReadCoverageRelativeMin" + description: "minimum relative coverage of the read sequence by the seeds in\ + \ a window, for STARlong algorithm only." + info: null + example: + - 0.5 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--winReadCoverageBasesMin" + description: "minimum number of bases covered by the seeds in a window , for\ + \ STARlong algorithm only." + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Chimeric Alignments" + arguments: + - type: "string" + name: "--chimOutType" + description: "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n\ + - SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n\ + - WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n\ + - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental\ + \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\ + \ ... 
soft-clipping in the CIGAR for supplemental chimeric alignments" + info: null + example: + - "Junctions" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimSegmentMin" + description: "minimum length of chimeric segment length, if ==0, no chimeric\ + \ output" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreMin" + description: "minimum total (summed) score of the chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreDropMax" + description: "max drop (difference) of chimeric score (the sum of scores of\ + \ all chimeric segments) from the read length" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreSeparation" + description: "minimum difference (separation) between the best chimeric score\ + \ and the next one" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimScoreJunctionNonGTAG" + description: "penalty for a non-GT/AG chimeric junction" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimJunctionOverhangMin" + description: "minimum overhang for a chimeric junction" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimSegmentReadGapMax" + description: "maximum gap in the read sequence between chimeric segments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - 
type: "string" + name: "--chimFilter" + description: "different filters for chimeric alignments\n\n- None ... no filtering\n\ + - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\ + \ junction" + info: null + example: + - "banGenomicN" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--chimMainSegmentMultNmax" + description: "maximum number of multi-alignments for the main chimeric segment.\ + \ =1 will prohibit multimapping main segments." + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapNmax" + description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the\ + \ old scheme for chimeric detection which only considered unique alignments" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimMultimapScoreRange" + description: "the score range for multi-mapping chimeras below the best chimeric\ + \ score. Only works with --chimMultimapNmax > 1" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimNonchimScoreDropMin" + description: "to trigger chimeric detection, the drop in the best non-chimeric\ + \ alignment score with respect to the read length has to be greater than this\ + \ value" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--chimOutJunctionFormat" + description: "formatting type for the Chimeric.out.junction file\n\n- 0 ...\ + \ no comment lines/headers\n- 1 ... 
comment lines at the end of the file:\ + \ command line and Nreads: total, unique/multi-mapping" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Quantification of Annotations" + arguments: + - type: "string" + name: "--quantMode" + description: "types of quantification requested\n\n- - ... none\n\ + - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\ + \ file\n- GeneCounts ... count reads per gene" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--quantTranscriptomeBAMcompression" + description: "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no\ + \ BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n\ + - 10 ... maximum compression" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--quantTranscriptomeBan" + description: "prohibit various alignment type\n\n- IndelSoftclipSingleend ...\ + \ prohibit indels, soft clipping and single-end alignments - compatible with\ + \ RSEM\n- Singleend ... prohibit single-end alignments" + info: null + example: + - "IndelSoftclipSingleend" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "2-pass Mapping" + arguments: + - type: "string" + name: "--twopassMode" + description: "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic\ + \ ... basic 2-pass mapping, with all 1st pass junctions inserted into\ + \ the genome indices on the fly" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--twopass1readsN" + description: "number of reads to process for the 1st step. Use very large number\ + \ (or default -1) to map all reads in the first step." 
+ info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "WASP parameters" + arguments: + - type: "string" + name: "--waspOutputMode" + description: "WASP allele-specific output type. This is re-implementation of\ + \ the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\ + \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper:\ + \ Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582\ + \ .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "STARsolo (single cell RNA-seq) parameters" + arguments: + - type: "string" + name: "--soloType" + description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet)\ + \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and\ + \ 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length,\ + \ one UMI of fixed length and one adapter sequence of fixed length are allowed\ + \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode\ + \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\ + \ if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or\ + \ SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate\ + \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\ + \ UMI sequences, alignments deduplicated according to alignment start and\ + \ end (after extending soft-clipped bases)" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCBwhitelist" + description: "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex\ + \ allows more than one whitelist file.\n\n- None ... 
no whitelist:\ + \ all cell barcodes are allowed" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "integer" + name: "--soloCBstart" + description: "cell barcode start base" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloCBlen" + description: "cell barcode length" + info: null + example: + - 16 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIstart" + description: "UMI start base" + info: null + example: + - 17 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloUMIlen" + description: "UMI length" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeReadLength" + description: "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n\ + - 0 ... not defined, do not check" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloBarcodeMate" + description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\ + \n- 0 ... barcode sequence is on separate read, which should always be the\ + \ last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part\ + \ of mate 1\n- 2 ... 
barcode sequence is a part of mate 2" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBposition" + description: "position of Cell Barcode(s) on the barcode read.\n\nPresently\ + \ only works with --soloType CB_UMI_Complex, and barcodes are assumed to be\ + \ on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\n\ + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read\ + \ end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based\ + \ position with of the CB start(end) with respect to the Anchor Base\nString\ + \ for different barcodes are separated by space.\nExample: inDrop (Zilionis\ + \ et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIposition" + description: "position of the UMI on the barcode read, same as soloCBposition\n\ + \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition\ + \ 3_9_3_14" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloAdapterSequence" + description: "adapter sequence to anchor barcodes. Only one adapter sequence\ + \ is allowed." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--soloAdapterMismatchesNmax" + description: "maximum number of mismatches allowed in adapter sequence." + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloCBmatchWLtype" + description: "matching the Cell Barcodes to the WhiteList\n\n- Exact \ + \ ... only exact matches allowed\n- 1MM \ + \ ... 
only one match in whitelist with 1 mismatched base allowed.\ + \ Allowed CBs have to have at least one read with exact match.\n- 1MM_multi\ + \ ... multiple matches in whitelist with 1 mismatched\ + \ base allowed, posterior probability calculation is used choose one of the\ + \ matches.\nAllowed CBs have to have at least one read with exact match. This\ + \ option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts \ + \ ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist\ + \ barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts,\ + \ multimatching to WL is allowed for CBs with N-bases. This option matches\ + \ best with CellRanger >= 3.0.0\n- EditDist_2 ... allow\ + \ up to edit distance of 3 fpr each of the barcodes. May include one deletion\ + \ + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple\ + \ passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline." + info: null + example: + - "1MM_multi" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeSeq" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq\ + \ CR UR .\nThis parameter is required when running STARsolo with input from\ + \ SAM." 
+ info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloInputSAMattrBarcodeQual" + description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\ + \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\ + \ instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual\ + \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\ + \ to all bases." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloStrand" + description: "strandedness of the solo libraries:\n\n- Unstranded ... no strand\ + \ information\n- Forward ... read strand same as the original RNA molecule\n\ + - Reverse ... read strand opposite to the original RNA molecule" + info: null + example: + - "Forward" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--soloFeatures" + description: "genomic features for which the UMI counts per Cell Barcode are\ + \ collected\n\n- Gene ... genes: reads match the gene transcript\n\ + - SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull\ + \ ... full gene (pre-mRNA): count all reads overlapping genes' exons\ + \ and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all\ + \ reads overlapping genes' exons and introns: prioritize 100% overlap with\ + \ exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads\ + \ overlapping genes' exons and introns: prioritize >50% overlap with exons.\ + \ Do not count reads with 100% exonic overlap in the antisense direction." + info: null + example: + - "Gene" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloMultiMappers" + description: "counting method for reads mapping to multiple genes\n\n- Unique\ + \ ... 
count only reads that map to unique genes\n- Uniform ... uniformly\ + \ distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs\ + \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\ + \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\ + \ if not.\n- EM ... multi-gene UMIs are distributed using Expectation\ + \ Maximization algorithm" + info: null + example: + - "Unique" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIdedup" + description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All\ + \ ... all UMIs with 1 mismatch distance to each other\ + \ are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows\ + \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\ + \ (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools,\ + \ but with more stringent criteria for duplicate UMIs\n- Exact \ + \ ... only exactly matching UMIs are collapsed.\n- NoDedup \ + \ ... no deduplication of UMIs, count all reads.\n- 1MM_CR\ + \ ... CellRanger2-4 algorithm for 1MM UMI collapsing." + info: null + example: + - "1MM_All" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloUMIfiltering" + description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\ + - - ... basic filtering: remove UMIs with N and homopolymers\ + \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove\ + \ lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ...\ + \ basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR\ + \ ... 
basic + remove lower-count UMIs that map to more than one gene, matching\ + \ CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFileNames" + description: "file names for STARsolo output:\n\nfile_name_prefix gene_names\ + \ barcode_sequences cell_feature_count_matrix" + info: null + example: + - "Solo.out/" + - "features.tsv" + - "barcodes.tsv" + - "matrix.mtx" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellFilter" + description: "cell filtering type and parameters\n\n- None ... do\ + \ not output filtered cells\n- TopCells ... only report top cells by\ + \ UMI count, followed by the exact number of cells\n- CellRanger2.2 ...\ + \ simple filtering of CellRanger 2.2.\nCan be followed by numbers: number\ + \ of expected cells, robust maximum percentile for UMI count, maximum to minimum\ + \ ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000;\ + \ maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering\ + \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\ + \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\ + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile\ + \ maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN \ + \ FDR simN\nThe harcoded values are from CellRanger: 3000 \ + \ 0.99 10 45000 90000 500 0.01\ + \ 20000 0.01 10000" + info: null + example: + - "CellRanger2.2" + - "3000" + - "0.99" + - "10" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloOutFormatFeaturesGeneField3" + description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\ + \ is output." 
+ info: null + example: + - "Gene Expression" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + dest: "par" + - type: "string" + name: "--soloCellReadStats" + description: "Output reads statistics for each CB\n\n- Standard ... standard\ + \ output" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "../star_align/script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Align fastq files using STAR." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "docker" + env: + - "STAR_VERSION 2.7.3a" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + 
directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align_v273a" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align_v273a/star_align_v273a" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/star_align_v273a/main.nf b/target/nextflow/mapping/star_align_v273a/main.nf new file mode 100644 index 00000000000..852b3df7a16 --- /dev/null +++ b/target/nextflow/mapping/star_align_v273a/main.nf @@ -0,0 +1,5287 @@ +// star_align_v273a 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. 
+// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "star_align_v273a", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input/Output", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "--readFilesIn" + ], + "description" : "The FASTQ files to be analyzed. 
Corresponds to the --readFilesIn in the STAR command.", + "example" : [ + "mysample_S1_L001_R1_001.fastq.gz", + "mysample_S1_L001_R2_001.fastq.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--reference", + "alternatives" : [ + "--genomeDir" + ], + "description" : "Path to the reference built by star_build_reference. Corresponds to the --genomeDir in the STAR command.", + "example" : [ + "/path/to/reference" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--outFileNamePrefix" + ], + "description" : "Path to output directory. Corresponds to the --outFileNamePrefix in the STAR command.", + "example" : [ + "/path/to/foo" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Run Parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--runRNGseed", + "description" : "random number generator seed.", + "example" : [ + 777 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Genome Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--genomeLoad", + "description" : "mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... 
do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome", + "example" : [ + "NoSharedMemory" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--genomeFastaFiles", + "description" : "path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins).", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--genomeFileSizes", + "description" : "genome files exact sizes in bytes. Typically, this should not be defined by the user.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--genomeTransformOutput", + "description" : "which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- None ... no transformation of the output", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--genomeChrSetMitochondrial", + "description" : "names of the mitochondrial chromosomes. 
Presently only used for STARsolo statistics output/", + "example" : [ + "chrM", + "M", + "MT" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Splice Junctions Database", + "arguments" : [ + { + "type" : "string", + "name" : "--sjdbFileChrStartEnd", + "description" : "path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--sjdbGTFfile", + "description" : "path to the GTF file with annotations", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFchrPrefix", + "description" : "prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes)", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFfeatureExon", + "description" : "feature type in GTF file to be used as exons for building transcripts", + "example" : [ + "exon" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentTranscript", + "description" : "GTF attribute name for parent transcript ID (default \\"transcript_id\\" works for GTF files)", + "example" : [ + "transcript_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGene", + "description" : "GTF attribute name for parent gene ID (default \\"gene_id\\" works for GTF files)", + "example" : [ + "gene_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneName", + "description" : "GTF attribute name for parent gene name", + "example" : [ + "gene_name" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbGTFtagExonParentGeneType", + "description" : "GTF attribute name for parent gene type", + "example" : [ + "gene_type", + "gene_biotype" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--sjdbOverhang", + "description" : "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + 
"dest" : "par" + }, + { + "type" : "integer", + "name" : "--sjdbScore", + "description" : "extra alignment score for alignments that cross database junctions", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--sjdbInsertSave", + "description" : "which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory", + "example" : [ + "Basic" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Variation parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--varVCFfile", + "description" : "path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Read Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--readFilesType", + "description" : "format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view", + "example" : [ + "Fastx" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesSAMattrKeep", + "description" : "for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... 
do not keep any tags", + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--readFilesManifest", + "description" : "path to the \\"manifest\\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesPrefix", + "description" : "prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readFilesCommand", + "description" : "command line to execute for each of the input file. 
This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--readMapNumber", + "description" : "number of reads to map from the beginning of the file\n\n-1: map all reads", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readMatesLengthsIn", + "description" : "Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.", + "example" : [ + "NotEqual" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--readNameSeparator", + "description" : "character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)", + "example" : [ + "/" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--readQualityScoreBase", + "description" : "number to be subtracted from the ASCII code to get Phred quality score", + "example" : [ + 33 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Read Clipping", + "arguments" : [ + { + "type" : "string", + "name" : "--clipAdapterType", + "description" : "adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. 
Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded", + "example" : [ + "Hamming" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip3pNbases", + "description" : "number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--clip3pAdapterSeq", + "description" : "adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--clip3pAdapterMMp", + "description" : "max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip3pAfterAdapterNbases", + "description" : "number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--clip5pNbases", + "description" : "number(s) of bases to clip from 5p of each mate. 
If one value is given, it will be assumed the same for both mates.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Limits", + "arguments" : [ + { + "type" : "long", + "name" : "--limitGenomeGenerateRAM", + "description" : "maximum available RAM (bytes) for genome generation", + "example" : [ + 31000000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitIObufferSize", + "description" : "max available buffers size (bytes) for input/output, per thread", + "example" : [ + 30000000, + 50000000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitOutSAMoneReadBytes", + "description" : "max size of the SAM record (bytes) for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax", + "example" : [ + 100000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitOutSJoneRead", + "description" : "max number of junctions for one read (including all multi-mappers)", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitOutSJcollapsed", + "description" : "max number of collapsed junctions", + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "long", + "name" : "--limitBAMsortRAM", + "description" : "maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 
0 value can only be used with --genomeLoad NoSharedMemory option.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitSjdbInsertNsj", + "description" : "maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run", + "example" : [ + 1000000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--limitNreadsSoft", + "description" : "soft limit on the number of reads", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output: general", + "arguments" : [ + { + "type" : "string", + "name" : "--outTmpKeep", + "description" : "whether to keep the temporary files after STAR runs is finished\n\n- None ... remove all temporary files\n- All ... keep all files", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outStd", + "description" : "which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. 
Requires --quantMode TranscriptomeSAM", + "example" : [ + "Log" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outReadsUnmapped", + "description" : "output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outQSconversionAdd", + "description" : "add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outMultimapperOrder", + "description" : "order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases.", + "example" : [ + "Old_2.4" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output: SAM and BAM", + "arguments" : [ + { + "type" : "string", + "name" : "--outSAMtype", + "description" : "type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. 
This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM.", + "example" : [ + "SAM" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMmode", + "description" : "mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores", + "example" : [ + "Full" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMstrandField", + "description" : "Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMattributes", + "description" : "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). 
Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --outSAMstrandField.\n- MC ... mate's CIGAR string. Standard SAM tag.\n- ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag.\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n***Unsupported/undocumented:\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant.", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMattrIHstart", + "description" : "start value for the IH attribute. 
0 may be required by some downstream software, such as Cufflinks or StringTie.", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMunmapped", + "description" : "output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMorder", + "description" : "type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files", + "example" : [ + "Paired" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMprimaryFlag", + "description" : "which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary", + "example" : [ + "OneBestScore" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMreadID", + "description" : "read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... 
read number (index) in the FASTx file", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMmapqUnique", + "description" : "0 to 255: the MAPQ value for unique mappers", + "example" : [ + 255 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMflagOR", + "description" : "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMflagAND", + "description" : "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.", + "example" : [ + 65535 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMattrRGline", + "description" : "SAM/BAM read group line. The first word contains the read group identifier and must start with \\"ID:\\", e.g. --outSAMattrRGline ID:xxx CN:yy \\"DS:z z z\\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. 
Commas have to be surrounded by spaces, e.g.\n--outSAMattrRGline ID:xxx , ID:zzz \\"DS:z z\\" , ID:yyy DS:yyyy", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderHD", + "description" : "@HD (header) line of the SAM header", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderPG", + "description" : "extra @PG (software) line of the SAM header (in addition to STAR)", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMheaderCommentFile", + "description" : "path to the file with @CO (comment) lines of the SAM header", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outSAMfilter", + "description" : "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMmultNmax", + "description" : "max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... 
all alignments (up to --outFilterMultimapNmax) will be output", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSAMtlen", + "description" : "calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMcompression", + "description" : "-1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMsortingThreadN", + "description" : ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outBAMsortingBinsN", + "description" : ">0: number of genome bins for coordinate-sorting", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "BAM processing", + "arguments" : [ + { + "type" : "string", + "name" : "--bamRemoveDuplicatesType", + "description" : "mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... 
no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--bamRemoveDuplicatesMate2basesN", + "description" : "number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Wiggle", + "arguments" : [ + { + "type" : "string", + "name" : "--outWigType", + "description" : "type of signal output, e.g. \\"bedGraph\\" OR \\"bedGraph read1_5p\\". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigStrand", + "description" : "strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands", + "example" : [ + "Stranded" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigReferencesPrefix", + "description" : "prefix matching reference names to include in the output wiggle file, e.g. 
\\"chr\\", default \\"-\\" - include all references", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outWigNorm", + "description" : "type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \\"raw\\" counts", + "example" : [ + "RPM" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Filtering", + "arguments" : [ + { + "type" : "string", + "name" : "--outFilterType", + "description" : "type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab", + "example" : [ + "Normal" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapScoreRange", + "description" : "the score range below the maximum score for multimapping alignments", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMultimapNmax", + "description" : "maximum number of loci the read is allowed to map to. 
Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \\"mapped to too many loci\\" in the Log.final.out .", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterMismatchNmax", + "description" : "alignment will be output only if it has no more mismatches than this value.", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverLmax", + "description" : "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value.", + "example" : [ + 0.3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMismatchNoverReadLmax", + "description" : "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value.", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outFilterScoreMin", + "description" : "alignment will be output only if its score is higher than or equal to this value.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterScoreMinOverLread", + "description" : "same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads)", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { 
+ "type" : "integer", + "name" : "--outFilterMatchNmin", + "description" : "alignment will be output only if the number of matched bases is higher than or equal to this value.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--outFilterMatchNminOverLread", + "description" : "same as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads).", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outFilterIntronMotifs", + "description" : "filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--outFilterIntronStrands", + "description" : "filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering", + "example" : [ + "RemoveInconsistentStrands" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output splice junctions (SJ.out.tab)", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJtype", + "description" : "type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... 
no splice junction output", + "example" : [ + "Standard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Output Filtering: Splice Junctions", + "arguments" : [ + { + "type" : "string", + "name" : "--outSJfilterReads", + "description" : "which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only", + "example" : [ + "All" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterOverhangMin", + "description" : "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions", + "example" : [ + 30, + 12, + 12, + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountUniqueMin", + "description" : "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterCountTotalMin", + "description" : "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions", + "example" : [ + 3, + 1, + 1, + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterDistToOtherSJmin", + "description" : "minimum allowed distance to other junctions' donor/acceptor\n\ndoes not apply to annotated junctions", + "example" : [ + 10, + 0, + 5, + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--outSJfilterIntronMaxVsReadN", + "description" : "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\ndoes not apply to annotated junctions", + "example" : [ + 50000, + 100000, + 200000 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + } + ] + }, + { + "name" : "Scoring", + "arguments" : [ + { + "type" : "integer", + "name" : "--scoreGap", + "description" : "splice junction penalty (independent on intron motif)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapNoncan", + "description" : "non-canonical junction penalty (in addition to scoreGap)", + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapGCAG", + "description" : "GC/AG and CT/GC junction penalty (in addition to scoreGap)", + "example" : [ + -4 + ], + "required" : false, + "direction" : 
"input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGapATAC", + "description" : "AT/AC and GT/AT junction penalty (in addition to scoreGap)", + "example" : [ + -8 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreGenomicLengthLog2scale", + "description" : "extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreDelOpen", + "description" : "deletion open penalty", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreDelBase", + "description" : "deletion extension penalty per base (in addition to scoreDelOpen)", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreInsOpen", + "description" : "insertion open penalty", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreInsBase", + "description" : "insertion extension penalty per base (in addition to scoreInsOpen)", + "example" : [ + -2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--scoreStitchSJshift", + "description" : "maximum score reduction while searching for SJ boundaries in the stitching step", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : 
false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Alignments and Seeding", + "arguments" : [ + { + "type" : "integer", + "name" : "--seedSearchStartLmax", + "description" : "defines the search start point through the read - the read is split into pieces no longer than this value", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--seedSearchStartLmaxOverLread", + "description" : "seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedSearchLmax", + "description" : "defines the maximum length of the seeds, if =0 seed length is not limited", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedMultimapNmax", + "description" : "only pieces that map fewer than this value are utilized in the stitching procedure", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedPerReadNmax", + "description" : "max number of seeds per read", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedPerWindowNmax", + "description" : "max number of seeds per window", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedNoneLociPerWindow", + "description" : "max number of one seed loci per 
window", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedSplitMin", + "description" : "min length of the seed sequences split by Ns or mate gap", + "example" : [ + 12 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seedMapMin", + "description" : "min length of seeds to be mapped", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignIntronMin", + "description" : "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", + "example" : [ + 21 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignIntronMax", + "description" : "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignMatesGapMax", + "description" : "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJoverhangMin", + "description" : "minimum overhang (i.e. 
block size) for spliced alignments", + "example" : [ + ''' + '''5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJstitchMismatchNmax", + "description" : "maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif.", + "example" : [ + 0, + -1, + 0, + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSJDBoverhangMin", + "description" : "minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments", + "example" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignSplicedMateMapLmin", + "description" : "minimum mapped length for a read mate that is spliced", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--alignSplicedMateMapLminOverLmate", + "description" : "alignSplicedMateMapLmin normalized to mate length", + "example" : [ + 0.66 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignWindowsPerReadNmax", + "description" : "max number of windows per read", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerWindowNmax", + "description" : "max number of transcripts per window", + "example" : [ + 100 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : 
":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--alignTranscriptsPerReadNmax", + "description" : "max number of different alignments per read to consider", + "example" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignEndsType", + "description" : "type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment", + "example" : [ + "Local" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignEndsProtrude", + "description" : "allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs", + "example" : [ + "0 ConcordantPair" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignSoftClipAtReferenceEnds", + "description" : "allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... 
prohibit, useful for compatibility with Cufflinks", + "example" : [ + "Yes" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--alignInsertionFlush", + "description" : "how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Paired-End reads", + "arguments" : [ + { + "type" : "integer", + "name" : "--peOverlapNbasesMin", + "description" : "minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the \\"merging of overlapping mates\\" algorithm.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--peOverlapMMp", + "description" : "maximum proportion of mismatched bases in the overlap area", + "example" : [ + 0.01 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Windows, Anchors, Binning", + "arguments" : [ + { + "type" : "integer", + "name" : "--winAnchorMultimapNmax", + "description" : "max number of loci anchors are allowed to map to", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winBinNbits", + "description" : "=log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins.", + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winAnchorDistNbins", + 
"description" : "max number of bins between two anchors that allows aggregation of anchors into one window", + "example" : [ + 9 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winFlankNbins", + "description" : "log2(winFlank), where win Flank is the size of the left and right flanking regions for each window", + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--winReadCoverageRelativeMin", + "description" : "minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only.", + "example" : [ + 0.5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--winReadCoverageBasesMin", + "description" : "minimum number of bases covered by the seeds in a window , for STARlong algorithm only.", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Chimeric Alignments", + "arguments" : [ + { + "type" : "string", + "name" : "--chimOutType", + "description" : "type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental chimeric alignments", + "example" : [ + "Junctions" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimSegmentMin", + "description" : "minimum length of chimeric segment length, if ==0, no chimeric output", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreMin", + "description" : "minimum total (summed) score of the chimeric segments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreDropMax", + "description" : "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreSeparation", + "description" : "minimum difference (separation) between the best chimeric score and the next one", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimScoreJunctionNonGTAG", + "description" : "penalty for a non-GT/AG chimeric junction", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimJunctionOverhangMin", + "description" : "minimum overhang for a chimeric junction", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : 
"--chimSegmentReadGapMax", + "description" : "maximum gap in the read sequence between chimeric segments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--chimFilter", + "description" : "different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction", + "example" : [ + "banGenomicN" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMainSegmentMultNmax", + "description" : "maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMultimapNmax", + "description" : "maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimMultimapScoreRange", + "description" : "the score range for multi-mapping chimeras below the best chimeric score. 
Only works with --chimMultimapNmax > 1", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimNonchimScoreDropMin", + "description" : "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--chimOutJunctionFormat", + "description" : "formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Quantification of Annotations", + "arguments" : [ + { + "type" : "string", + "name" : "--quantMode", + "description" : "types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--quantTranscriptomeBAMcompression", + "description" : "-2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--quantTranscriptomeBan", + "description" : "prohibit various alignment type\n\n- IndelSoftclipSingleend ... 
prohibit indels, soft clipping and single-end alignments - compatible with RSEM\n- Singleend ... prohibit single-end alignments", + "example" : [ + "IndelSoftclipSingleend" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "2-pass Mapping", + "arguments" : [ + { + "type" : "string", + "name" : "--twopassMode", + "description" : "2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--twopass1readsN", + "description" : "number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step.", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "WASP parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--waspOutputMode", + "description" : "WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "STARsolo (single cell RNA-seq) parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--soloType", + "description" : "type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... 
multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAM tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --outSAMtype BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBwhitelist", + "description" : "file(s) with whitelist(s) of cell barcodes. Only --soloType CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloCBstart", + "description" : "cell barcode start base", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloCBlen", + "description" : "cell barcode length", + "example" : [ + 16 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloUMIstart", + "description" : "UMI start base", + "example" : [ + 17 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloUMIlen", + "description" : "UMI length", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + 
"dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloBarcodeReadLength", + "description" : "length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloBarcodeMate", + "description" : "identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBposition", + "description" : "position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --soloType CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--soloCBposition 0_0_2_-1 3_1_3_8", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIposition", + "description" : "position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. 
Protocols, 2017):\n--soloCBposition 3_9_3_14", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloAdapterSequence", + "description" : "adapter sequence to anchor barcodes. Only one adapter sequence is allowed.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--soloAdapterMismatchesNmax", + "description" : "maximum number of mismatches allowed in adapter sequence.", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCBmatchWLtype", + "description" : "matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --soloType CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. 
Similar to ParseBio Split-seq pipeline.", + "example" : [ + "1MM_multi" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloInputSAMattrBarcodeSeq", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeSeq CR UR .\nThis parameter is required when running STARsolo with input from SAM.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloInputSAMattrBarcodeQual", + "description" : "when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --soloInputSAMattrBarcodeQual CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned to all bases.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloStrand", + "description" : "strandedness of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule", + "example" : [ + "Forward" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloFeatures", + "description" : "genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... 
full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction.", + "example" : [ + "Gene" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloMultiMappers", + "description" : "counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm", + "example" : [ + "Unique" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIdedup", + "description" : "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \\"directional\\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... 
CellRanger2-4 algorithm for 1MM UMI collapsing.", + "example" : [ + "1MM_All" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloUMIfiltering", + "description" : "type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 .\nOnly works with --soloUMIdedup 1MM_CR", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloOutFileNames", + "description" : "file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix", + "example" : [ + "Solo.out/", + "features.tsv", + "barcodes.tsv", + "matrix.mtx" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCellFilter", + "description" : "cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. 
Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000", + "example" : [ + "CellRanger2.2", + "3000", + "0.99", + "10" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloOutFormatFeaturesGeneField3", + "description" : "field 3 in the Gene features.tsv file. If \\"-\\", then no 3rd field is output.", + "example" : [ + "Gene Expression" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--soloCellReadStats", + "description" : "Output reads statistics for each CB\n\n- Standard ... 
standard output", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "../star_align/script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Align fastq files using STAR.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "docker", + "env" : [ + "STAR_VERSION 2.7.3a", + "PACKAGES gcc g++ make wget zlib1g-dev unzip" + ] + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make 
STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" + ] + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/star_align_v273a/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_align_v273a", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > 
"$tempscript" << VIASHMAIN +import re +import tempfile +import subprocess +from pathlib import Path +import tarfile +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'reference': $( if [ ! -z ${VIASH_PAR_REFERENCE+x} ]; then echo "r'${VIASH_PAR_REFERENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'runRNGseed': $( if [ ! -z ${VIASH_PAR_RUNRNGSEED+x} ]; then echo "int(r'${VIASH_PAR_RUNRNGSEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'genomeLoad': $( if [ ! -z ${VIASH_PAR_GENOMELOAD+x} ]; then echo "r'${VIASH_PAR_GENOMELOAD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'genomeFastaFiles': $( if [ ! -z ${VIASH_PAR_GENOMEFASTAFILES+x} ]; then echo "r'${VIASH_PAR_GENOMEFASTAFILES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genomeFileSizes': $( if [ ! -z ${VIASH_PAR_GENOMEFILESIZES+x} ]; then echo "list(map(int, r'${VIASH_PAR_GENOMEFILESIZES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'genomeTransformOutput': $( if [ ! -z ${VIASH_PAR_GENOMETRANSFORMOUTPUT+x} ]; then echo "r'${VIASH_PAR_GENOMETRANSFORMOUTPUT//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'genomeChrSetMitochondrial': $( if [ ! -z ${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL+x} ]; then echo "r'${VIASH_PAR_GENOMECHRSETMITOCHONDRIAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbFileChrStartEnd': $( if [ ! -z ${VIASH_PAR_SJDBFILECHRSTARTEND+x} ]; then echo "r'${VIASH_PAR_SJDBFILECHRSTARTEND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFfile': $( if [ ! 
-z ${VIASH_PAR_SJDBGTFFILE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFchrPrefix': $( if [ ! -z ${VIASH_PAR_SJDBGTFCHRPREFIX+x} ]; then echo "r'${VIASH_PAR_SJDBGTFCHRPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFfeatureExon': $( if [ ! -z ${VIASH_PAR_SJDBGTFFEATUREEXON+x} ]; then echo "r'${VIASH_PAR_SJDBGTFFEATUREEXON//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentTranscript': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTTRANSCRIPT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGene': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneName': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENENAME//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbGTFtagExonParentGeneType': $( if [ ! -z ${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE+x} ]; then echo "r'${VIASH_PAR_SJDBGTFTAGEXONPARENTGENETYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'sjdbOverhang': $( if [ ! -z ${VIASH_PAR_SJDBOVERHANG+x} ]; then echo "int(r'${VIASH_PAR_SJDBOVERHANG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbScore': $( if [ ! -z ${VIASH_PAR_SJDBSCORE+x} ]; then echo "int(r'${VIASH_PAR_SJDBSCORE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'sjdbInsertSave': $( if [ ! -z ${VIASH_PAR_SJDBINSERTSAVE+x} ]; then echo "r'${VIASH_PAR_SJDBINSERTSAVE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'varVCFfile': $( if [ ! -z ${VIASH_PAR_VARVCFFILE+x} ]; then echo "r'${VIASH_PAR_VARVCFFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesType': $( if [ ! 
-z ${VIASH_PAR_READFILESTYPE+x} ]; then echo "r'${VIASH_PAR_READFILESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesSAMattrKeep': $( if [ ! -z ${VIASH_PAR_READFILESSAMATTRKEEP+x} ]; then echo "r'${VIASH_PAR_READFILESSAMATTRKEEP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readFilesManifest': $( if [ ! -z ${VIASH_PAR_READFILESMANIFEST+x} ]; then echo "r'${VIASH_PAR_READFILESMANIFEST//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesPrefix': $( if [ ! -z ${VIASH_PAR_READFILESPREFIX+x} ]; then echo "r'${VIASH_PAR_READFILESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readFilesCommand': $( if [ ! -z ${VIASH_PAR_READFILESCOMMAND+x} ]; then echo "r'${VIASH_PAR_READFILESCOMMAND//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readMapNumber': $( if [ ! -z ${VIASH_PAR_READMAPNUMBER+x} ]; then echo "int(r'${VIASH_PAR_READMAPNUMBER//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'readMatesLengthsIn': $( if [ ! -z ${VIASH_PAR_READMATESLENGTHSIN+x} ]; then echo "r'${VIASH_PAR_READMATESLENGTHSIN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'readNameSeparator': $( if [ ! -z ${VIASH_PAR_READNAMESEPARATOR+x} ]; then echo "r'${VIASH_PAR_READNAMESEPARATOR//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'readQualityScoreBase': $( if [ ! -z ${VIASH_PAR_READQUALITYSCOREBASE+x} ]; then echo "int(r'${VIASH_PAR_READQUALITYSCOREBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'clipAdapterType': $( if [ ! -z ${VIASH_PAR_CLIPADAPTERTYPE+x} ]; then echo "r'${VIASH_PAR_CLIPADAPTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'clip3pNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAdapterSeq': $( if [ ! 
-z ${VIASH_PAR_CLIP3PADAPTERSEQ+x} ]; then echo "r'${VIASH_PAR_CLIP3PADAPTERSEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'clip3pAdapterMMp': $( if [ ! -z ${VIASH_PAR_CLIP3PADAPTERMMP+x} ]; then echo "list(map(float, r'${VIASH_PAR_CLIP3PADAPTERMMP//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip3pAfterAdapterNbases': $( if [ ! -z ${VIASH_PAR_CLIP3PAFTERADAPTERNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP3PAFTERADAPTERNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'clip5pNbases': $( if [ ! -z ${VIASH_PAR_CLIP5PNBASES+x} ]; then echo "list(map(int, r'${VIASH_PAR_CLIP5PNBASES//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitGenomeGenerateRAM': $( if [ ! -z ${VIASH_PAR_LIMITGENOMEGENERATERAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITGENOMEGENERATERAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitIObufferSize': $( if [ ! -z ${VIASH_PAR_LIMITIOBUFFERSIZE+x} ]; then echo "list(map(int, r'${VIASH_PAR_LIMITIOBUFFERSIZE//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'limitOutSAMoneReadBytes': $( if [ ! -z ${VIASH_PAR_LIMITOUTSAMONEREADBYTES+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSAMONEREADBYTES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJoneRead': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJONEREAD+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJONEREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitOutSJcollapsed': $( if [ ! -z ${VIASH_PAR_LIMITOUTSJCOLLAPSED+x} ]; then echo "int(r'${VIASH_PAR_LIMITOUTSJCOLLAPSED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitBAMsortRAM': $( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "int(r'${VIASH_PAR_LIMITBAMSORTRAM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitSjdbInsertNsj': $( if [ ! -z ${VIASH_PAR_LIMITSJDBINSERTNSJ+x} ]; then echo "int(r'${VIASH_PAR_LIMITSJDBINSERTNSJ//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'limitNreadsSoft': $( if [ ! 
-z ${VIASH_PAR_LIMITNREADSSOFT+x} ]; then echo "int(r'${VIASH_PAR_LIMITNREADSSOFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outTmpKeep': $( if [ ! -z ${VIASH_PAR_OUTTMPKEEP+x} ]; then echo "r'${VIASH_PAR_OUTTMPKEEP//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outStd': $( if [ ! -z ${VIASH_PAR_OUTSTD+x} ]; then echo "r'${VIASH_PAR_OUTSTD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outReadsUnmapped': $( if [ ! -z ${VIASH_PAR_OUTREADSUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTREADSUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outQSconversionAdd': $( if [ ! -z ${VIASH_PAR_OUTQSCONVERSIONADD+x} ]; then echo "int(r'${VIASH_PAR_OUTQSCONVERSIONADD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outMultimapperOrder': $( if [ ! -z ${VIASH_PAR_OUTMULTIMAPPERORDER+x} ]; then echo "r'${VIASH_PAR_OUTMULTIMAPPERORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMtype': $( if [ ! -z ${VIASH_PAR_OUTSAMTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSAMTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMmode': $( if [ ! -z ${VIASH_PAR_OUTSAMMODE+x} ]; then echo "r'${VIASH_PAR_OUTSAMMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMstrandField': $( if [ ! -z ${VIASH_PAR_OUTSAMSTRANDFIELD+x} ]; then echo "r'${VIASH_PAR_OUTSAMSTRANDFIELD//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMattributes': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIBUTES+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRIBUTES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMattrIHstart': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRIHSTART+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMATTRIHSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMunmapped': $( if [ ! -z ${VIASH_PAR_OUTSAMUNMAPPED+x} ]; then echo "r'${VIASH_PAR_OUTSAMUNMAPPED//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMorder': $( if [ ! 
-z ${VIASH_PAR_OUTSAMORDER+x} ]; then echo "r'${VIASH_PAR_OUTSAMORDER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMprimaryFlag': $( if [ ! -z ${VIASH_PAR_OUTSAMPRIMARYFLAG+x} ]; then echo "r'${VIASH_PAR_OUTSAMPRIMARYFLAG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMreadID': $( if [ ! -z ${VIASH_PAR_OUTSAMREADID+x} ]; then echo "r'${VIASH_PAR_OUTSAMREADID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMmapqUnique': $( if [ ! -z ${VIASH_PAR_OUTSAMMAPQUNIQUE+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMAPQUNIQUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagOR': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGOR+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGOR//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMflagAND': $( if [ ! -z ${VIASH_PAR_OUTSAMFLAGAND+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMFLAGAND//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMattrRGline': $( if [ ! -z ${VIASH_PAR_OUTSAMATTRRGLINE+x} ]; then echo "r'${VIASH_PAR_OUTSAMATTRRGLINE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderHD': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERHD+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERHD//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderPG': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERPG+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERPG//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMheaderCommentFile': $( if [ ! -z ${VIASH_PAR_OUTSAMHEADERCOMMENTFILE+x} ]; then echo "r'${VIASH_PAR_OUTSAMHEADERCOMMENTFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSAMfilter': $( if [ ! -z ${VIASH_PAR_OUTSAMFILTER+x} ]; then echo "r'${VIASH_PAR_OUTSAMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outSAMmultNmax': $( if [ ! -z ${VIASH_PAR_OUTSAMMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outSAMtlen': $( if [ ! 
-z ${VIASH_PAR_OUTSAMTLEN+x} ]; then echo "int(r'${VIASH_PAR_OUTSAMTLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMcompression': $( if [ ! -z ${VIASH_PAR_OUTBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingThreadN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGTHREADN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGTHREADN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outBAMsortingBinsN': $( if [ ! -z ${VIASH_PAR_OUTBAMSORTINGBINSN+x} ]; then echo "int(r'${VIASH_PAR_OUTBAMSORTINGBINSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'bamRemoveDuplicatesType': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESTYPE+x} ]; then echo "r'${VIASH_PAR_BAMREMOVEDUPLICATESTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'bamRemoveDuplicatesMate2basesN': $( if [ ! -z ${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN+x} ]; then echo "int(r'${VIASH_PAR_BAMREMOVEDUPLICATESMATE2BASESN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outWigType': $( if [ ! -z ${VIASH_PAR_OUTWIGTYPE+x} ]; then echo "r'${VIASH_PAR_OUTWIGTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'outWigStrand': $( if [ ! -z ${VIASH_PAR_OUTWIGSTRAND+x} ]; then echo "r'${VIASH_PAR_OUTWIGSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigReferencesPrefix': $( if [ ! -z ${VIASH_PAR_OUTWIGREFERENCESPREFIX+x} ]; then echo "r'${VIASH_PAR_OUTWIGREFERENCESPREFIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outWigNorm': $( if [ ! -z ${VIASH_PAR_OUTWIGNORM+x} ]; then echo "r'${VIASH_PAR_OUTWIGNORM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterType': $( if [ ! -z ${VIASH_PAR_OUTFILTERTYPE+x} ]; then echo "r'${VIASH_PAR_OUTFILTERTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterMultimapScoreRange': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMultimapNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNMAX+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMismatchNoverReadLmax': $( if [ ! -z ${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMISMATCHNOVERREADLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMin': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterScoreMinOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERSCOREMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNmin': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMIN+x} ]; then echo "int(r'${VIASH_PAR_OUTFILTERMATCHNMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterMatchNminOverLread': $( if [ ! -z ${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_OUTFILTERMATCHNMINOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'outFilterIntronMotifs': $( if [ ! -z ${VIASH_PAR_OUTFILTERINTRONMOTIFS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONMOTIFS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outFilterIntronStrands': $( if [ ! 
-z ${VIASH_PAR_OUTFILTERINTRONSTRANDS+x} ]; then echo "r'${VIASH_PAR_OUTFILTERINTRONSTRANDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJtype': $( if [ ! -z ${VIASH_PAR_OUTSJTYPE+x} ]; then echo "r'${VIASH_PAR_OUTSJTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterReads': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERREADS+x} ]; then echo "r'${VIASH_PAR_OUTSJFILTERREADS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'outSJfilterOverhangMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTEROVERHANGMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTEROVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountUniqueMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTUNIQUEMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterCountTotalMin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERCOUNTTOTALMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterDistToOtherSJmin': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERDISTTOOTHERSJMIN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'outSJfilterIntronMaxVsReadN': $( if [ ! -z ${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN+x} ]; then echo "list(map(int, r'${VIASH_PAR_OUTSJFILTERINTRONMAXVSREADN//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'scoreGap': $( if [ ! -z ${VIASH_PAR_SCOREGAP+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapNoncan': $( if [ ! -z ${VIASH_PAR_SCOREGAPNONCAN+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPNONCAN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapGCAG': $( if [ ! -z ${VIASH_PAR_SCOREGAPGCAG+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPGCAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGapATAC': $( if [ ! 
-z ${VIASH_PAR_SCOREGAPATAC+x} ]; then echo "int(r'${VIASH_PAR_SCOREGAPATAC//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreGenomicLengthLog2scale': $( if [ ! -z ${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE+x} ]; then echo "int(r'${VIASH_PAR_SCOREGENOMICLENGTHLOG2SCALE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelOpen': $( if [ ! -z ${VIASH_PAR_SCOREDELOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreDelBase': $( if [ ! -z ${VIASH_PAR_SCOREDELBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREDELBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsOpen': $( if [ ! -z ${VIASH_PAR_SCOREINSOPEN+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSOPEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreInsBase': $( if [ ! -z ${VIASH_PAR_SCOREINSBASE+x} ]; then echo "int(r'${VIASH_PAR_SCOREINSBASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'scoreStitchSJshift': $( if [ ! -z ${VIASH_PAR_SCORESTITCHSJSHIFT+x} ]; then echo "int(r'${VIASH_PAR_SCORESTITCHSJSHIFT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHSTARTLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchStartLmaxOverLread': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD+x} ]; then echo "float(r'${VIASH_PAR_SEEDSEARCHSTARTLMAXOVERLREAD//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSearchLmax': $( if [ ! -z ${VIASH_PAR_SEEDSEARCHLMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDSEARCHLMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMultimapNmax': $( if [ ! -z ${VIASH_PAR_SEEDMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerReadNmax': $( if [ ! 
-z ${VIASH_PAR_SEEDPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedPerWindowNmax': $( if [ ! -z ${VIASH_PAR_SEEDPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_SEEDPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedNoneLociPerWindow': $( if [ ! -z ${VIASH_PAR_SEEDNONELOCIPERWINDOW+x} ]; then echo "int(r'${VIASH_PAR_SEEDNONELOCIPERWINDOW//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedSplitMin': $( if [ ! -z ${VIASH_PAR_SEEDSPLITMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDSPLITMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seedMapMin': $( if [ ! -z ${VIASH_PAR_SEEDMAPMIN+x} ]; then echo "int(r'${VIASH_PAR_SEEDMAPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMin': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignIntronMax': $( if [ ! -z ${VIASH_PAR_ALIGNINTRONMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNINTRONMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignMatesGapMax': $( if [ ! -z ${VIASH_PAR_ALIGNMATESGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNMATESGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSJstitchMismatchNmax': $( if [ ! -z ${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX+x} ]; then echo "list(map(int, r'${VIASH_PAR_ALIGNSJSTITCHMISMATCHNMAX//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), + 'alignSJDBoverhangMin': $( if [ ! -z ${VIASH_PAR_ALIGNSJDBOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSJDBOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLmin': $( if [ ! 
-z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN+x} ]; then echo "int(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignSplicedMateMapLminOverLmate': $( if [ ! -z ${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE+x} ]; then echo "float(r'${VIASH_PAR_ALIGNSPLICEDMATEMAPLMINOVERLMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignWindowsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNWINDOWSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNWINDOWSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerWindowNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERWINDOWNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignTranscriptsPerReadNmax': $( if [ ! -z ${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX+x} ]; then echo "int(r'${VIASH_PAR_ALIGNTRANSCRIPTSPERREADNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'alignEndsType': $( if [ ! -z ${VIASH_PAR_ALIGNENDSTYPE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignEndsProtrude': $( if [ ! -z ${VIASH_PAR_ALIGNENDSPROTRUDE+x} ]; then echo "r'${VIASH_PAR_ALIGNENDSPROTRUDE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignSoftClipAtReferenceEnds': $( if [ ! -z ${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS+x} ]; then echo "r'${VIASH_PAR_ALIGNSOFTCLIPATREFERENCEENDS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'alignInsertionFlush': $( if [ ! -z ${VIASH_PAR_ALIGNINSERTIONFLUSH+x} ]; then echo "r'${VIASH_PAR_ALIGNINSERTIONFLUSH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'peOverlapNbasesMin': $( if [ ! -z ${VIASH_PAR_PEOVERLAPNBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_PEOVERLAPNBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'peOverlapMMp': $( if [ ! -z ${VIASH_PAR_PEOVERLAPMMP+x} ]; then echo "float(r'${VIASH_PAR_PEOVERLAPMMP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorMultimapNmax': $( if [ ! 
-z ${VIASH_PAR_WINANCHORMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winBinNbits': $( if [ ! -z ${VIASH_PAR_WINBINNBITS+x} ]; then echo "int(r'${VIASH_PAR_WINBINNBITS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winAnchorDistNbins': $( if [ ! -z ${VIASH_PAR_WINANCHORDISTNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINANCHORDISTNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winFlankNbins': $( if [ ! -z ${VIASH_PAR_WINFLANKNBINS+x} ]; then echo "int(r'${VIASH_PAR_WINFLANKNBINS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageRelativeMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGERELATIVEMIN+x} ]; then echo "float(r'${VIASH_PAR_WINREADCOVERAGERELATIVEMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'winReadCoverageBasesMin': $( if [ ! -z ${VIASH_PAR_WINREADCOVERAGEBASESMIN+x} ]; then echo "int(r'${VIASH_PAR_WINREADCOVERAGEBASESMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutType': $( if [ ! -z ${VIASH_PAR_CHIMOUTTYPE+x} ]; then echo "r'${VIASH_PAR_CHIMOUTTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimSegmentMin': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreMin': $( if [ ! -z ${VIASH_PAR_CHIMSCOREMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreDropMax': $( if [ ! -z ${VIASH_PAR_CHIMSCOREDROPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREDROPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreSeparation': $( if [ ! -z ${VIASH_PAR_CHIMSCORESEPARATION+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCORESEPARATION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimScoreJunctionNonGTAG': $( if [ ! 
-z ${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG+x} ]; then echo "int(r'${VIASH_PAR_CHIMSCOREJUNCTIONNONGTAG//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimJunctionOverhangMin': $( if [ ! -z ${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMJUNCTIONOVERHANGMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimSegmentReadGapMax': $( if [ ! -z ${VIASH_PAR_CHIMSEGMENTREADGAPMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMSEGMENTREADGAPMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimFilter': $( if [ ! -z ${VIASH_PAR_CHIMFILTER+x} ]; then echo "r'${VIASH_PAR_CHIMFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'chimMainSegmentMultNmax': $( if [ ! -z ${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMAINSEGMENTMULTNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapNmax': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPNMAX+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimMultimapScoreRange': $( if [ ! -z ${VIASH_PAR_CHIMMULTIMAPSCORERANGE+x} ]; then echo "int(r'${VIASH_PAR_CHIMMULTIMAPSCORERANGE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimNonchimScoreDropMin': $( if [ ! -z ${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN+x} ]; then echo "int(r'${VIASH_PAR_CHIMNONCHIMSCOREDROPMIN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'chimOutJunctionFormat': $( if [ ! -z ${VIASH_PAR_CHIMOUTJUNCTIONFORMAT+x} ]; then echo "int(r'${VIASH_PAR_CHIMOUTJUNCTIONFORMAT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantMode': $( if [ ! -z ${VIASH_PAR_QUANTMODE+x} ]; then echo "r'${VIASH_PAR_QUANTMODE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'quantTranscriptomeBAMcompression': $( if [ ! -z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION+x} ]; then echo "int(r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAMCOMPRESSION//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'quantTranscriptomeBan': $( if [ ! 
-z ${VIASH_PAR_QUANTTRANSCRIPTOMEBAN+x} ]; then echo "r'${VIASH_PAR_QUANTTRANSCRIPTOMEBAN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopassMode': $( if [ ! -z ${VIASH_PAR_TWOPASSMODE+x} ]; then echo "r'${VIASH_PAR_TWOPASSMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'twopass1readsN': $( if [ ! -z ${VIASH_PAR_TWOPASS1READSN+x} ]; then echo "int(r'${VIASH_PAR_TWOPASS1READSN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'waspOutputMode': $( if [ ! -z ${VIASH_PAR_WASPOUTPUTMODE+x} ]; then echo "r'${VIASH_PAR_WASPOUTPUTMODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloType': $( if [ ! -z ${VIASH_PAR_SOLOTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOTYPE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCBwhitelist': $( if [ ! -z ${VIASH_PAR_SOLOCBWHITELIST+x} ]; then echo "r'${VIASH_PAR_SOLOCBWHITELIST//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCBstart': $( if [ ! -z ${VIASH_PAR_SOLOCBSTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBSTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBlen': $( if [ ! -z ${VIASH_PAR_SOLOCBLEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOCBLEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloUMIstart': $( if [ ! -z ${VIASH_PAR_SOLOUMISTART+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMISTART//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloUMIlen': $( if [ ! -z ${VIASH_PAR_SOLOUMILEN+x} ]; then echo "int(r'${VIASH_PAR_SOLOUMILEN//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloBarcodeReadLength': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEREADLENGTH+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEREADLENGTH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloBarcodeMate': $( if [ ! -z ${VIASH_PAR_SOLOBARCODEMATE+x} ]; then echo "int(r'${VIASH_PAR_SOLOBARCODEMATE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBposition': $( if [ ! 
-z ${VIASH_PAR_SOLOCBPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOCBPOSITION//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIposition': $( if [ ! -z ${VIASH_PAR_SOLOUMIPOSITION+x} ]; then echo "r'${VIASH_PAR_SOLOUMIPOSITION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloAdapterSequence': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERSEQUENCE+x} ]; then echo "r'${VIASH_PAR_SOLOADAPTERSEQUENCE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloAdapterMismatchesNmax': $( if [ ! -z ${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX+x} ]; then echo "int(r'${VIASH_PAR_SOLOADAPTERMISMATCHESNMAX//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'soloCBmatchWLtype': $( if [ ! -z ${VIASH_PAR_SOLOCBMATCHWLTYPE+x} ]; then echo "r'${VIASH_PAR_SOLOCBMATCHWLTYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloInputSAMattrBarcodeSeq': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODESEQ//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloInputSAMattrBarcodeQual': $( if [ ! -z ${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL+x} ]; then echo "r'${VIASH_PAR_SOLOINPUTSAMATTRBARCODEQUAL//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloStrand': $( if [ ! -z ${VIASH_PAR_SOLOSTRAND+x} ]; then echo "r'${VIASH_PAR_SOLOSTRAND//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'soloFeatures': $( if [ ! -z ${VIASH_PAR_SOLOFEATURES+x} ]; then echo "r'${VIASH_PAR_SOLOFEATURES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloMultiMappers': $( if [ ! -z ${VIASH_PAR_SOLOMULTIMAPPERS+x} ]; then echo "r'${VIASH_PAR_SOLOMULTIMAPPERS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIdedup': $( if [ ! -z ${VIASH_PAR_SOLOUMIDEDUP+x} ]; then echo "r'${VIASH_PAR_SOLOUMIDEDUP//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloUMIfiltering': $( if [ ! 
-z ${VIASH_PAR_SOLOUMIFILTERING+x} ]; then echo "r'${VIASH_PAR_SOLOUMIFILTERING//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloOutFileNames': $( if [ ! -z ${VIASH_PAR_SOLOOUTFILENAMES+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFILENAMES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCellFilter': $( if [ ! -z ${VIASH_PAR_SOLOCELLFILTER+x} ]; then echo "r'${VIASH_PAR_SOLOCELLFILTER//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloOutFormatFeaturesGeneField3': $( if [ ! -z ${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3+x} ]; then echo "r'${VIASH_PAR_SOLOOUTFORMATFEATURESGENEFIELD3//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), + 'soloCellReadStats': $( if [ ! -z ${VIASH_PAR_SOLOCELLREADSTATS+x} ]; then echo "r'${VIASH_PAR_SOLOCELLREADSTATS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + +# regex for matching R[12] fastq(gz) files +# examples: +# - TSP10_Fat_MAT_SS2_B134171_B115063_Immune_A1_L003_R1.fastq.gz +# - tinygex_S1_L001_I1_001.fastq.gz +fastqgz_regex = r'(.+)_(R\\\\d+)(_\\\\d+)?\\\\.fastq(\\\\.gz)?' 
+
+# helper function for checking whether something is a gzip
+def is_gz_file(path: Path) -> bool:
+  """Return True when the first two bytes of the file at path equal the gzip signature."""
+  with open(path, 'rb') as file:
+    return file.read(2) == b'\\\\x1f\\\\x8b'
+
+# look for fastq files in a directory
+def search_fastqs(path: Path) -> list[Path]:
+  """Expand a directory into the FASTQ files directly inside it.
+
+  A directory is listed (not recursively) and only entries whose name
+  matches fastqgz_regex are kept. Anything that is not a directory is
+  returned unchanged as a one-element list.
+  """
+  if path.is_dir():
+    print(f"Input '{path}' is a directory, traversing to see if we can detect any FASTQ files.", flush=True)
+    value_paths = [file for file in path.iterdir() if re.match(fastqgz_regex, file.name) ]
+    return value_paths
+  else:
+    return [path]
+
+# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path
+def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path:
+  """Unpack par_value below temp_dir_path when it is a tar archive or a gzip file.
+
+  Returns the directory (tar) or file (gzip) that was written, or par_value
+  itself when it is neither a tar archive nor a gzip file.
+  """
+
+  if par_value.is_file() and tarfile.is_tarfile(par_value):
+    # Remove two extensions (if they exist)
+    extraction_dir_name = Path(par_value.stem).stem
+    unpacked_path = temp_dir_path / extraction_dir_name
+    print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True)
+
+    with tarfile.open(par_value, 'r') as open_tar:
+      members = open_tar.getmembers()
+      root_dirs = [member
+        for member in members
+        if member.isdir() and member.name != '.' and '/' not in member.name]
+      # if there is only one root_dir (and there are files in that directory)
+      # strip that directory name from the destination folder
+      if len(root_dirs) == 1:
+        for mem in members:
+          # keep the member name a str: tarfile documents TarInfo.name as a
+          # string and its extraction filters call str methods on it
+          mem.path = str(Path(*Path(mem.path).parts[1:]))
+        # the stripped root directory itself collapses to '.', skip it
+        members = [mem for mem in members if mem.path != '.']
+      open_tar.extractall(unpacked_path, members=members)
+    return unpacked_path
+
+  elif par_value.is_file() and is_gz_file(par_value):
+    # Remove extension (if it exists)
+    extraction_file_name = Path(par_value.stem)
+    unpacked_path = temp_dir_path / extraction_file_name
+    print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True)
+
+    with gzip.open(par_value, 'rb') as f_in:
+      with open(unpacked_path, 'wb') as f_out:
+        shutil.copyfileobj(f_in, f_out)
+    return unpacked_path
+
+  else:
+    return par_value
+
+########################
+### Main code ###
+########################
+
+# rename keys and convert path strings to Path
+# note: only list file arguments here.
+# if non-file arguments also need to be renamed,
+# the \\`processPar()\\` generator needs to be adapted
+to_rename = {'input': 'readFilesIn', 'reference': 'genomeDir', 'output': 'outFileNamePrefix'}
+
+def process_par(orig_par, to_rename):
+  """Yield (key, value) pairs of orig_par with the keys in to_rename renamed.
+
+  Values of renamed keys are converted to Path (element-wise for lists);
+  all other pairs are passed through untouched.
+  """
+  for key, value in orig_par.items():
+    # rename the key in par based on the \\`to_rename\\` dict
+    if key in to_rename.keys():
+      new_key = to_rename[key]
+
+      # also turn value into a Path
+      if isinstance(value, list):
+        new_value = [Path(val) for val in value]
+      else:
+        new_value = Path(value)
+    else:
+      new_key = key
+      new_value = value
+    yield new_key, new_value
+par = dict(process_par(par, to_rename))
+
+# create output dir if need be
+par["outFileNamePrefix"].mkdir(parents=True, exist_ok=True)
+
+with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"], ignore_cleanup_errors=True) as temp_dir:
+  print(">> Check whether input files are directories", flush=True)
+  new_read_files_in = []
+  for path in par["readFilesIn"]:
+    new_read_files_in.extend(search_fastqs(path))
+  par["readFilesIn"] = new_read_files_in
+  print("", flush=True)
+
+  # checking for compressed files, ungzip files if need be
+  temp_dir_path = Path(temp_dir)
+  for par_name in ["genomeDir", "readFilesIn"]:
+    par_values = par[par_name]
+    if par_values:
+      # turn value into list
+      is_multiple = isinstance(par_values, list)
+      if not is_multiple:
+        par_values = [ par_values ]
+
+      # output list
+      new_values = []
+      for par_value in par_values:
+        print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True)
+        new_value = extract_if_need_be(par_value, temp_dir_path)
+        new_values.append(new_value)
+
+      # unlist if need be
+      if not is_multiple:
+        new_values = new_values[0]
+
+      # replace value
+      par[par_name] = new_values
+  # end ungzipping
+  print("", flush=True)
+
+  print("Grouping R1/R2 input files into pairs", flush=True)
+  input_grouped = {}
+  for path in par['readFilesIn']:
+    # files passed explicitly are never filtered by search_fastqs, so the
+    # name may not match the pattern; fail with a readable message instead
+    # of an AttributeError on None
+    match = re.search(fastqgz_regex, path.name)
+    if match is None:
+      raise ValueError(f"Cannot determine the read group (R1, R2, ...) of input file '{path.name}': expected a name like <sample>_R1[_001].fastq[.gz].")
+    input_grouped.setdefault(match.group(2), []).append(str(path))
+  # one comma-separated list of files per read group, in first-seen order
+  par['readFilesIn'] = [ ','.join(val) for val in input_grouped.values() ]
+  print("", flush=True)
+
+  print(">> Constructing command", flush=True)
+  par["runMode"] = "alignReads"
+  par["outTmpDir"] = temp_dir_path / "run"
+  if 'cpus' in meta and meta['cpus']:
+    par["runThreadN"] = meta["cpus"]
+  # make sure there is a trailing /
+  par["outFileNamePrefix"] = f"{par['outFileNamePrefix']}/"
+
+  # every non-None entry of par becomes --name value [value ...]
+  cmd_args = [ "STAR" ]
+  for name, value in par.items():
+    if value is not None:
+      if isinstance(value, list):
+        cmd_args.extend(["--" + name] + [str(x) for x in value])
+      else:
+        cmd_args.extend(["--" + name, str(value)])
+  print("", flush=True)
+
+  print(">> Running STAR with command:", flush=True)
+  print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True)
+  print("", flush=True)
+
+  subprocess.run(
+    cmd_args,
+    check=True
+  )
+VIASHMAIN
+python -B "$tempscript"
+'''
+
+thisDefaultProcessArgs = [
+  // key to be used to trace the process and determine output names
+  key: thisConfig.functionality.name,
+  // fixed arguments to be passed to script
+  args: [:],
+  // default directives
+  directives: jsonSlurper.parseText('''{
+  "container" : {
+    "registry" : "ghcr.io",
+    "image" : "openpipelines-bio/mapping_star_align_v273a",
+    "tag" : "0.12.0"
+  },
+  "label" : [
+    "highmem",
+    "highcpu"
+  ],
+  "tag" : "$id"
+}'''),
+  // auto settings
+  auto: jsonSlurper.parseText('''{
+  "simplifyInput" : true,
+  "simplifyOutput" : false,
+  "transcript" : false,
+  "publish" : false
+}'''),
+
+  // Apply a map over the incoming tuple
+  // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }`
+  map: null,
+
+  // Apply a map over the ID element of a tuple (i.e. the first element)
+  // Example: `{ id -> id + "_foo" }`
+  mapId: null,
+
+  // Apply a map over the data element of a tuple (i.e.
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+  //
+  // Example: `{ id, output, state -> state + [counts: state.output] }`
+  // Default: `{ id, output, state -> output }`
+  toState: null,
+
+  // Whether or not to print debug messages
+  // Default: `false`
+  debug: false
+]
+
+// END CUSTOM CODE
+
+/////////////////////////////////////
+// Viash Workflow helper functions //
+/////////////////////////////////////
+
+import java.util.regex.Pattern
+import java.io.BufferedReader
+import java.io.FileReader
+import java.nio.file.Paths
+import java.nio.file.Files
+import groovy.json.JsonSlurper
+import groovy.text.SimpleTemplateEngine
+import org.yaml.snakeyaml.Yaml
+
+// param helpers //
+
+// true when `params` has an entry for `name` that is not the empty string
+def paramExists(name) {
+  return params.containsKey(name) && params[name] != ""
+}
+
+// exit with an error message when `paramExists(name)` is false
+def assertParamExists(name, description) {
+  if (!paramExists(name)) {
+    exit 1, "ERROR: Please provide a --${name} parameter ${description}"
+  }
+}
+
+// helper functions for reading params from file //
+
+// Resolve `child` relative to the directory of `parent`.
+// A child containing "://" or an absolute child is returned as is.
+def getChild(parent, child) {
+  if (child.contains("://") || Paths.get(child).isAbsolute()) {
+    child
+  } else {
+    def parentAbsolute = Paths.get(parent).toAbsolutePath().toString()
+    parentAbsolute.replaceAll('/[^/]*$', "/") + child
+  }
+}
+
+// Read a csv file into a list of maps (one per data row, keyed by header).
+// Lines starting with "#" are skipped; the first other line is the header.
+// Empty fields are dropped from the row map; one pair of surrounding double
+// quotes is removed from a field.
+def readCsv(file_path) {
+  def output = []
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+
+  // todo: allow escaped quotes in string
+  // todo: allow single quotes?
+  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
+  def removeQuote = Pattern.compile('''"(.*)"''')
+
+  def br = Files.newBufferedReader(inputFile)
+
+  // the reader is closed in `finally`, also when an assertion below fails
+  try {
+    def row = -1
+    def header = null
+    while (br.ready() && header == null) {
+      def line = br.readLine()
+      row++
+      if (!line.startsWith("#")) {
+        header = splitRegex.split(line, -1).collect{field ->
+          // `def` keeps the matcher local instead of writing to the script binding
+          def m = removeQuote.matcher(field)
+          m.find() ? m.replaceFirst('$1') : field
+        }
+      }
+    }
+    assert header != null: "CSV file should contain a header"
+
+    while (br.ready()) {
+      def line = br.readLine()
+      row++
+      if (line == null) {
+        break
+      }
+
+      if (!line.startsWith("#")) {
+        def predata = splitRegex.split(line, -1)
+        def data = predata.collect{field ->
+          if (field == "") {
+            return null
+          }
+          def m = removeQuote.matcher(field)
+          if (m.find()) {
+            return m.replaceFirst('$1')
+          } else {
+            return field
+          }
+        }
+        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
+
+        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
+        output.add(dataMap)
+      }
+    }
+  } finally {
+    br.close()
+  }
+
+  output
+}
+
+// parse a json string
+def readJsonBlob(str) {
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parseText(str)
+}
+
+// parse a json file
+def readJson(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parse(inputFile)
+}
+
+// parse a yaml string
+def readYamlBlob(str) {
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(str)
+}
+
+// parse a yaml file
+def readYaml(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(inputFile)
+}
+
+// helper functions for reading a viash config in groovy //
+
+// based on how Functionality.scala is implemented
+// Fills in defaults on `arg` in place and returns it.
+def processArgument(arg) {
+  arg.multiple = arg.multiple != null ? arg.multiple : false
+  arg.required = arg.required != null ? arg.required : false
+  arg.direction = arg.direction != null ? arg.direction : "input"
+  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":"
+  arg.plainName = arg.name.replaceAll("^-*", "")
+
+  if (arg.type == "file") {
+    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
+    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
+  }
+
+  // output files get a templated default name; the extension is taken from
+  // the original default (or else the example) when one is present
+  if (arg.type == "file" && arg.direction == "output") {
+    def mult = arg.multiple ? "_*" : ""
+    def extSearch = ""
+    if (arg.default != null) {
+      extSearch = arg.default
+    } else if (arg.example != null) {
+      extSearch = arg.example
+    }
+    if (extSearch instanceof List) {
+      extSearch = extSearch[0]
+    }
+    def extSearchResult = extSearch.find("\\.[^\\.]+\$")
+    def ext = extSearchResult != null ? extSearchResult : ""
+    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
+  }
+
+  // a single-valued argument keeps only the first default / example
+  if (!arg.multiple) {
+    if (arg.default != null && arg.default instanceof List) {
+      arg.default = arg.default[0]
+    }
+    if (arg.example != null && arg.example instanceof List) {
+      arg.example = arg.example[0]
+    }
+  }
+
+  if (arg.type == "boolean_true") {
+    arg.default = false
+  }
+  if (arg.type == "boolean_false") {
+    arg.default = true
+  }
+
+  arg
+}
+
+// based on how Functionality.scala is implemented
+// Returns a list holding at most one group named `name` that contains the
+// given `arguments` which are not referenced by name in any existing group.
+def processArgumentGroup(argumentGroups, name, arguments) {
+  def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet()
+
+  // Check if 'arguments' is in 'argumentGroups'.
+  def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))}
+
+  // Check whether an argument group of 'name' exists.
+  def existing = argumentGroups.find{gr -> name == gr.name}
+
+  // if there are no arguments missing from the argument group, just return the existing group (if any)
+  if (argumentsNotInGroup.isEmpty()) {
+    return existing == null ? [] : [existing]
+
+  // if there are missing arguments and there is an existing group, add the missing arguments to it
+  } else if (existing != null) {
+    // clone() is shallow: build a fresh list rather than appending in place,
+    // otherwise the original group's list grows on every call
+    def newEx = existing.clone()
+    newEx.arguments = (existing.arguments ?: []) + argumentsNotInGroup.findAll{it !instanceof String}
+    return [newEx]
+
+  // else create a new group
+  } else {
+    def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}]
+    return [newEx]
+  }
+}
+
+// based on how Functionality.scala is implemented
+def processConfig(config) {
+  // TODO: assert .functionality etc.
+  // `inputs` and `outputs` are still processed below, but warn that
+  // `arguments` should be used instead
+  if (config.functionality.inputs) {
+    System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.")
+  }
+  if (config.functionality.outputs) {
+    System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.")
+  }
+
+  // set defaults for inputs
+  config.functionality.inputs =
+    (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg ->
+      arg.type = arg.type != null ? arg.type : "file"
+      arg.direction = "input"
+      processArgument(arg)
+    }
+  // set defaults for outputs
+  config.functionality.outputs =
+    (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg ->
+      arg.type = arg.type != null ? arg.type : "file"
+      arg.direction = "output"
+      processArgument(arg)
+    }
+  // set defaults for arguments
+  config.functionality.arguments =
+    (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg ->
+      processArgument(arg)
+    }
+  // set defaults for argument_group arguments
+  // (string entries are argument names: only their leading dashes are stripped)
+  config.functionality.argument_groups =
+    (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp ->
+      grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg ->
+        arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg)
+      }
+      grp
+    }
+
+  // create combined arguments list
+  // (inputs, outputs, arguments, then every non-string entry of each group)
+  config.functionality.allArguments =
+    config.functionality.inputs +
+    config.functionality.outputs +
+    config.functionality.arguments +
+    config.functionality.argument_groups.collectMany{ group ->
+      group.arguments.findAll{ it !instanceof String }
+    }
+
+  // add missing argument groups (based on Functionality::allArgumentGroups())
+  def argGroups = config.functionality.argument_groups
+  def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs)
+  def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs)
+  def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments)
+  // the three standard groups are re-added above, so drop them here
+  def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name)))
+  config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered
+
+  config
+}
+
+// Read a viash config from yaml and fill in its defaults with processConfig.
+// When `file` is null, "$projectDir/config.vsh.yaml" is read.
+def readConfig(file) {
+  def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml")
+  processConfig(config)
+}
+
+// recursively merge two maps
+// - two maps under the same key are merged recursively
+// - two collections under the same key are concatenated (lhs first)
+// - otherwise the value of rhs replaces the value of lhs
+// The merge is applied to a clone of lhs.
+def mergeMap(Map lhs, Map rhs) {
+  return rhs.inject(lhs.clone()) { map, entry ->
+    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
+      map[entry.key] = mergeMap(map[entry.key], entry.value)
+    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
+      map[entry.key] += entry.value
+    } else {
+      map[entry.key] = entry.value
+    }
+    return map
+  }
+}
+
+// Merge an extra "Nextflow input-output arguments" group into `config`
+// and re-run processConfig on the result.
+def addGlobalParams(config) {
+  def localConfig = [
+    "functionality" : [
+      "argument_groups": [
+        [
+          "name": "Nextflow input-output arguments",
+          "description": "Input/output parameters for Nextflow itself.
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ *                      The first element of every set is used as its id.
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) works on a copy: the plain unique() would deduplicate ppIds
+  // in place, so the message would no longer show the offending duplicates
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path. Values that are not Strings, and
+ *         parameters that are not input files, are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single value that is not a String is passed through as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a
+ * viash config and return the parameter sets it contains.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns [ file the list was read from (or null), parameter sets ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the
+  // 'id' entry of each map to the first element of the pair
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // Declared with 'def' on purpose: an undeclared assignment would create a
+    // variable in the script binding that is shared by every invocation.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so single and multiple values are cast the same way
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *  - Split the parameter values according to their separator if
+ *    the parameter accepts multiple values
+ *  - Cast the parameters to their correct types.
+ *  - Assertions:
+ *    ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *  - Split the parameter values according to their separator if
+ *    the parameter accepts multiple values
+ *  - Cast the parameters to their correct types.
+ *  - Assertions:
+ *    ~ Check if any unknown parameters are found
+ *    ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of each set is its id,
+ *                      the second element the parameter map; any further elements are
+ *                      passed through untouched.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll keeps an entry only when the closure is true, so dropping an
+    // entry does not depend on how collectEntries treats a null result.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // parameterSettings is null for parameters that are accepted without
+      // being part of the config (publish_dir / publishDir), hence the '?.'
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as
+ *         second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // local variable: keeps the parameter sets out of the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the default value of every argument that declares one
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def defaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Run the defaults through the same split/cast/check steps as user input
+  def processedDefaults = applyConfigToOneParameterSet(defaults, config)
+
+  // Same treatment for the incoming parameter sets
+  def processedInputs = applyConfig(params, config)
+
+  // Lay the input values over the defaults; elements after the first two
+  // of each tuple are passed through unchanged
+  def merged = processedInputs.collect { tuple ->
+    def eventId = tuple[0]
+    def values = processedDefaults + tuple[1]
+    [eventId, values] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+// Validate the keys of a map-valued setting.
+//   map          : the value to check; must be a Map
+//   expectedKeys : every key of `map` must be one of these
+//   requiredKeys : every one of these must be present in `map`
+//   mapName      : name used in the error messages (may be empty or null)
+// Fails with an assertion error on the first violation.
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must name the closure parameter; 'key' is not defined in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] // 'def' keeps this local instead of leaking into the script binding + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether auto exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] // fall back to an empty list when no publishDir is set + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parentheses required: '!' binds tighter than instanceof + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/star_align_v273a/nextflow.config b/target/nextflow/mapping/star_align_v273a/nextflow.config new file mode 100644 index 
00000000000..bc4302335ff --- /dev/null +++ b/target/nextflow/mapping/star_align_v273a/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'star_align_v273a' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Align fastq files using STAR.' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { 
memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/star_align_v273a/nextflow_params.yaml b/target/nextflow/mapping/star_align_v273a/nextflow_params.yaml new file mode 100644 index 00000000000..7c77e19eb4a --- /dev/null +++ b/target/nextflow/mapping/star_align_v273a/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Input/Output +input: # please fill in - example: ["mysample_S1_L001_R1_001.fastq.gz", "mysample_S1_L001_R2_001.fastq.gz"] +reference: # please fill in - example: "/path/to/reference" +# output: "$id.$key.output.output" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/star_align_v273a/nextflow_schema.json b/target/nextflow/mapping/star_align_v273a/nextflow_schema.json new file mode 100644 index 00000000000..1997c0616a5 --- /dev/null +++ b/target/nextflow/mapping/star_align_v273a/nextflow_schema.json @@ -0,0 +1,91 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "star_align_v273a", +"description": "Align fastq files using STAR.", +"type": "object", +"definitions": { + + + + "input/output" : { + "title": "Input/Output", + "type": "object", + "description": "No 
description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed", + "help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz;mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The FASTQ files to be analyzed. Corresponds to the --readFilesIn in the STAR command." + + } + + + , + "reference": { + "type": + "string", + "description": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference", + "help_text": "Type: `file`, required, example: `/path/to/reference`. Path to the reference built by star_build_reference. Corresponds to the --genomeDir in the STAR command." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --outFileNamePrefix in the STAR command." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/mapping/star_align_v273a/setup_logger.py b/target/nextflow/mapping/star_align_v273a/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/mapping/star_align_v273a/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/mapping/star_build_reference/.config.vsh.yaml b/target/nextflow/mapping/star_build_reference/.config.vsh.yaml new file mode 100644 index 00000000000..7b3b6c9ad69 --- /dev/null +++ b/target/nextflow/mapping/star_build_reference/.config.vsh.yaml @@ -0,0 +1,190 @@ +functionality: + name: "star_build_reference" + namespace: "mapping" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Input/Output" + arguments: + - type: "file" + name: "--genome_fasta" + alternatives: + - "--genomeFastaFiles" + description: "The fasta files to be included in the reference. Corresponds to\ + \ the --genomeFastaFiles argument in the STAR command." 
+ info: null + example: + - "chr1.fasta" + - "chr2.fasta" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: " " + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + alternatives: + - "--sjdbGTFfile" + description: "Specifies the path to the file with annotated transcripts in the\ + \ standard GTF\nformat. STAR will extract splice junctions from this file\ + \ and use them to greatly improve\naccuracy of the mapping. Corresponds to\ + \ the --sjdbGTFfile argument in the STAR command.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "--genomeDir" + description: "Path to output directory. Corresponds to the --genomeDir argument\ + \ in the STAR command." + info: null + example: + - "/path/to/foo" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Genome indexing arguments" + arguments: + - type: "integer" + name: "--genomeSAindexNbases" + description: "Length (bases) of the SA pre-indexing string. Typically between\ + \ 10 and 15.\nLonger strings will use much more memory, but allow faster searches.\ + \ For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down\ + \ to\nmin(14, log2(GenomeLength)/2 - 1).\n" + info: null + default: + - 14 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Create a reference for STAR from a set of fasta files." 
+ test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "../../../resources_test/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "docker" + env: + - "STAR_VERSION 2.7.10b" + - "PACKAGES gcc g++ make wget zlib1g-dev unzip" + - type: "docker" + run: + - "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\ + \ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ + \ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\ + \ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\ + \ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ + \ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: 
"memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/config.vsh.yml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_build_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_build_reference/star_build_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/mapping/star_build_reference/main.nf b/target/nextflow/mapping/star_build_reference/main.nf new file mode 100644 index 00000000000..4c8ac615877 --- /dev/null +++ b/target/nextflow/mapping/star_build_reference/main.nf @@ -0,0 +1,2686 @@ +// star_build_reference 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "star_build_reference", + "namespace" : "mapping", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input/Output", + "arguments" : [ + { + "type" : "file", + "name" : "--genome_fasta", + "alternatives" : [ + "--genomeFastaFiles" + ], + "description" : "The fasta files to be included in the reference. Corresponds to the --genomeFastaFiles argument in the STAR command.", + "example" : [ + "chr1.fasta", + "chr2.fasta" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : " ", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--transcriptome_gtf", + "alternatives" : [ + "--sjdbGTFfile" + ], + "description" : "Specifies the path to the file with annotated transcripts in the standard GTF\nformat. STAR will extract splice junctions from this file and use them to greatly improve\naccuracy of the mapping. 
Corresponds to the --sjdbGTFfile argument in the STAR command.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--genomeDir" + ], + "description" : "Path to output directory. Corresponds to the --genomeDir argument in the STAR command.", + "example" : [ + "/path/to/foo" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Genome indexing arguments", + "arguments" : [ + { + "type" : "integer", + "name" : "--genomeSAindexNbases", + "description" : "Length (bases) of the SA pre-indexing string. Typically between 10 and 15.\nLonger strings will use much more memory, but allow faster searches. For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down to\nmin(14, log2(GenomeLength)/2 - 1).\n", + "default" : [ + 14 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/" + } + ], + "description" : "Create a reference for STAR from a set of fasta files.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/" + }, + { + "type" : "file", + "path" : "../../../resources_test/cellranger_tiny_fastq", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + 
"platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "docker", + "env" : [ + "STAR_VERSION 2.7.10b", + "PACKAGES gcc g++ make wget zlib1g-dev unzip" + ] + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\n apt-get install -y --no-install-recommends ${PACKAGES} && \\\\\n cd /tmp && \\\\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\n unzip ${STAR_VERSION}.zip && \\\\\n cd STAR-${STAR_VERSION}/source && \\\\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\n cp STAR /usr/local/bin && \\\\\n cd / && \\\\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\n apt-get --purge autoremove -y ${PACKAGES} && \\\\\n apt-get clean\n" + ] + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", 
+ "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/mapping/star_build_reference/config.vsh.yml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/mapping/star_build_reference", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import re +import tempfile +import subprocess +from pathlib import Path +import tarfile +import gzip +import shutil + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'genome_fasta': $( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "r'${VIASH_PAR_GENOME_FASTA//\\'/\\'\\"\\'\\"r\\'}'.split(' ')"; else echo None; fi ), + 'transcriptome_gtf': $( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "r'${VIASH_PAR_TRANSCRIPTOME_GTF//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'genomeSAindexNbases': $( if [ ! -z ${VIASH_PAR_GENOMESAINDEXNBASES+x} ]; then echo "int(r'${VIASH_PAR_GENOMESAINDEXNBASES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +######################## +### Helper functions ### +######################## + +# helper function for cheching whether something is a gzip +def is_gz_file(path: Path) -> bool: + with open(path, 'rb') as file: + return file.read(2) == b'\\\\x1f\\\\x8b' + +# if {par_value} is a Path, extract it to a temp_dir_path and return the resulting path +def extract_if_need_be(par_value: Path, temp_dir_path: Path) -> Path: + if par_value.is_file() and tarfile.is_tarfile(par_value): + # Remove two extensions (if they exist) + extaction_dir_name = Path(par_value.stem).stem + unpacked_path = temp_dir_path / extaction_dir_name + print(f' Tar detected; extracting {par_value} to {unpacked_path}', flush=True) + + with tarfile.open(par_value, 'r') as open_tar: + members = open_tar.getmembers() + root_dirs = [member + for member in members + if member.isdir() and member.name != '.' 
and '/' not in member.name] + # if there is only one root_dir (and there are files in that directory) + # strip that directory name from the destination folder + if len(root_dirs) == 1: + for mem in members: + mem.path = Path(*Path(mem.path).parts[1:]) + members_to_move = [mem for mem in members if mem.path != Path('.')] + open_tar.extractall(unpacked_path, members=members_to_move) + return unpacked_path + + elif par_value.is_file() and is_gz_file(par_value): + # Remove extension (if it exists) + extaction_file_name = Path(par_value.stem) + unpacked_path = temp_dir_path / extaction_file_name + print(f' Gzip detected; extracting {par_value} to {unpacked_path}', flush=True) + + with gzip.open(par_value, 'rb') as f_in: + with open(unpacked_path, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + return unpacked_path + + else: + return par_value + +######################## +### Main code ### +######################## + +# rename keys and convert path strings to Path +# note: only list file arguments here. 
if non-file arguments also need to be renamed, +# the \\`processPar()\\` generator needs to be adapted +to_rename = {'genome_fasta': 'genomeFastaFiles', 'output': 'genomeDir', 'transcriptome_gtf': 'sjdbGTFfile'} + +def process_par(orig_par, to_rename): + for key, value in orig_par.items(): + # rename the key in par based on the \\`to_rename\\` dict + if key in to_rename.keys(): + new_key = to_rename[key] + + # also turn value into a Path + if isinstance(value, list): + new_value = [Path(val) for val in value] + else: + new_value = Path(value) + else: + new_key = key + new_value = value + yield new_key, new_value +par = dict(process_par(par, to_rename)) + +# create output dir if need be +par["genomeDir"].mkdir(parents=True, exist_ok=True) + +with tempfile.TemporaryDirectory(prefix="star-", dir=meta["temp_dir"]) as temp_dir: + + # checking for compressed files, ungzip files if need be + temp_dir_path = Path(temp_dir) + for par_name in ["genomeFastaFiles", "sjdbGTFfile"]: + par_values = par[par_name] + if par_values: + # turn value into list + is_multiple = isinstance(par_values, list) + if not is_multiple: + par_values = [ par_values ] + + # output list + new_values = [] + for par_value in par_values: + print(f'>> Check compression of --{par_name} with value: {par_value}', flush=True) + new_value = extract_if_need_be(par_value, temp_dir_path) + new_values.append(new_value) + + # unlist if need be + if not is_multiple: + new_values = new_values[0] + + # replace value + par[par_name] = new_values + # end ungzipping + print("", flush=True) + + print(">> Constructing command", flush=True) + par["runMode"] = "genomeGenerate" + par["outTmpDir"] = temp_dir_path / "run" + if 'cpus' in meta and meta['cpus']: + par["runThreadN"] = meta["cpus"] + + + cmd_args = [ "STAR" ] + for name, value in par.items(): + if value is not None: + if isinstance(value, list): + cmd_args.extend(["--" + name] + [str(x) for x in value]) + else: + cmd_args.extend(["--" + name, str(value)]) + 
print("", flush=True) + + print(">> Running STAR with command:", flush=True) + print("+ " + ' '.join([str(x) for x in cmd_args]), flush=True) + print("", flush=True) + + subprocess.run( + cmd_args, + check=True + ) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/mapping_star_build_reference", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The default unique() removes the
+  // duplicates from ppIds itself, so the message below would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path. Only arguments of type 'file' with
+ *         direction 'input' are touched; values that are not Strings are returned as is.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared with 'def' so the lookup result stays local to this closure call
+    // instead of being written to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a non-String scalar (e.g. null) is passed through untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns a pair: [location of the param_list file (or null), parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; the 'type', 'direction' and 'multiple'
+ *               settings of each argument in config.functionality.allArguments are used.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these per-parameter values local to the closure call rather than
+    // storing them in the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // plain property access: Groovy's Map.get(key, default) would insert the default
+    // into the config map when the key is absent
+    def parType = paramSettings?.type
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): a parameter without settings in the config (paramSettings == null)
+    // is not simplified here, so its value stays wrapped in a list - confirm this is intended.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *   ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of each set is the id, element 1
+ *                      the parameter map, any further elements are passed through as is.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to to initialise
+ *   a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *   csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *   which correspond to the different arguments of this pipeline. A json or a yaml
+ *   file should be a list of maps, each of which has keys corresponding to the
+ *   arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *   When passing a csv, json or yaml, relative path names are relativized to the
+ *   location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a key clash the value from 'param_list' wins, as it is the right-hand operand)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // '?.' because publish_dir/publishDir are accepted but have no entry in the config;
+      // plain property access because Map.get(key, default) would write 'default: null'
+      // into the config map
+      paramValue != null || parameterSettings?.default != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that contains
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // local variable: nothing is written to the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by the plain argument name
+  def argsWithDefault = config.functionality.allArguments.findAll { it.containsKey("default") }
+  def defaultArgs = argsWithDefault.collectEntries { [ it.plainName, it.default ] }
+
+  // Both the defaults and the incoming parameter sets are run through the config
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+  def parsedInputParamSets = applyConfig(params, config)
+
+  // Layer the passed values on top of the defaults; elements after the
+  // id and the parameter map are appended unchanged
+  def parsedArgs = parsedInputParamSets.collect { paramSet ->
+    [paramSet[0], parsedDefaultValues + paramSet[1]] + paramSet.drop(2)
+  }
+  _checkUniqueIds(parsedArgs)
+
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def', so they appear to
+  // be script-level binding variables rather than locals -- confirm this is intended.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // collect the whole channel into one sorted list so that all parameter sets
+    // are handed to _preprocessInputsList together, then emit them one by one
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  // hand out a copy of the workflow registered under the requested name
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only has expected keys and contains all required keys.
+ *
+ * @param map The value to check; must be a Map.
+ * @param expectedKeys Keys that are allowed to occur in the map.
+ * @param requiredKeys Keys that must occur in the map.
+ * @param mapName Name of the map, used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map"
+  }
+  // every required key must be present; report the key that is actually missing
+  // ('key' only exists inside the closure above, so it cannot be used here)
+  requiredKeys.forEach { requiredKey ->
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * @param auto Map of auto settings; entries with a null value are dropped
+ *             before validation.
+ *
+ * @return A map containing only the keys simplifyInput, simplifyOutput,
+ *         transcript and publish. An assertion fails when one of them is
+ *         missing or is not a Boolean.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local instead of creating a script-level binding variable
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Error in auto settings: key '$key' is required"
+    assert auto[key] instanceof Boolean : "Error in auto settings: expected '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    // an explicit transcripts dir wins; otherwise fall back to a
+    // "_transcripts" folder below the publish dir
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // start from the publishDir entries that are already set (if any) and
+      // append the transcript entry
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  // the optional hooks must be closures when they are set
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // concatenate into ONE message: a second argument to log.warn is treated as a
+    // format argument, and without a placeholder it is never printed
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // declared before it is assigned so that the closure can refer to itself
+  // when it recurses into Lists and Maps
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      // emit one directive line per list element
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  // one path(...) input per file input argument
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/mapping/star_build_reference/nextflow.config b/target/nextflow/mapping/star_build_reference/nextflow.config new file mode 100644 index 
00000000000..fda6dff482d --- /dev/null +++ b/target/nextflow/mapping/star_build_reference/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'star_build_reference' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Create a reference for STAR from a set of fasta files.' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: 
mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/mapping/star_build_reference/nextflow_params.yaml b/target/nextflow/mapping/star_build_reference/nextflow_params.yaml new file mode 100644 index 00000000000..616c69ec6f2 --- /dev/null +++ b/target/nextflow/mapping/star_build_reference/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Input/Output +genome_fasta: # please fill in - example: ["chr1.fasta", "chr2.fasta"] +# transcriptome_gtf: "path/to/file" +# output: "$id.$key.output.output" + +# Genome indexing arguments +genomeSAindexNbases: 14 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/mapping/star_build_reference/nextflow_schema.json b/target/nextflow/mapping/star_build_reference/nextflow_schema.json new file mode 100644 index 00000000000..1c7cf142af1 --- /dev/null +++ b/target/nextflow/mapping/star_build_reference/nextflow_schema.json @@ -0,0 +1,116 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "star_build_reference", +"description": "Create a reference for STAR from a set of fasta files.", +"type": "object", +"definitions": { + + + + "input/output" : { + "title": 
"Input/Output", + "type": "object", + "description": "No description", + "properties": { + + + "genome_fasta": { + "type": + "string", + "description": "Type: List of `file`, required, example: `chr1.fasta chr2.fasta`, multiple_sep: `\" \"`. The fasta files to be included in the reference", + "help_text": "Type: List of `file`, required, example: `chr1.fasta chr2.fasta`, multiple_sep: `\" \"`. The fasta files to be included in the reference. Corresponds to the --genomeFastaFiles argument in the STAR command." + + } + + + , + "transcriptome_gtf": { + "type": + "string", + "description": "Type: `file`. Specifies the path to the file with annotated transcripts in the standard GTF\nformat", + "help_text": "Type: `file`. Specifies the path to the file with annotated transcripts in the standard GTF\nformat. STAR will extract splice junctions from this file and use them to greatly improve\naccuracy of the mapping. Corresponds to the --sjdbGTFfile argument in the STAR command.\n" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `/path/to/foo`. Path to output directory. Corresponds to the --genomeDir argument in the STAR command." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "genome indexing arguments" : { + "title": "Genome indexing arguments", + "type": "object", + "description": "No description", + "properties": { + + + "genomeSAindexNbases": { + "type": + "integer", + "description": "Type: `integer`, default: `14`. Length (bases) of the SA pre-indexing string", + "help_text": "Type: `integer`, default: `14`. Length (bases) of the SA pre-indexing string. Typically between 10 and 15.\nLonger strings will use much more memory, but allow faster searches. 
For small\ngenomes, the parameter {genomeSAindexNbases must be scaled down to\nmin(14, log2(GenomeLength)/2 - 1).\n" + , + "default": "14" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input/output" + }, + + { + "$ref": "#/definitions/genome indexing arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/metadata/add_id/.config.vsh.yaml b/target/nextflow/metadata/add_id/.config.vsh.yaml new file mode 100644 index 00000000000..1626d853a61 --- /dev/null +++ b/target/nextflow/metadata/add_id/.config.vsh.yaml @@ -0,0 +1,197 @@ +functionality: + name: "add_id" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_id" + description: "The input id." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_output" + description: "Name of the .obs column where to store the id." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--make_observation_keys_unique" + description: "Join the id to the .obs index (.obs_names)." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Add id of .obs. Also allows to make .obs_names (the .obs index) unique\ + \ \nby prefixing the values with an unique id per .h5mu file.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: 
"python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/add_id" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/add_id/add_id" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/metadata/add_id/main.nf b/target/nextflow/metadata/add_id/main.nf new file mode 100644 index 00000000000..f84c84cede8 --- /dev/null +++ b/target/nextflow/metadata/add_id/main.nf @@ -0,0 +1,2631 @@ +// add_id 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "add_id", + "namespace" : "metadata", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to the input .h5mu.", + "example" : [ + "sample_path" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_id", + "description" : "The input id.", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_output", + "description" : "Name of the .obs column where to store the id.", + "default" : [ + "sample_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : 
"file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--make_observation_keys_unique", + "description" : "Join the id to the .obs index (.obs_names).", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Add id of .obs. 
Also allows to make .obs_names (the .obs index) unique \nby prefixing the values with an unique id per .h5mu file.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/" + }, + { + "type" : "file", + "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 
128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/add_id/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/add_id", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from __future__ import annotations +import sys +from mudata import read_h5mu, MuData + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_id': $( if [ ! -z ${VIASH_PAR_INPUT_ID+x} ]; then echo "r'${VIASH_PAR_INPUT_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_output': $( if [ ! -z ${VIASH_PAR_OBS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'make_observation_keys_unique': $( if [ ! -z ${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE+x} ]; then echo "r'${VIASH_PAR_MAKE_OBSERVATION_KEYS_UNIQUE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: + """ + Make the observation keys unique across all samples. At input, + the observation keys are unique within a sample. By adding the sample name + (unique for a sample) to each observation key, the observation key is made + unique across all samples as well. + """ + logger.info('Making observation keys unique across all samples.') + sample.obs.index = f"{sample_id}_" + sample.obs.index + make_observation_keys_unique_per_mod(sample_id, sample) + + +def make_observation_keys_unique_per_mod(sample_id: str, sample: MuData) -> None: + """ + Updating MuData.obs_names is not allowed (it is read-only). + So the observation keys for each modality has to be updated manually. 
+ """ + for mod in sample.mod.values(): + mod.obs_names = f"{sample_id}_" + mod.obs_names + +def main(): + input_data = read_h5mu(par["input"]) + input_data.obs[par["obs_output"]] = par["input_id"] + for mod_data in input_data.mod.values(): + mod_data.obs[par["obs_output"]] = par["input_id"] + if par["make_observation_keys_unique"]: + make_observation_keys_unique(par["input_id"], input_data) + logger.info("Writing out data to '%s'.", par["output"]) + input_data.write_h5mu(par["output"], compression=par["output_compression"]) + +if __name__ == '__main__': + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/metadata_add_id", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. 
the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The plain unique() call removes the
+  // duplicates from ppIds itself, so the failure message could never show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares with type "file" and direction "input"
+ * are touched, and only values that are still of type String are resolved; any
+ * other value is returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  // Shared by the single-value and the collection case so both treat non-String values alike.
+  def resolveOne = { value ->
+    value instanceof String ? file(getChild(relativeTo, value)) : value
+  }
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local; without it the variable lands in the script binding.
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      parValue = parValue instanceof Collection ? parValue.collect(resolveOne) : resolveOne(parValue)
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the parameter sets it contains.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns [location of the param_list file (or null), parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  // (a plain lookup is used: Groovy's get(key, default) would write 'id: null' into the caller's map)
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues["id"], paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Every value is handled as a list while casting; for arguments known to the config
+ * that do not accept multiple values the single element is unwrapped again.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these local; without it they are written to the script binding.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // single-argument get: the two-argument Groovy variant would insert 'type: null' into the config
+    def parType = paramSettings ? paramSettings.get("type") : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      // Groovy truth treats every non-empty String as true, so the text "false"
+      // (e.g. a cell of a csv param_list) has to be recognised explicitly.
+      parValue = parValue.collect{
+        (it instanceof CharSequence && it.toString().trim().equalsIgnoreCase("false")) ? false : (it as Boolean)
+      }
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters unknown to the config stay wrapped in a list here.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config ('publishDir' and
+ *                    'publish_dir' are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of each set is its ID and
+ *                      element 1 the Map of parameters. Further elements are passed through as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to to initialise
+ *     a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the value from 'param_list' wins (right-hand side of the map addition)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      // 'def' keeps the lookup local instead of writing it to the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // Single-argument get: Groovy's get(key, default) would insert 'default: null'
+      // into the argument's config. The safe navigation covers parameters that are
+      // not described in the config (e.g. publish_dir).
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // declared with 'def' so the list stays local to this call
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      declaredDefaults[argument.plainName] = argument.default
+    }
+  }
+
+  // Run the defaults through the same config-driven formatting as the inputs
+  def formattedDefaults = applyConfigToOneParameterSet(declaredDefaults, config)
+  def formattedEvents = applyConfig(params, config)
+
+  // Per event: defaults on the left so the event's own values override them;
+  // tuple elements beyond the second are appended unchanged
+  def mergedEvents = formattedEvents.collect { event ->
+    [event[0], formattedDefaults + event[1]] + event.drop(2)
+  }
+
+  _checkUniqueIds(mergedEvents)
+  return mergedEvents
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map contains only expected keys and all required keys.
+ *
+ * @param map The object to check; must be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that must occur in the map.
+ * @param mapName Name of the map, only used to build the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing ('key' is not defined in this closure)
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * @param auto Map of auto settings; entries with a null value are ignored.
+ *
+ * @return A map containing only the expected keys (simplifyInput, simplifyOutput,
+ *         transcript, publish), each of which must be present and a Boolean.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // local variable: must not leak into the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected auto setting '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // Build ONE message string. Passing the continuation as a second argument
+    // makes the logger treat it as a format argument, which is dropped because
+    // the first string holds no placeholder.
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // Declared before it is assigned so that the closure body can refer to
+  // itself when rendering nested lists and maps.
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  // one ', path(viash_par_<name>)' input declaration per input file argument
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/metadata/add_id/nextflow.config b/target/nextflow/metadata/add_id/nextflow.config new file mode 100644 index 00000000000..ea37a6fa8b8 --- 
/dev/null +++ b/target/nextflow/metadata/add_id/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'add_id' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Add id of .obs. Also allows to make .obs_names (the .obs index) unique \nby prefixing the values with an unique id per .h5mu file.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB 
} + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/metadata/add_id/nextflow_params.yaml b/target/nextflow/metadata/add_id/nextflow_params.yaml new file mode 100644 index 00000000000..9be66e50b43 --- /dev/null +++ b/target/nextflow/metadata/add_id/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: "sample_path" +input_id: # please fill in - example: "foo" +obs_output: "sample_id" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +make_observation_keys_unique: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/add_id/nextflow_schema.json b/target/nextflow/metadata/add_id/nextflow_schema.json new file mode 100644 index 00000000000..29df98bbad8 --- /dev/null +++ b/target/nextflow/metadata/add_id/nextflow_schema.json @@ -0,0 +1,125 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "add_id", +"description": "Add id of .obs. 
Also allows to make .obs_names (the .obs index) unique \nby prefixing the values with an unique id per .h5mu file.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `sample_path`. Path to the input ", + "help_text": "Type: `file`, required, example: `sample_path`. Path to the input .h5mu." + + } + + + , + "input_id": { + "type": + "string", + "description": "Type: `string`, required. The input id", + "help_text": "Type: `string`, required. The input id." + + } + + + , + "obs_output": { + "type": + "string", + "description": "Type: `string`, default: `sample_id`. Name of the ", + "help_text": "Type: `string`, default: `sample_id`. Name of the .obs column where to store the id." + , + "default": "sample_id" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. ", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. " + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "make_observation_keys_unique": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Join the id to the ", + "help_text": "Type: `boolean_true`, default: `false`. Join the id to the .obs index (.obs_names)." 
+ , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/metadata/add_id/setup_logger.py b/target/nextflow/metadata/add_id/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/metadata/add_id/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml b/target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml new file mode 100644 index 00000000000..e50c809bd05 --- /dev/null +++ b/target/nextflow/metadata/grep_annotation_column/.config.vsh.yaml @@ -0,0 +1,244 @@ +functionality: + name: "grep_annotation_column" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + description: "Arguments related 
to the input dataset." + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the input .h5mu." + info: null + example: + - "sample_path" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_column" + description: "Column to query. If not specified, use .var_names or .obs_names,\ + \ depending on the value of --matrix" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to get the annotation matrix from.\n" + info: null + example: + - "rna" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--matrix" + description: "Matrix to fetch the column from that will be searched." + info: null + example: + - "var" + required: false + choices: + - "var" + - "obs" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + description: "Arguments related to how the output will be written." + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_match_column" + description: "Name of the column to write the result to." 
+ info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_fraction_column" + description: "For the opposite axis, name of the column to write the fraction\ + \ of \nobservations that matches to the pattern.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Query options" + description: "Options related to the query" + arguments: + - type: "string" + name: "--regex_pattern" + description: "Regex to use to match with the input column." + info: null + example: + - "^[mM][tT]-" + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Perform a regex lookup on a column from the annotation matrices .obs\ + \ or .var.\nThe annotation matrix can originate from either a modality, or all\ + \ modalities (global .var or .obs).\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: 
"native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/grep_annotation_column" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/grep_annotation_column/grep_annotation_column" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/metadata/grep_annotation_column/main.nf b/target/nextflow/metadata/grep_annotation_column/main.nf new file mode 100644 index 00000000000..894f743fae6 --- /dev/null +++ b/target/nextflow/metadata/grep_annotation_column/main.nf @@ -0,0 +1,2700 @@ +// grep_annotation_column 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// 
work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "grep_annotation_column", + "namespace" : "metadata", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "description" : "Arguments related to the input dataset.", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to the input .h5mu.", + "example" : [ + "sample_path" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_column", + "description" : "Column to query. 
If not specified, use .var_names or .obs_names, depending on the value of --matrix", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "Which modality to get the annotation matrix from.\n", + "example" : [ + "rna" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--matrix", + "description" : "Matrix to fetch the column from that will be searched.", + "example" : [ + "var" + ], + "required" : false, + "choices" : [ + "var", + "obs" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "description" : "Arguments related to how the output will be written.", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_match_column", + "description" : "Name of the column to write the result to.", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_fraction_column", + "description" : "For the opposite axis, name of the column to write the fraction of \nobservations that matches to the pattern.\n", + "required" : false, + "direction" : "input", + 
"multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Query options", + "description" : "Options related to the query", + "arguments" : [ + { + "type" : "string", + "name" : "--regex_pattern", + "description" : "Regex to use to match with the input column.", + "example" : [ + "^[mM][tT]-" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/" + } + ], + "description" : "Perform a regex lookup on a column from the annotation matrices .obs or .var.\nThe annotation matrix can originate from either a modality, or all modalities (global .var or .obs).\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/" + }, + { + "type" : "file", + "path" : "resources_test/concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + 
"mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/grep_annotation_column/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/grep_annotation_column", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e 
+tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +from pathlib import Path +from operator import attrgetter +import re +import numpy as np + + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_column': $( if [ ! -z ${VIASH_PAR_INPUT_COLUMN+x} ]; then echo "r'${VIASH_PAR_INPUT_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'matrix': $( if [ ! -z ${VIASH_PAR_MATRIX+x} ]; then echo "r'${VIASH_PAR_MATRIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_match_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_MATCH_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MATCH_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_fraction_column': $( if [ ! -z ${VIASH_PAR_OUTPUT_FRACTION_COLUMN+x} ]; then echo "r'${VIASH_PAR_OUTPUT_FRACTION_COLUMN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'regex_pattern': $( if [ ! -z ${VIASH_PAR_REGEX_PATTERN+x} ]; then echo "r'${VIASH_PAR_REGEX_PATTERN//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(par): + input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] + try: + compiled_regex = re.compile(par["regex_pattern"]) + except (TypeError, re.error) as e: + raise ValueError(f"{par['regex_pattern']} is not a valid regular expression pattern.") from e + else: + if compiled_regex.groups: + raise NotImplementedError("Using match groups is not supported by this component.") + logger.info('Reading input file %s, modality %s.', input_file, mod_name) + + mudata = mu.read_h5mu(input_file) + modality_data = mudata[mod_name] + annotation_matrix = getattr(modality_data, par['matrix']) + default_column = { + "var": attrgetter("var_names"), + "obs": attrgetter("obs_names") + } + if par["input_column"]: + try: + annotation_column = annotation_matrix[par["input_column"]] + except KeyError as e: + raise ValueError(f"Column {par['input_column']} could not be found for modality " + f"{par['modality']}. 
Available columns: {','.join(annotation_matrix.columns.to_list())}") from e + else: + annotation_column = default_column[par['matrix']](modality_data) + grep_result = annotation_column.str.contains(par["regex_pattern"], regex=True) + + other_axis_attribute = { + "var": "obs", + "obs": "var" + } + if par['output_fraction_column']: + pct_matching = np.ravel(np.sum(modality_data[:, grep_result].X, axis=1) / np.sum(modality_data.X, axis=1)) + getattr(modality_data, other_axis_attribute[par['matrix']])[par['output_fraction_column']] = pct_matching + getattr(modality_data, par['matrix'])[par["output_match_column"]] = grep_result + mudata.write(output_file, compression=par["output_compression"]) + +if __name__ == "__main__": + main(par) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/metadata_grep_annotation_column", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The no-arg unique() removes the duplicates
+  // from ppIds itself, so the message below could no longer show the ids that clash.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only arguments that the config declares with type "file" and direction "input" are
+ * touched. String values are passed through getChild(relativeTo, value) and file();
+ * values that are not Strings, and all other arguments, are returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *   This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ * @param relativeTo path of the file against which the parameter values are resolved.
+ *
+ * @return A Map of parameters in which the locations of the input file parameters have been
+ *   resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure call instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a scalar that is not a String is passed through as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return them as a list of parameter sets.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config.
+ *
+ * Each value is first wrapped in a list (unless it already is a Collection) and
+ * its elements are converted according to the 'type' of the matching argument:
+ *    - file (direction input, which is also assumed when direction is unset):
+ *      String elements are passed through file(), other elements are kept as is
+ *    - integer, double, boolean, boolean_true, boolean_false: 'as' coercion
+ * For arguments that are found in the config and are not 'multiple', the list
+ * must hold exactly one element and is unwrapped again.
+ * Parameters without a matching argument in the config (e.g. publish_dir)
+ * are not converted and are returned wrapped in a list.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps both variables local to this closure call; a bare assignment
+    // would store them in the script binding, shared by every invocation.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config; 'publishDir' and
+ *                    'publish_dir' are accepted as well.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * Each parameter set is a list of which the first element is the ID and the second
+ * element the parameter map; any further elements are passed through unchanged.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *    - A filtering of the params which can be found in the config file.
+ *    - Process the params_list argument which allows a user to initialise
+ *      a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *      csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *      which correspond to the different arguments of this pipeline. A json or a yaml
+ *      file should be a list of maps, each of which has keys corresponding to the
+ *      arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *      When passing a csv, json or yaml, relative path names are relativized to the
+ *      location of the parameter file.
+ *    - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      // Declared with 'def' so it stays local to this closure call instead of
+      // being written to the script binding.
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // parameterSettings is null for parameters that are not arguments in the
+      // config (e.g. publish_dir), hence the safe navigation.
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // local variable: a bare assignment would end up in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameter sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, processed with applyConfigToOneParameterSet
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ arg.plainName, arg.default ] }
+  def defaultValues = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Input parameter sets with the config applied
+  def inputSets = applyConfig(params, config)
+
+  // Lay the parameters of each set over the defaults; elements after the
+  // second one are appended to the result unchanged
+  def merged = inputSets.collect { tup ->
+    [tup[0], defaultValues + tup[1]] + tup.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only holds expected keys and holds all required keys.
+ *
+ * @param map          The value to check; must be a Map.
+ * @param expectedKeys Keys that are allowed to occur in the map.
+ * @param requiredKeys Keys that must occur in the map.
+ * @param mapName      Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; 'key' is not defined inside this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Entries with a null value are dropped first; afterwards every expected key
+ * must be present and must hold a Boolean.
+ *
+ * @param auto A Map with the auto settings.
+ *
+ * @return A Map that only contains the expected keys.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local instead of writing it to the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected value of '$key' in auto map to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports (viash_par_contents is declared with def so it stays local to the closure instead of leaking into the script binding) + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ?
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); instanceof is parenthesised because '!out instanceof List' parses as '(!out) instanceof List', which is always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/metadata/grep_annotation_column/nextflow.config b/target/nextflow/metadata/grep_annotation_column/nextflow.config new file mode 100644 
index 00000000000..a2ddbb9c962 --- /dev/null +++ b/target/nextflow/metadata/grep_annotation_column/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'grep_annotation_column' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Perform a regex lookup on a column from the annotation matrices .obs or .var.\nThe annotation matrix can originate from either a modality, or all modalities (global .var or .obs).\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + 
withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml b/target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml new file mode 100644 index 00000000000..7244549f726 --- /dev/null +++ b/target/nextflow/metadata/grep_annotation_column/nextflow_params.yaml @@ -0,0 +1,18 @@ +# Inputs +input: # please fill in - example: "sample_path" +# input_column: "foo" +modality: # please fill in - example: "rna" +# matrix: "var" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +output_match_column: # please fill in - example: "foo" +# output_fraction_column: "foo" + +# Query options +regex_pattern: # please fill in - example: "^[mM][tT]-" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/grep_annotation_column/nextflow_schema.json b/target/nextflow/metadata/grep_annotation_column/nextflow_schema.json new file mode 100644 index 00000000000..7af99853125 --- /dev/null +++ 
b/target/nextflow/metadata/grep_annotation_column/nextflow_schema.json @@ -0,0 +1,183 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "grep_annotation_column", +"description": "Perform a regex lookup on a column from the annotation matrices .obs or .var.\nThe annotation matrix can originate from either a modality, or all modalities (global .var or .obs).\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "Arguments related to the input dataset.", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `sample_path`. Path to the input ", + "help_text": "Type: `file`, required, example: `sample_path`. Path to the input .h5mu." + + } + + + , + "input_column": { + "type": + "string", + "description": "Type: `string`. Column to query", + "help_text": "Type: `string`. Column to query. If not specified, use .var_names or .obs_names, depending on the value of --matrix" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, required, example: `rna`. Which modality to get the annotation matrix from", + "help_text": "Type: `string`, required, example: `rna`. Which modality to get the annotation matrix from.\n" + + } + + + , + "matrix": { + "type": + "string", + "description": "Type: `string`, example: `var`, choices: ``var`, `obs``. Matrix to fetch the column from that will be searched", + "help_text": "Type: `string`, example: `var`, choices: ``var`, `obs``. Matrix to fetch the column from that will be searched.", + "enum": ["var", "obs"] + + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "Arguments related to how the output will be written.", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. 
", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. " + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "output_match_column": { + "type": + "string", + "description": "Type: `string`, required. Name of the column to write the result to", + "help_text": "Type: `string`, required. Name of the column to write the result to." + + } + + + , + "output_fraction_column": { + "type": + "string", + "description": "Type: `string`. For the opposite axis, name of the column to write the fraction of \nobservations that matches to the pattern", + "help_text": "Type: `string`. For the opposite axis, name of the column to write the fraction of \nobservations that matches to the pattern.\n" + + } + + +} +}, + + + "query options" : { + "title": "Query options", + "type": "object", + "description": "Options related to the query", + "properties": { + + + "regex_pattern": { + "type": + "string", + "description": "Type: `string`, required, example: `^[mM][tT]-`. Regex to use to match with the input column", + "help_text": "Type: `string`, required, example: `^[mM][tT]-`. Regex to use to match with the input column." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/query options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/metadata/join_csv/.config.vsh.yaml b/target/nextflow/metadata/join_csv/.config.vsh.yaml new file mode 100644 index 00000000000..a83cca1757c --- /dev/null +++ b/target/nextflow/metadata/join_csv/.config.vsh.yaml @@ -0,0 +1,229 @@ +functionality: + name: "join_csv" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "MuData Input" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_key" + description: "Obs column name where the sample id can be found for each observation\ + \ to join on.\nUseful when adding metadata to concatenated samples.\nMutually\ + \ exclusive with `--var_key`.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_key" + description: "Var column name where the sample id can be found for each variable\ + \ to join
on.\nMutually exclusive with `--obs_key`.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "MuData Output" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Metadata Input" + arguments: + - type: "file" + name: "--input_csv" + description: ".csv file containing metadata" + info: null + example: + - "metadata.csv" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--csv_key" + description: "column of the csv that corresponds to the sample id." + info: null + default: + - "id" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Join a csv containing metadata to the .obs or .var field of a mudata\ + \ file."
+ test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/config.vsh.yml" + platform: "nextflow" + output: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_csv" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_csv/join_csv" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/metadata/join_csv/main.nf b/target/nextflow/metadata/join_csv/main.nf new file mode 100644 index 00000000000..868f1e5f9de --- /dev/null +++ b/target/nextflow/metadata/join_csv/main.nf @@ -0,0 +1,2670 @@ +// join_csv 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "join_csv", + "namespace" : "metadata", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "MuData Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_key", + "description" : "Obs column name where the sample id can be found for each observation to join on.\nUseful when adding metadata to concatenated samples.\nMutually exclusive with `--var_key`.\\"\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_key", + "description" : "Var column name where the sample id can be found for each variable to join on.\nMutually exclusive with `--obs_key`.\\"\n", + "required" : false, + 
"direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "MuData Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Metadata Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input_csv", + "description" : ".csv file containing metadata", + "example" : [ + "metadata.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--csv_key", + "description" : "column of the the csv that corresponds to the sample id.", + "default" : [ + "id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Join a csv containing metadata to the .obs or .var field of a mudata file.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : 
"file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" 
: "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/join_csv/config.vsh.yml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_csv", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import pandas as pd +from mudata import read_h5mu + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_key': $( if [ ! -z ${VIASH_PAR_OBS_KEY+x} ]; then echo "r'${VIASH_PAR_OBS_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_key': $( if [ ! -z ${VIASH_PAR_VAR_KEY+x} ]; then echo "r'${VIASH_PAR_VAR_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_csv': $( if [ ! -z ${VIASH_PAR_INPUT_CSV+x} ]; then echo "r'${VIASH_PAR_INPUT_CSV//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'csv_key': $( if [ ! 
-z ${VIASH_PAR_CSV_KEY+x} ]; then echo "r'${VIASH_PAR_CSV_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +if par["obs_key"] and par["var_key"]: + raise ValueError("--obs_key can not be used in conjuction with --var_key.") +if not (par["obs_key"] or par["var_key"]): + raise ValueError("Must define either --obs_key or --var_key") + +logger.info("Read metadata csv from file") +metadata = pd.read_csv(par['input_csv'], sep=",", header=0, index_col=par["csv_key"]) +metadata.fillna('', inplace=True) + +logger.info("Read mudata from file") +mdata = read_h5mu(par['input']) +mod_data = mdata.mod[par['modality']] + +logger.info("Joining csv to mudata") +matrix = 'var' if par["var_key"] else 'obs' +matrix_sample_column_name = par["var_key"] if par["var_key"] else par["obs_key"] +original_matrix = getattr(mod_data, matrix) +sample_ids = original_matrix[matrix_sample_column_name] + +try: + new_columns = metadata.loc[sample_ids.tolist()] +except KeyError as e: + raise KeyError(f"Not all sample IDs selected from {matrix} " + "(using the column selected with --var_key or --obs_key) were found in " + "the csv file.") from e +new_matrix = pd.concat([original_matrix.reset_index(drop=True), + new_columns.reset_index(drop=True)], axis=1)\\\\ + .set_axis(original_matrix.index) +setattr(mod_data, matrix, new_matrix) + +logger.info("Write output to 
mudata file") +mdata.write_h5mu(par['output'], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/metadata_join_csv", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
+ // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) de-duplicates a copy; a plain unique() would strip the duplicates
+  // from ppIds itself, so the message below could no longer show the offending ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the input file paths in a parameter set relative to a given path.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String;
+ *                  values of any other type are returned unchanged.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of the file against which the parameter values are resolved.
+ *
+ * @return A map of parameters in which every String value of an input file argument
+ *         has been replaced by file(getChild(relativeTo, value)).
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local instead of writing it to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    // an argument without an explicit direction counts as an input,
+    // the same rule the type casting of file arguments applies
+    def isInputFile = argSettings &&
+      argSettings.type == "file" &&
+      (argSettings.direction != null ? argSettings.direction : "input") == "input"
+    if (isInputFile) {
+      // only Strings are still unresolved paths; everything else passes through untouched
+      def resolvePath = { path ->
+        path instanceof String ? file(getChild(relativeTo, path)) : path
+      }
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect(resolvePath)
+      } else {
+        // the scalar branch used to return the undefined name 'path' for non-String values
+        parValue = resolvePath(parValue)
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // each parser returns a pair: the param_list value when it was read as a file
+  // (null otherwise), followed by the parsed content
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; the 'type', 'direction' and 'multiple'
+ *               settings of each argument are read from config.functionality.allArguments.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ *         Values of arguments that are not found in the config are wrapped in a list
+ *         and otherwise left as they are.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps both lookups local to this closure call; without it they are
+    // written to the script binding, which is shared by everything in the script
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so that single and multiple values are cast by the same code
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // only Strings are turned into file objects; other values are kept as they are
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config; 'publishDir' and
+ *                    'publish_dir' are accepted as well.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *   ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets: the first element of each set is its id,
+ *                      the second its parameter map; further elements are passed through.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    // everything after the id and the parameter map is carried along unchanged
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to to initialise
+ *   a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *   csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *   which correspond to the different arguments of this pipeline. A json or a yaml
+ *   file should be a list of maps, each of which has keys corresponding to the
+ *   arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *   When passing a csv, json or yaml, relative path names are relativized to the
+ *   location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a name clash the value from 'param_list' wins
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll keeps or drops whole entries, so no closure has to return null.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      if (paramValue != null) {
+        return true
+      }
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // - '?.' because accepted names such as publish_dir have no settings in the config
+      // - single-argument get() only reads; get("default", null) would add a
+      //   'default' key to the config entry as a side effect
+      parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // argument name -> default value, for every argument that declares a default
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // the defaults and the incoming sets go through the same config logic
+  def defaults = applyConfigToOneParameterSet(declaredDefaults, config)
+  def inputSets = applyConfig(params, config)
+
+  // values that were passed in override the defaults;
+  // tuple elements after the parameter map are appended unchanged
+  def merged = inputSets.collect { tuple ->
+    [tuple[0], defaults + tuple[1]] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(merged)
+  merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the closure parameter is 'requiredKey'; 'key' is not defined in this closure, so
+    // interpolating it would fail with a missing property instead of naming the absent key
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/metadata/join_csv/nextflow.config b/target/nextflow/metadata/join_csv/nextflow.config new file mode 100644 index 00000000000..09b472f385b 
--- /dev/null +++ b/target/nextflow/metadata/join_csv/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'join_csv' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Join a csv containing metadata to the .obs or .var field of a mudata file.' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + 
withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/metadata/join_csv/nextflow_params.yaml b/target/nextflow/metadata/join_csv/nextflow_params.yaml new file mode 100644 index 00000000000..4d1cdd4de31 --- /dev/null +++ b/target/nextflow/metadata/join_csv/nextflow_params.yaml @@ -0,0 +1,17 @@ +# MuData Input +input: # please fill in - example: "input.h5mu" +modality: "rna" +# obs_key: "foo" +# var_key: "foo" + +# MuData Output +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Metadata Input +input_csv: # please fill in - example: "metadata.csv" +csv_key: "id" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/join_csv/nextflow_schema.json b/target/nextflow/metadata/join_csv/nextflow_schema.json new file mode 100644 index 00000000000..8b29808ecde --- /dev/null +++ b/target/nextflow/metadata/join_csv/nextflow_schema.json @@ -0,0 +1,173 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "join_csv", +"description": "Join a csv containing metadata to the .obs or .var field of a mudata file.", +"type": "object", +"definitions": { + + + + "mudata input" : { + "title": "MuData Input", + 
"type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "obs_key": { + "type": + "string", + "description": "Type: `string`. Obs column name where the sample id can be found for each observation to join on", + "help_text": "Type: `string`. Obs column name where the sample id can be found for each observation to join on.\nUseful when adding metadata to concatenated samples.\nMutually exclusive with `--var_key`.\"\n" + + } + + + , + "var_key": { + "type": + "string", + "description": "Type: `string`. Var column name where the sample id can be found for each variable to join on", + "help_text": "Type: `string`. Var column name where the sample id can be found for each variable to join on.\nMutually exclusive with `--obs_key`.\"\n" + + } + + +} +}, + + + "mudata output" : { + "title": "MuData Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "metadata input" : { + "title": "Metadata Input", + "type": "object", + "description": "No description", + "properties": { + + + "input_csv": { + "type": + "string", + "description": "Type: `file`, required, example: `metadata.csv`. ", + "help_text": "Type: `file`, required, example: `metadata.csv`. .csv file containing metadata" + + } + + + , + "csv_key": { + "type": + "string", + "description": "Type: `string`, default: `id`. column of the the csv that corresponds to the sample id", + "help_text": "Type: `string`, default: `id`. column of the the csv that corresponds to the sample id." + , + "default": "id" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/mudata input" + }, + + { + "$ref": "#/definitions/mudata output" + }, + + { + "$ref": "#/definitions/metadata input" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/metadata/join_csv/setup_logger.py b/target/nextflow/metadata/join_csv/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/metadata/join_csv/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml b/target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml new file mode 100644 index 00000000000..97ee148bbb7 --- /dev/null +++ b/target/nextflow/metadata/join_uns_to_obs/.config.vsh.yaml @@ -0,0 +1,171 @@ +functionality: + name: "join_uns_to_obs" + namespace: "metadata" + version: "0.12.4" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_key" + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + 
alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Join a data frame of length 1 (1 row index value) in .uns containing\ + \ metadata to the .obs of a mudata file." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 
32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/config.vsh.yml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_uns_to_obs" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_uns_to_obs/join_uns_to_obs" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/metadata/join_uns_to_obs/main.nf b/target/nextflow/metadata/join_uns_to_obs/main.nf new file mode 100644 index 00000000000..0fe70ce2c0b --- /dev/null +++ b/target/nextflow/metadata/join_uns_to_obs/main.nf @@ -0,0 +1,2577 @@ +// join_uns_to_obs 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+ +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "join_uns_to_obs", + "namespace" : "metadata", + "version" : "0.12.4", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--uns_key", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : 
"file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Join a data frame of length 1 (1 row index value) in .uns containing metadata to the .obs of a mudata file.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : 
"memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/join_uns_to_obs/config.vsh.yml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/join_uns_to_obs", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import pandas as pd +from mudata import read_h5mu + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'uns_key': $( if [ ! -z ${VIASH_PAR_UNS_KEY+x} ]; then echo "r'${VIASH_PAR_UNS_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Read mudata from file") +mdata = read_h5mu(par['input']) +mod_data = mdata.mod[par['modality']] + +logger.info("Joining uns to obs") +# get data frame +uns_df = mod_data.uns[par['uns_key']] + +# check for overlapping colnames +intersect_keys = uns_df.keys().intersection(mod_data.obs.keys()) +obs_drop = mod_data.obs.drop(intersect_keys, axis=1) + +# create data frame to join +uns_df_rep = uns_df.loc[uns_df.index.repeat(mod_data.n_obs)] +uns_df_rep.index = mod_data.obs_names + +# create new obs +mod_data.obs = pd.concat([obs_drop, uns_df_rep], axis=1) + +logger.info("Write output to mudata file") +mdata.write_h5mu(par['output'], compression=par["output_compression"]) + + +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/metadata_join_uns_to_obs", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + 
"simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. 
+ // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy; the default unique() removes the
+  // duplicates from ppIds itself, so the message below would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares as input files are touched, and only
+ * values that are still Strings are resolved; any other value is returned as is.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      // shared by the list and the single-value case, so both treat non-Strings the same way
+      def resolvePath = { path ->
+        path !instanceof String ? path : file(getChild(relativeTo, path))
+      }
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect(resolvePath)
+      } else {
+        parValue = resolvePath(parValue)
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns a pair: [ file the list was read from (or null), parsed content ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" + "You can use '--param_list_format' to manually specify the format.\n" + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    // plain get(): Groovy's get(key, default) would insert 'id: null' into the map when it is missing
+    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Parameters that are not declared in the config (e.g. publish_dir) are
+ * returned exactly as they were passed.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these local to the closure instead of writing to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    if (!paramSettings) {
+      return [parName, parValue]
+    }
+    // property access instead of get(key, default), which would add 'type: null' to the config
+    def parType = paramSettings.type
+    // work on a list so single and multiple values share the casting code below
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (!paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" + " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *   ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * Elements after the first two of each parameter set are passed through untouched.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to initialise
+ *   a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *   csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *   which correspond to the different arguments of this pipeline. A json or a yaml
+ *   file should be a list of maps, each of which has keys corresponding to the
+ *   arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *   When passing a csv, json or yaml, relative path names are relativized to the
+ *   location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  // plain get(): Groovy's get(key, default) would insert 'id: null' into params when it is missing
+  def globalID = params.get("id")
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      // parameterSettings is null for parameters that are not in the config (e.g. publish_dir),
+      // hence the safe navigation; get(key) does not add a 'default' entry to the config.
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of writing it to the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the defaults declared in the config and push them through the
+  // same processing as user supplied values.
+  def argsWithDefault = config.functionality.allArguments.findAll { it.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+
+  // Process every incoming parameter set and layer it on top of the defaults,
+  // so explicitly passed values win. Anything after the first two tuple
+  // elements is appended unchanged.
+  def merged = applyConfig(params, config).collect { tuple ->
+    [tuple[0], defaults + tuple[1]] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(merged)
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " + "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and has all required keys.
+ *
+ * @param map The value to check; the assertion fails if it is not a Map.
+ * @param expectedKeys Keys that are allowed to occur in the map.
+ * @param requiredKeys Keys that must occur in the map.
+ * @param mapName Name used in the error messages; may be empty.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing; 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { /* build ONE message string: a second argument to log.warn is treated as a format argument and, without a '{}' placeholder, is never printed */ + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr; /* declared before assignment so the closure can call itself for nested Lists/Maps */ valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] /* declared with def so it stays local to this closure */ + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { /* parenthesised on purpose: '!out instanceof List' parses as '(!out) instanceof List', which is always false */ + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/metadata/join_uns_to_obs/nextflow.config b/target/nextflow/metadata/join_uns_to_obs/nextflow.config new file mode 100644 index 
00000000000..d5b1cd18a90 --- /dev/null +++ b/target/nextflow/metadata/join_uns_to_obs/nextflow.config @@ -0,0 +1,107 @@ +manifest { + name = 'join_uns_to_obs' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Join a data frame of length 1 (1 row index value) in .uns containing metadata to the .obs of a mudata file.' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + 
withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml b/target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml new file mode 100644 index 00000000000..c3420e64d1b --- /dev/null +++ b/target/nextflow/metadata/join_uns_to_obs/nextflow_params.yaml @@ -0,0 +1,10 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +uns_key: # please fill in - example: "foo" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json b/target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json new file mode 100644 index 00000000000..1885dec4c65 --- /dev/null +++ b/target/nextflow/metadata/join_uns_to_obs/nextflow_schema.json @@ -0,0 +1,114 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "join_uns_to_obs", +"description": "Join a data frame of length 1 (1 row index value) in .uns containing metadata to the .obs of a mudata file.", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": 
"Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "uns_key": { + "type": + "string", + "description": "Type: `string`, required. ", + "help_text": "Type: `string`, required. " + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/metadata/join_uns_to_obs/setup_logger.py b/target/nextflow/metadata/join_uns_to_obs/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/metadata/join_uns_to_obs/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml b/target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml new file mode 100644 index 00000000000..bf0d8a7e2ca --- /dev/null +++ b/target/nextflow/metadata/move_obsm_to_obs/.config.vsh.yaml @@ -0,0 +1,192 @@ +functionality: + name: "move_obsm_to_obs" + namespace: "metadata" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "MuData Input" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: 
null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_key" + description: "Key of a data structure to move from `.obsm` to `.obs`." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "MuData Output" + arguments: + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Move a matrix from .obsm to .obs. 
Newly created columns in .obs will\ + \ \nbe created from the .obsm key suffixed with an underscore and the name of\ + \ the columns\nof the specified .obsm matrix.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + 
debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/move_obsm_to_obs" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/move_obsm_to_obs/move_obsm_to_obs" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/metadata/move_obsm_to_obs/main.nf b/target/nextflow/metadata/move_obsm_to_obs/main.nf new file mode 100644 index 00000000000..1f7c0cbb978 --- /dev/null +++ b/target/nextflow/metadata/move_obsm_to_obs/main.nf @@ -0,0 +1,2626 @@ +// move_obsm_to_obs 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "move_obsm_to_obs", + "namespace" : "metadata", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "MuData Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_key", + "description" : "Key of a data structure to move from `.obsm` to `.obs`.", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "MuData Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + 
"multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Move a matrix from .obsm to .obs. Newly created columns in .obs will \nbe created from the .obsm key suffixed with an underscore and the name of the columns\nof the specified .obsm matrix.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + 
"viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/metadata/move_obsm_to_obs/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/metadata/move_obsm_to_obs", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +from functools import partial +from pandas.errors import MergeError +from mudata import read_h5mu + +## VIASH START +# The following code has been 
auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_key': $( if [ ! -z ${VIASH_PAR_OBSM_KEY+x} ]; then echo "r'${VIASH_PAR_OBSM_KEY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Read mudata from file") +mdata = read_h5mu(par['input']) +try: + mod_data = mdata.mod[par['modality']] +except KeyError: + raise ValueError(f"Modality {par['modality']} does not exist.") + +logger.info("Moving .obm key %s", par["obsm_key"]) +try: + obsm_matrix = mod_data.obsm[par["obsm_key"]].copy() +except KeyError: + raise ValueError(f".obsm key {par['obsm_key']} was not found in " + f".obsm slot for modality {par['modality']}.") + + +obsm_matrix.rename(partial("{key}_{}".format, key=par["obsm_key"]), + axis="columns", copy=False, inplace=True) + +original_n_obs = len(mod_data.obs) +try: + logger.info(f".obs names: {mod_data.obs_names}") + logger.info(f".obsm index: {obsm_matrix.index}") + mod_data.obs = mod_data.obs.merge(obsm_matrix, how="left", + validate="one_to_one", + 
left_index=True, right_index=True) +except MergeError as e: + raise ValueError(f"Could not join .obsm matrix at {par['obsm_key']} to .obs because there " + "are some observation that are not overlapping between the two matrices " + "(indexes should overlap). This is either a bug or your mudata file is corrupt.") +del mod_data.obsm[par["obsm_key"]] + +logger.info("Write output to mudata file") +mdata.write_h5mu(par['output'], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/metadata_move_obsm_to_obs", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of each parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) de-duplicates a copy; a plain unique() strips the duplicates
+  // from ppIds itself, so the failure message could never show the offending ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares with type "file" and direction "input"
+ * are touched. Values that are not Strings are passed through unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure call instead of storing it in the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is returned as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the parameter sets it contains.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected param_list format;
+  // every parser returns a pair: [param_list file location or null, parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; the 'type', 'direction' and 'multiple'
+ *               settings of each argument are read from config.functionality.allArguments.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these per-parameter lookups local; without it they are stored
+    // in the script binding and shared between calls
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so single and multiple values are cast the same way
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+      parType == "boolean_true" ||
+      parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters that are not in the config (paramSettings == null)
+    // stay wrapped in a list here -- confirm that this is intended
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *  - Split the parameter values according to their separator if
+ *    the parameter accepts multiple values
+ *  - Cast the parameters to their correct types.
+ *  - Assertions:
+ *    ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *  - Split the parameter values according to their separator if
+ *    the parameter accepts multiple values
+ *  - Cast the parameters to their correct types.
+ *  - Assertions:
+ *    ~ Check if any unknown parameters are found
+ *    ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of each set is its id, element 1
+ *                      the Map of parameter values. Further elements are passed through as is.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the param_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local instead of storing it in the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // plain get(key): Groovy's get(key, defaultValue) would write 'default: null'
+      // into the shared config map whenever the key is absent.
+      // '?.' covers names that have no entry in the config.
+      if ( paramValue != null || parameterSettings?.get("default") != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose first
+ *         element is the ID of the event and whose second element is a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // defaults declared in the config, passed through the same split/cast/check
+  // steps as the user-supplied values
+  def configDefaults = applyConfigToOneParameterSet(
+    config.functionality.allArguments
+      .findAll { arg -> arg.containsKey("default") }
+      .collectEntries { arg -> [ arg.plainName, arg.default ] },
+    config
+  )
+
+  // supplied values override the defaults; tuple elements after the second are kept as is
+  def mergedSets = applyConfig(params, config).collect { tuple ->
+    [tuple[0], configDefaults + tuple[1]] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(mergedSets)
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] // start from an empty list when no publishDir directive is set + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure.
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // key already used by an earlier process: emit the full multi-line warning + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); parentheses are required because '!' binds tighter than instanceof + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/metadata/move_obsm_to_obs/nextflow.config b/target/nextflow/metadata/move_obsm_to_obs/nextflow.config new file mode 100644 index 
00000000000..f61d2671f2f --- /dev/null +++ b/target/nextflow/metadata/move_obsm_to_obs/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'move_obsm_to_obs' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Move a matrix from .obsm to .obs. Newly created columns in .obs will \nbe created from the .obsm key suffixed with an underscore and the name of the columns\nof the specified .obsm matrix.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: 
mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml b/target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml new file mode 100644 index 00000000000..9d15103b6a6 --- /dev/null +++ b/target/nextflow/metadata/move_obsm_to_obs/nextflow_params.yaml @@ -0,0 +1,12 @@ +# MuData Input +input: # please fill in - example: "input.h5mu" +modality: "rna" +obsm_key: # please fill in - example: "foo" + +# MuData Output +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json b/target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json new file mode 100644 index 00000000000..221c9a0926f --- /dev/null +++ b/target/nextflow/metadata/move_obsm_to_obs/nextflow_schema.json @@ -0,0 +1,128 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "move_obsm_to_obs", +"description": "Move a matrix from .obsm to .obs. 
Newly created columns in .obs will \nbe created from the .obsm key suffixed with an underscore and the name of the columns\nof the specified .obsm matrix.\n", +"type": "object", +"definitions": { + + + + "mudata input" : { + "title": "MuData Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "obsm_key": { + "type": + "string", + "description": "Type: `string`, required. Key of a data structure to move from `.obsm` to `.obs`", + "help_text": "Type: `string`, required. Key of a data structure to move from `.obsm` to `.obs`." + + } + + +} +}, + + + "mudata output" : { + "title": "MuData Output", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself.
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/mudata input" + }, + + { + "$ref": "#/definitions/mudata output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/metadata/move_obsm_to_obs/setup_logger.py b/target/nextflow/metadata/move_obsm_to_obs/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/metadata/move_obsm_to_obs/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/neighbors/bbknn/.config.vsh.yaml b/target/nextflow/neighbors/bbknn/.config.vsh.yaml new file mode 100644 index 00000000000..ee202d7dd97 --- /dev/null +++ b/target/nextflow/neighbors/bbknn/.config.vsh.yaml @@ -0,0 +1,289 @@ +functionality: + name: "bbknn" + namespace: "neighbors" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" 
+ alternatives: + - "-i" + description: "Input h5mu file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "The dimensionality reduction in `.obsm` to use for neighbour detection.\ + \ Defaults to X_pca." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_batch" + description: ".obs column name discriminating between your batches." + info: null + default: + - "batch" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output .h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_output" + description: "Mandatory .uns slot to store various neighbor output objects." + info: null + default: + - "neighbors" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_distances" + description: "In which .obsp slot to store the distance matrix between the resulting\ + \ neighbors." 
+ info: null + default: + - "distances" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_connectivities" + description: "In which .obsp slot to store the connectivities matrix between the\ + \ resulting neighbors." + info: null + default: + - "connectivities" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_neighbors_within_batch" + description: "How many top neighbours to report for each batch; total number of\ + \ neighbours in the initial k-nearest-neighbours computation will be this number\ + \ times the number of batches." + info: null + default: + - 3 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_pcs" + description: "How many dimensions (in case of PCA, principal components) to use\ + \ in the analysis." + info: null + default: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_trim" + description: "Trim the neighbours of each cell to these many top connectivities.\ + \ May help with population independence and improve the tidiness of clustering.\ + \ The lower the value the more independent the individual populations, at the\ + \ cost of more conserved batch effect. If `None` (default), sets the parameter\ + \ value automatically to 10 times `neighbors_within_batch` times the number\ + \ of batches. Set to 0 to skip." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "BBKNN network generation\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "bbknn" + - "scikit-learn~=1.2.2" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "highmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: 
"memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/bbknn" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/bbknn/bbknn" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/neighbors/bbknn/main.nf b/target/nextflow/neighbors/bbknn/main.nf new file mode 100644 index 00000000000..d329e12a468 --- /dev/null +++ b/target/nextflow/neighbors/bbknn/main.nf @@ -0,0 +1,2706 @@ +// bbknn 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (author) +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "bbknn", + "namespace" : "neighbors", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_input", + "description" : "The dimensionality reduction in `.obsm` to use for neighbour detection. 
Defaults to X_pca.", + "default" : [ + "X_pca" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_batch", + "description" : ".obs column name discriminating between your batches.", + "default" : [ + "batch" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output .h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--uns_output", + "description" : "Mandatory .uns slot to store various neighbor output objects.", + "default" : [ + "neighbors" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsp_distances", + "description" : "In which .obsp slot to store the distance matrix between the resulting neighbors.", + "default" : [ + "distances" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsp_connectivities", + "description" : "In which .obsp slot to store the connectivities matrix between the resulting neighbors.", + "default" : [ + "connectivities" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" 
+ }, + { + "type" : "integer", + "name" : "--n_neighbors_within_batch", + "description" : "How many top neighbours to report for each batch; total number of neighbours in the initial k-nearest-neighbours computation will be this number times the number of batches.", + "default" : [ + 3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_pcs", + "description" : "How many dimensions (in case of PCA, principal components) to use in the analysis.", + "default" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_trim", + "description" : "Trim the neighbours of each cell to these many top connectivities. May help with population independence and improve the tidiness of clustering. The lower the value the more independent the individual populations, at the cost of more conserved batch effect. If `None` (default), sets the parameter value automatically to 10 times `neighbors_within_batch` times the number of batches. 
Set to 0 to skip.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/" + } + ], + "description" : "BBKNN network generation\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "build-essential" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "bbknn", + "scikit-learn~=1.2.2" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowcpu", + "highmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { 
+ "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/neighbors/bbknn/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/bbknn", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from mudata import read_h5mu +import bbknn + +### VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_input': $( if [ ! 
-z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_batch': $( if [ ! -z ${VIASH_PAR_OBS_BATCH+x} ]; then echo "r'${VIASH_PAR_OBS_BATCH//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'n_neighbors_within_batch': $( if [ ! -z ${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS_WITHIN_BATCH//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_pcs': $( if [ ! -z ${VIASH_PAR_N_PCS+x} ]; then echo "int(r'${VIASH_PAR_N_PCS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_trim': $( if [ ! -z ${VIASH_PAR_N_TRIM+x} ]; then echo "int(r'${VIASH_PAR_N_TRIM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +mudata = read_h5mu(par["input"]) +adata = mudata.mod[par["modality"]] + +# copy data +tmp_adata = adata.copy() +bbknn.bbknn( + tmp_adata, + use_rep=par["obsm_input"], + batch_key = par["obs_batch"], + neighbors_within_batch=par["n_neighbors_within_batch"], + n_pcs=par["n_pcs"], + trim=par["n_trim"] +) + +# store output +adata.obsp[par["obsp_connectivities"]] = tmp_adata.obsp["connectivities"] +adata.obsp[par["obsp_distances"]] = tmp_adata.obsp["distances"] +adata.uns[par["uns_output"]] = tmp_adata.uns["neighbors"] +adata.uns[par["uns_output"]]["distances_key"] = par["obsp_distances"] +adata.uns[par["uns_output"]]["connectivities_key"] = par["obsp_connectivities"] + +# write to file +mudata.write_h5mu(par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/neighbors_bbknn", + "tag" : "0.12.0" + }, + "label" : [ + "lowcpu", + "highmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. 
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+              'example': 'my_params.yaml',
+              'multiple': false,
+              'hidden': true
+            ],
+          ]
+        ]
+      ]
+    ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+/**
+ * Greedily wrap a string into lines of at most maxLength characters.
+ * Words are obtained by splitting on single whitespace characters and are
+ * re-joined with a single space. A word longer than maxLength is placed on
+ * a line of its own.
+ *
+ * @param str The text to wrap.
+ * @param maxLength Maximum length of one output line.
+ *
+ * @return A list of lines.
+ */
+// based on io.viash.helpers.Format.wordWrap
+def formatWordWrap(str, maxLength) {
+  def words = str.split("\\s").toList()
+
+  def lines = []
+  // null means "no line started yet"; starting from "" would put a leading
+  // space in front of the first word and count it against maxLength
+  def line = null
+  while(!words.isEmpty()) {
+    def word = words.pop()
+    if (line == null) {
+      line = word
+    } else if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  if (line != null) {
+    lines.add(line)
+  }
+  return lines
+}
+
+/**
+ * Wrap a multi-paragraph string. The text is split on "\n" and every
+ * paragraph is wrapped greedily into lines of at most maxLength characters.
+ *
+ * @param str The text to wrap.
+ * @param maxLength Maximum length of one output line.
+ *
+ * @return A list with the wrapped lines of all paragraphs, in order.
+ */
+// based on Format.paragraphWrap
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    // a whitespace-only paragraph splits into zero words; emit it as an
+    // empty line instead of calling pop() on an empty list
+    def line = words.isEmpty() ? "" : words.pop()
+    while(!words.isEmpty()) {
+      def word = words.pop()
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    outLines.add(line)
+  }
+  return outLines
+}
+
+/**
+ * Render the help entry of a single argument: the "--<plainName>" header,
+ * followed by its named properties (type, default, example, choices, min, max)
+ * and its wrapped description.
+ *
+ * @param param The argument settings (a map from the processed Viash config).
+ *
+ * @return The help text for this argument.
+ */
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      example = param.example.toString()
+    }
+  }
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  def escapeChoice = { choice ->
+    def s1 = choice.replaceAll("\\n", "\\\\n")
+    def s2 = s1.replaceAll("\"", """\\\"""")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    // show newlines as a visible backslash-n. The literal replace() is needed here:
+    // with replaceAll() the replacement "\\n" is read as an escaped 'n', which
+    // turned every newline into the letter "n".
+    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+/**
+ * Build the complete help text of a component: name and version, description,
+ * usage and one section per argument group.
+ *
+ * @param config A processed Viash config (see readConfig()).
+ *
+ * @return The help text.
+ */
+// Based on Helper.generateHelp() in Helper.scala
+def generateHelp(config) {
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // group members given by name are looked up in allArguments; unknown names are dropped
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+/**
+ * Print the help text (including the global Nextflow arguments) and exit
+ * when the 'help' parameter is present.
+ *
+ * @param config A processed Viash config (see readConfig()).
+ */
+def helpMessage(config) {
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+/**
+ * Guess the format of the 'param_list' parameter.
+ *
+ * @param params The Nextflow parameters.
+ *
+ * @return "none" when param_list is absent or null, "asis" when it is not a String,
+ *         "csv" / "json" / "yaml" based on the file extension, and "yaml_blob" otherwise.
+ */
+def _guessParamListFormat(params) {
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+// set once the deprecation warning below has been printed, so it is shown only once
+viashChannelDeprecationWarningPrinted = false
+
+def paramsToList(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ?
params.param_list : "")
+  def paramList = multiOptionOut[1]
+  // location of the param_list file, or null when param_list was not read from a file
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        // declared locally so the value does not leak into the script binding
+        def parData
+        // split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              // resolve relative to the param_list file
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      // NOTE(review): this closure receives map entries, which are never null,
+      // so the filter looks like a no-op -- confirm the intent.
+      .findAll{ par -> par != null }
+  }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  // unique(false) returns a copy; the mutating unique() would strip the
+  // duplicates from ppIds before the message below is rendered
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+/**
+ * Deprecated: wrap the output of paramsToList() in a channel using Channel.fromList.
+ */
+def paramsToChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+/**
+ * Deprecated: like paramsToChannel(), but every event is mapped to [id, parameters].
+ */
+def viashChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
+      "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ *         their separator. Parameters not found in the config are returned unaltered.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(parameterSettings.multiple_sep)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ */
+// NOTE(review): the generic type arguments of this signature were lost in the
+// extracted text (only "List>>" remained); they are reconstructed here -- confirm
+// against upstream.
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a copy; the mutating unique() would strip the
+  // duplicates from ppIds before the message below is rendered
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path. Values which are not Strings, and
+ *         parameters which are not input files, are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared locally so the value does not leak into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single non-String value is kept as is (the original referenced an
+        // undefined 'path' variable here)
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' argument of the nextflow parameters based on settings
+ * defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+// NOTE(review): the generic type arguments of this signature were lost in the
+// extracted text (only "List>" remained); they are reconstructed here -- confirm
+// against upstream.
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID
+  // out of the map and into the first element of the pair
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // both declared locally so the values do not leak into the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so single and multiple values are cast the same way
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): for a parameter that is not in the config (paramSettings == null)
+    // a single value stays wrapped in a one-element list -- confirm this is intended.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config ('publishDir' and
+ *                    'publish_dir' are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; the first element of each set is the id,
+ *                      the second the parameter map, further elements are passed through as is.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+// NOTE(review): the generic type arguments of this signature were lost in the
+// extracted text (only "List>>" remained); they are reconstructed here -- confirm
+// against upstream.
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // declared locally so the value does not leak into the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // '?.' because parameters such as publish_dir are accepted by applyConfig
+      // but have no entry in allArguments
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ */
+def channelFromParams(Map params, Map config) {
+  // declared locally so the value does not leak into the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the default value of every argument that declares one
+  def defaultsByName = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      defaultsByName[argument.plainName] = argument.default
+    }
+  }
+
+  // Run both the defaults and the incoming parameter sets through the config
+  def processedDefaults = applyConfigToOneParameterSet(defaultsByName, config)
+  def processedSets = applyConfig(params, config)
+
+  // Lay the incoming values over the defaults; id and passthrough elements are kept
+  def mergedSets = processedSets.collect({ processedSet ->
+    def valuesWithDefaults = processedDefaults + processedSet[1]
+    [processedSet[0], valuesWithDefaults] + processedSet.drop(2)
+  })
+  _checkUniqueIds(mergedSets)
+
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events into one list so they can be processed (and their ids
+    // checked) together, then emit them as separate events again
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map which only contains expected keys and
+ * contains all required keys.
+ *
+ * @param map The value to check.
+ * @param expectedKeys The keys which are allowed to occur in the map.
+ * @param requiredKeys The keys which must occur in the map.
+ * @param mapName Name of the map, used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/neighbors/bbknn/nextflow.config b/target/nextflow/neighbors/bbknn/nextflow.config new file mode 100644 index 00000000000..71c6d999ff1 --- 
/dev/null +++ b/target/nextflow/neighbors/bbknn/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'bbknn' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'BBKNN network generation\n' + author = 'Dries De Maeyer, Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + 
withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/neighbors/bbknn/nextflow_params.yaml b/target/nextflow/neighbors/bbknn/nextflow_params.yaml new file mode 100644 index 00000000000..f7d8955af44 --- /dev/null +++ b/target/nextflow/neighbors/bbknn/nextflow_params.yaml @@ -0,0 +1,17 @@ +# Arguments +input: # please fill in - example: "path/to/file" +modality: "rna" +obsm_input: "X_pca" +obs_batch: "batch" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +uns_output: "neighbors" +obsp_distances: "distances" +obsp_connectivities: "connectivities" +n_neighbors_within_batch: 3 +n_pcs: 50 +# n_trim: 123 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/neighbors/bbknn/nextflow_schema.json b/target/nextflow/neighbors/bbknn/nextflow_schema.json new file mode 100644 index 00000000000..e8f17e69f40 --- /dev/null +++ b/target/nextflow/neighbors/bbknn/nextflow_schema.json @@ -0,0 +1,191 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bbknn", +"description": "BBKNN network generation\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { 
+ + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input h5mu file", + "help_text": "Type: `file`, required. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "obsm_input": { + "type": + "string", + "description": "Type: `string`, default: `X_pca`. The dimensionality reduction in `.obsm` to use for neighbour detection", + "help_text": "Type: `string`, default: `X_pca`. The dimensionality reduction in `.obsm` to use for neighbour detection. Defaults to X_pca." + , + "default": "X_pca" + } + + + , + "obs_batch": { + "type": + "string", + "description": "Type: `string`, default: `batch`. .obs column name discriminating between your batches", + "help_text": "Type: `string`, default: `batch`. .obs column name discriminating between your batches." + , + "default": "batch" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output .h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output .h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "uns_output": { + "type": + "string", + "description": "Type: `string`, default: `neighbors`. Mandatory .uns slot to store various neighbor output objects", + "help_text": "Type: `string`, default: `neighbors`. Mandatory .uns slot to store various neighbor output objects." + , + "default": "neighbors" + } + + + , + "obsp_distances": { + "type": + "string", + "description": "Type: `string`, default: `distances`. 
In which .obsp slot to store the distance matrix between the resulting neighbors", + "help_text": "Type: `string`, default: `distances`. In which .obsp slot to store the distance matrix between the resulting neighbors." + , + "default": "distances" + } + + + , + "obsp_connectivities": { + "type": + "string", + "description": "Type: `string`, default: `connectivities`. In which .obsp slot to store the connectivities matrix between the resulting neighbors", + "help_text": "Type: `string`, default: `connectivities`. In which .obsp slot to store the connectivities matrix between the resulting neighbors." + , + "default": "connectivities" + } + + + , + "n_neighbors_within_batch": { + "type": + "integer", + "description": "Type: `integer`, default: `3`. How many top neighbours to report for each batch; total number of neighbours in the initial k-nearest-neighbours computation will be this number times the number of batches", + "help_text": "Type: `integer`, default: `3`. How many top neighbours to report for each batch; total number of neighbours in the initial k-nearest-neighbours computation will be this number times the number of batches." + , + "default": "3" + } + + + , + "n_pcs": { + "type": + "integer", + "description": "Type: `integer`, default: `50`. How many dimensions (in case of PCA, principal components) to use in the analysis", + "help_text": "Type: `integer`, default: `50`. How many dimensions (in case of PCA, principal components) to use in the analysis." + , + "default": "50" + } + + + , + "n_trim": { + "type": + "integer", + "description": "Type: `integer`. Trim the neighbours of each cell to these many top connectivities", + "help_text": "Type: `integer`. Trim the neighbours of each cell to these many top connectivities. May help with population independence and improve the tidiness of clustering. The lower the value the more independent the individual populations, at the cost of more conserved batch effect. If `None` (default), sets the parameter value automatically to 10 times `n_neighbors_within_batch` times the number of batches. Set to 0 to skip." 
+ + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/neighbors/find_neighbors/.config.vsh.yaml b/target/nextflow/neighbors/find_neighbors/.config.vsh.yaml new file mode 100644 index 00000000000..72154030a36 --- /dev/null +++ b/target/nextflow/neighbors/find_neighbors/.config.vsh.yaml @@ -0,0 +1,309 @@ +functionality: + name: "find_neighbors" + namespace: "neighbors" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + 
default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsm_input" + description: "Which .obsm slot to use as a starting PCA embedding." + info: null + default: + - "X_pca" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file containing the found neighbors." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--uns_output" + description: "Mandatory .uns slot to store various neighbor output objects." + info: null + default: + - "neighbors" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_distances" + description: "In which .obsp slot to store the distance matrix between the resulting\ + \ neighbors." + info: null + default: + - "distances" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obsp_connectivities" + description: "In which .obsp slot to store the connectivities matrix between the\ + \ resulting neighbors." + info: null + default: + - "connectivities" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--metric" + description: "The distance metric to be used in the generation of the nearest\ + \ neighborhood network." 
+ info: null + default: + - "euclidean" + required: false + choices: + - "cityblock" + - "cosine" + - "euclidean" + - "l1" + - "l2" + - "manhattan" + - "braycurtis" + - "canberra" + - "chebyshev" + - "correlation" + - "dice" + - "hamming" + - "jaccard" + - "kulsinski" + - "mahalanobis" + - "minkowski" + - "rogerstanimoto" + - "russellrao" + - "seuclidean" + - "sokalmichener" + - "sokalsneath" + - "sqeuclidean" + - "yule" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--num_neighbors" + description: "The size of local neighborhood (in terms of number of neighboring\ + \ data points) used for manifold approximation. Larger values result in more\ + \ global views of the manifold, while smaller values result in more local data\ + \ being preserved. In general values should be in the range 2 to 100. If knn\ + \ is True, number of nearest neighbors to be searched. If knn is False, a Gaussian\ + \ kernel width is set to the distance of the n_neighbors neighbor." + info: null + default: + - 15 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--seed" + description: "A random seed." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor\ + \ search efficiency of this heavily relies on UMAP [McInnes18], which also provides\ + \ a method for estimating connectivities of data points - the connectivity of\ + \ the manifold (method=='umap'). 
If method=='gauss', connectivities are computed\ + \ according to [Coifman05], in the adaption of [Haghverdi16].\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" 
+ cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/find_neighbors" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/find_neighbors/find_neighbors" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/neighbors/find_neighbors/main.nf b/target/nextflow/neighbors/find_neighbors/main.nf new file mode 100644 index 00000000000..6fe2d2e84d2 --- /dev/null +++ b/target/nextflow/neighbors/find_neighbors/main.nf @@ -0,0 +1,2759 @@ +// find_neighbors 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (maintainer) +// * Robrecht Cannoodt (contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "find_neighbors", + "namespace" : "neighbors", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsm_input", + "description" : "Which .obsm slot 
to use as a starting PCA embedding.", + "default" : [ + "X_pca" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file containing the found neighbors.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--uns_output", + "description" : "Mandatory .uns slot to store various neighbor output objects.", + "default" : [ + "neighbors" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsp_distances", + "description" : "In which .obsp slot to store the distance matrix between the resulting neighbors.", + "default" : [ + "distances" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obsp_connectivities", + "description" : "In which .obsp slot to store the connectivities matrix between the resulting neighbors.", + "default" : [ + "connectivities" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--metric", + "description" : "The distance metric to be used in the generation of the nearest neighborhood network.", + "default" : [ + "euclidean" + ], + "required" : false, + 
"choices" : [ + "cityblock", + "cosine", + "euclidean", + "l1", + "l2", + "manhattan", + "braycurtis", + "canberra", + "chebyshev", + "correlation", + "dice", + "hamming", + "jaccard", + "kulsinski", + "mahalanobis", + "minkowski", + "rogerstanimoto", + "russellrao", + "seuclidean", + "sokalmichener", + "sokalsneath", + "sqeuclidean", + "yule" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--num_neighbors", + "description" : "The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If knn is True, number of nearest neighbors to be searched. If knn is False, a Gaussian kernel width is set to the distance of the n_neighbors neighbor.", + "default" : [ + 15 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--seed", + "description" : "A random seed.", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor search efficiency of this heavily relies on UMAP [McInnes18], which also provides a method for estimating connectivities of data points - the connectivity of the manifold (method=='umap'). 
If method=='gauss', connectivities are computed according to [Coifman05], in the adaption of [Haghverdi16].\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowcpu", + "midmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 
1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/neighbors/find_neighbors/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/neighbors/find_neighbors", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import mudata as mu +import scanpy as sc +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsm_input': $( if [ ! -z ${VIASH_PAR_OBSM_INPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsp_distances': $( if [ ! -z ${VIASH_PAR_OBSP_DISTANCES+x} ]; then echo "r'${VIASH_PAR_OBSP_DISTANCES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'metric': $( if [ ! -z ${VIASH_PAR_METRIC+x} ]; then echo "r'${VIASH_PAR_METRIC//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'num_neighbors': $( if [ ! -z ${VIASH_PAR_NUM_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_NUM_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) + +mod = par["modality"] +logger.info("Computing a neighborhood graph on modality %s", mod) +adata = mdata.mod[mod] +neighbors = sc.Neighbors(adata) +neighbors.compute_neighbors( + n_neighbors=par["num_neighbors"], + use_rep=par["obsm_input"], + metric=par["metric"], + random_state=par["seed"], + method="umap" +) + +adata.uns[par["uns_output"]] = { + 'connectivities_key': par["obsp_connectivities"], + 'distances_key': par["obsp_distances"], 
+ 'params': { + 'n_neighbors': neighbors.n_neighbors, + 'method': "umap", + 'random_state': par["seed"], + 'metric': par["metric"], + 'use_rep': par["obsm_input"] + } +} + +adata.obsp[par["obsp_distances"]] = neighbors.distances +adata.obsp[par["obsp_connectivities"]] = neighbors.connectivities + +logger.info("Writing to %s", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/neighbors_find_neighbors", + "tag" : "0.12.0" + }, + "label" : [ + "lowcpu", + "midmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. A bare unique() removes the duplicates
+  // from ppIds itself, so the failure message below would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only arguments that the config declares with type "file" and direction "input"
+ * are touched; all other entries are returned as is.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String;
+ *                  values that are not Strings are left unchanged.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of leaking it into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a single value that is not a String is passed through untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config 
+ * and return a nextflow channel.
+ * 
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // every parser returns a pair: [file the list was read from (or null), parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to the closure instead of leaking them into the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list from here on, so single and multiple values share one code path
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // (values without settings, e.g. publish_dir, are left wrapped in a list)
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. Element 0 of each set is used as the id,
+ *                      element 1 as the parameter map; further elements are passed through.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to 
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted 
+ * events.
+ * 
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise 
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are 
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names 
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file, 
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the 'param_list' value wins because it is the right-hand operand
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local to the closure instead of leaking it into the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // '?.' because keys such as publish_dir are accepted without an argument definition;
+      // a null value for such a key is dropped instead of raising a NullPointerException
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config 
+ * and return a nextflow channel.
+ * 
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file, 
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ */
+def channelFromParams(Map params, Map config) {
+  // declared with 'def' so the list stays local instead of becoming a script-binding variable
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make 
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by the argument's plain name
+  def declaredDefaults = config.functionality.allArguments
+    .findAll { arg -> arg.containsKey("default") }
+    .collectEntries { arg -> [ (arg.plainName): arg.default ] }
+  // Run the defaults through the same config application as the input values
+  def castDefaults = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Apply the config to every incoming parameter set
+  def castInputSets = applyConfig(params, config)
+
+  // Layer each input set on top of the defaults; input values win because
+  // they are the right-hand operand of the map addition
+  def mergedSets = castInputSets.collect { tuple ->
+    def (id, values) = tuple
+    def extras = tuple.drop(2)
+    // passthrough elements are re-appended unchanged
+    [id, castDefaults + values] + extras
+  }
+
+  // IDs are checked once more on the merged result
+  _checkUniqueIds(mergedSets)
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // Both values are locals captured by the workflow body below. Left undeclared they
+  // live in the script binding, where a later preprocessInputs() call overwrites them
+  // before this workflow's body has run.
+  def wfKey = args.key != null ? args.key : "preprocessInputs"
+  def config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+// Assert that 'map' is a Map, that all of its keys occur in 'expectedKeys' and that
+// every key in 'requiredKeys' is present. 'mapName' is only used in the messages.
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing; there is no 'key' variable in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // Concatenate into ONE message. With a comma the second string is passed as a
+    // format argument and, as the message has no '{}' placeholder, never printed.
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // Declared before it is assigned: a closure cannot refer to the variable it is
+  // being assigned to in the same declaration, and the List/Map branches recurse.
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
/**
 * Build a Nextflow workflow that wraps this module's process.
 *
 * The workflow consumes a channel of tuples `[id, data, ...passthrough]` and
 * emits tuples `[id, new_state, ...passthrough]`:
 *   1. each tuple is normalised and validated (map/mapId/mapData/
 *      mapPassthrough hooks, simplifyInput, renameKeys);
 *   2. optionally filtered (`filter`) and projected (`fromState`);
 *   3. arguments are resolved as defaults < params < processArgs.args < data
 *      and handed to the process;
 *   4. the process output is turned into a map of output files and merged
 *      back into the previous state through `toState`.
 *
 * @param args  Map of workflow arguments; normalised by processProcessArgs().
 * @return a clone of the workflow named after `key`, extended with a
 *         `run(runArgs)` factory method and a `config` property.
 */
def workflowFactory(Map args) {
  def processArgs = processProcessArgs(args)
  def key = processArgs["key"]
  def meta = ScriptMeta.current()

  def workflowKey = key

  // the process is created lazily, the first time the workflow body runs
  def processObj = null

  workflow workflowInstance {
    take:
    input_

    main:
    if (processObj == null) {
      processObj = processFactory(processArgs)
    }

    mid1_ = input_
      | debug(processArgs, "input")
      | map { tuple ->
        // work on a copy so the upstream tuple is never mutated
        tuple = tuple.clone()

        if (processArgs.map) {
          tuple = processArgs.map(tuple)
        }
        if (processArgs.mapId) {
          tuple[0] = processArgs.mapId(tuple[0])
        }
        if (processArgs.mapData) {
          tuple[1] = processArgs.mapData(tuple[1])
        }
        if (processArgs.mapPassthrough) {
          tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2))
        }

        // check tuple
        assert tuple instanceof List :
          "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
        assert tuple.size() >= 2 :
          "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: tuple.size() == ${tuple.size()}"

        // check id field
        assert tuple[0] instanceof CharSequence :
          "Error in module '${key}': first element of tuple in channel should be a String\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: ${tuple[0]}"

        // match file to input file
        if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
          def inputFiles = thisConfig.functionality.allArguments
            .findAll { it.type == "file" && it.direction == "input" }

          assert inputFiles.size() == 1 :
              "Error in module '${key}' id '${tuple[0]}'.\n" +
              " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
              " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"

          // wrap the anonymous file(s) into a map keyed by the single file input
          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
        }

        // check data field
        assert tuple[1] instanceof Map :
          "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

        // rename keys of data field in tuple
        if (processArgs.renameKeys) {
          assert processArgs.renameKeys instanceof Map :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}"
          assert tuple[1] instanceof Map :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

          // TODO: allow renameKeys to be a function?
          processArgs.renameKeys.each { newKey, oldKey ->
            assert newKey instanceof CharSequence :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
            assert oldKey instanceof CharSequence :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
            assert tuple[1].containsKey(oldKey) :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
            tuple[1].put(newKey, tuple[1][oldKey])
          }
          // old keys are dropped only after every new key has been copied
          tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
        }
        tuple
      }

    if (processArgs.filter) {
      mid2_ = mid1_
        | filter{processArgs.filter(it)}
    } else {
      mid2_ = mid1_
    }

    if (processArgs.fromState) {
      mid3_ = mid2_
        | map{
          def new_data = processArgs["fromState"](it.take(2))
          [it[0], new_data]
        }
    } else {
      mid3_ = mid2_
    }

    out0_ = mid3_
      | debug(processArgs, "processed")
      | map { tuple ->
        def id = tuple[0]
        def data = tuple[1]

        // fetch default params from functionality
        def defaultArgs = thisConfig.functionality.allArguments
          .findAll { it.containsKey("default") }
          .collectEntries { [ it.plainName, it.default ] }

        // fetch overrides in params
        def paramArgs = thisConfig.functionality.allArguments
          .findAll { par ->
            def argKey = key + "__" + par.plainName
            params.containsKey(argKey) && params[argKey] != "viash_no_value"
          }
          .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] }

        // fetch overrides in data
        def dataArgs = thisConfig.functionality.allArguments
          .findAll { data.containsKey(it.plainName) }
          .collectEntries { [ it.plainName, data[it.plainName] ] }

        // combine params; later maps take precedence over earlier ones
        def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs

        // remove arguments with explicit null values
        combinedArgs.removeAll{it.value == null}

        if (workflow.stubRun) {
          // add id if missing
          combinedArgs = [id: 'stub'] + combinedArgs
        } else {
          // check whether required arguments exist
          thisConfig.functionality.allArguments
            .forEach { par ->
              if (par.required) {
                assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
              }
            }
        }

        // TODO: check whether parameters have the right type

        // process input files separately
        def inputPaths = thisConfig.functionality.allArguments
          .findAll { it.type == "file" && it.direction == "input" }
          .collect { par ->
            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
            def inputFiles = []
            if (val == null) {
              inputFiles = []
            } else if (val instanceof List) {
              inputFiles = val
            } else if (val instanceof Path) {
              inputFiles = [ val ]
            } else {
              inputFiles = []
            }
            if (!workflow.stubRun) {
              // throw error when an input file doesn't exist
              inputFiles.each{ file ->
                assert file.exists() :
                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
                  " Required input file does not exist.\n" +
                  " Path: '$file'.\n" +
                  " Expected input file to exist"
              }
            }
            inputFiles
          }

        // remove input files
        def argsExclInputFiles = thisConfig.functionality.allArguments
          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
          .collectEntries { par ->
            def parName = par.plainName
            def val = combinedArgs[parName]
            if (par.multiple && val instanceof Collection) {
              val = val.join(par.multiple_sep)
            }
            if (par.direction == "output" && par.type == "file") {
              // expand the '$id' and '$key' placeholders in output file names
              val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
            }
            [parName, val]
          }

        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
      }
      | processObj
      | map { output ->
        def outputFiles = thisConfig.functionality.allArguments
          .findAll { it.type == "file" && it.direction == "output" }
          .indexed()
          .collectEntries{ index, par ->
            // 'def' keeps this variable local to the closure; without it the
            // assignment lands in the shared script binding and concurrent
            // invocations can overwrite each other's value
            def out = output[index + 1]
            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
            // NOTE: the parentheses matter. '!out instanceof List' is parsed
            // as '(!out) instanceof List', which is always false.
            if (!(out instanceof List) || out.size() <= 1) {
              if (par.multiple) {
                out = []
              } else {
                assert !par.required :
                    "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
                    " Required output file is missing"
                out = null
              }
            } else if (out.size() == 2 && !par.multiple) {
              out = out[1]
            } else {
              out = out.drop(1)
            }
            [ par.plainName, out ]
          }

        // drop null outputs
        outputFiles.removeAll{it.value == null}

        if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) {
          outputFiles = outputFiles.values()[0]
        }

        [ output[0], outputFiles ]
      }

    // join the output [id, output] with the previous state [id, state, ...]
    out1_ = out0_.join(mid2_, failOnDuplicate: true)
      // input tuple format: [id, output, prev_state, ...]
      // output tuple format: [id, new_state, ...]
      | map{
        def new_state = processArgs["toState"](it)
        [it[0], new_state] + it.drop(3)
      }
      | debug(processArgs, "output")


    emit:
    out1_
  }

  def wf = workflowInstance.cloneWithName(workflowKey)

  // add factory function
  wf.metaClass.run = { runArgs ->
    workflowFactory(runArgs)
  }
  // add config to module for later introspection
  wf.metaClass.config = thisConfig

  return wf
}
00000000000..cc69889dff8 --- /dev/null +++ b/target/nextflow/neighbors/find_neighbors/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'find_neighbors' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor search efficiency of this heavily relies on UMAP [McInnes18], which also provides a method for estimating connectivities of data points - the connectivity of the manifold (method==\'umap\'). If method==\'gauss\', connectivities are computed according to [Coifman05], in the adaption of [Haghverdi16].\n' + author = 'Dries De Maeyer, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + 
withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/neighbors/find_neighbors/nextflow_params.yaml b/target/nextflow/neighbors/find_neighbors/nextflow_params.yaml new file mode 100644 index 00000000000..a709dbf4760 --- /dev/null +++ b/target/nextflow/neighbors/find_neighbors/nextflow_params.yaml @@ -0,0 +1,16 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +obsm_input: "X_pca" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +uns_output: "neighbors" +obsp_distances: "distances" +obsp_connectivities: "connectivities" +metric: "euclidean" +num_neighbors: 15 +seed: 0 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/neighbors/find_neighbors/nextflow_schema.json b/target/nextflow/neighbors/find_neighbors/nextflow_schema.json new file 
mode 100644 index 00000000000..abc7b48efa6 --- /dev/null +++ b/target/nextflow/neighbors/find_neighbors/nextflow_schema.json @@ -0,0 +1,183 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "find_neighbors", +"description": "Compute a neighborhood graph of observations [McInnes18].\n\nThe neighbor search efficiency of this heavily relies on UMAP [McInnes18], which also provides a method for estimating connectivities of data points - the connectivity of the manifold (method==\u0027umap\u0027). If method==\u0027gauss\u0027, connectivities are computed according to [Coifman05], in the adaption of [Haghverdi16].\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "obsm_input": { + "type": + "string", + "description": "Type: `string`, default: `X_pca`. Which ", + "help_text": "Type: `string`, default: `X_pca`. Which .obsm slot to use as a starting PCA embedding." + , + "default": "X_pca" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file containing the found neighbors", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file containing the found neighbors." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "uns_output": { + "type": + "string", + "description": "Type: `string`, default: `neighbors`. Mandatory ", + "help_text": "Type: `string`, default: `neighbors`. Mandatory .uns slot to store various neighbor output objects." + , + "default": "neighbors" + } + + + , + "obsp_distances": { + "type": + "string", + "description": "Type: `string`, default: `distances`. In which ", + "help_text": "Type: `string`, default: `distances`. In which .obsp slot to store the distance matrix between the resulting neighbors." + , + "default": "distances" + } + + + , + "obsp_connectivities": { + "type": + "string", + "description": "Type: `string`, default: `connectivities`. In which ", + "help_text": "Type: `string`, default: `connectivities`. In which .obsp slot to store the connectivities matrix between the resulting neighbors." + , + "default": "connectivities" + } + + + , + "metric": { + "type": + "string", + "description": "Type: `string`, default: `euclidean`, choices: ``cityblock`, `cosine`, `euclidean`, `l1`, `l2`, `manhattan`, `braycurtis`, `canberra`, `chebyshev`, `correlation`, `dice`, `hamming`, `jaccard`, `kulsinski`, `mahalanobis`, `minkowski`, `rogerstanimoto`, `russellrao`, `seuclidean`, `sokalmichener`, `sokalsneath`, `sqeuclidean`, `yule``. The distance metric to be used in the generation of the nearest neighborhood network", + "help_text": "Type: `string`, default: `euclidean`, choices: ``cityblock`, `cosine`, `euclidean`, `l1`, `l2`, `manhattan`, `braycurtis`, `canberra`, `chebyshev`, `correlation`, `dice`, `hamming`, `jaccard`, `kulsinski`, `mahalanobis`, `minkowski`, `rogerstanimoto`, `russellrao`, `seuclidean`, `sokalmichener`, `sokalsneath`, `sqeuclidean`, `yule``. 
The distance metric to be used in the generation of the nearest neighborhood network.", + "enum": ["cityblock", "cosine", "euclidean", "l1", "l2", "manhattan", "braycurtis", "canberra", "chebyshev", "correlation", "dice", "hamming", "jaccard", "kulsinski", "mahalanobis", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule"] + + , + "default": "euclidean" + } + + + , + "num_neighbors": { + "type": + "integer", + "description": "Type: `integer`, default: `15`. The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation", + "help_text": "Type: `integer`, default: `15`. The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation. Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If knn is True, number of nearest neighbors to be searched. If knn is False, a Gaussian kernel width is set to the distance of the n_neighbors neighbor." + , + "default": "15" + } + + + , + "seed": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. A random seed", + "help_text": "Type: `integer`, default: `0`. A random seed." + , + "default": "0" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
def setup_logger():
    """Configure the root logger to write INFO-level records to stdout.

    The function is idempotent: when it has already attached its handler for
    the current ``sys.stdout``, that handler is reused instead of adding a
    second one, so repeated calls do not duplicate every log line.

    Returns:
        The root ``logging.Logger`` instance.
    """
    import logging
    from sys import stdout

    handler_name = "setup_logger.console"

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Only a handler created by this function AND bound to the current stdout
    # counts as "already installed"; if stdout was swapped in the meantime a
    # fresh handler is still attached, as before.
    already_installed = any(
        handler.name == handler_name and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_installed:
        console_handler = logging.StreamHandler(stdout)
        console_handler.set_name(handler_name)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
+ info: null + example: + - "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output h5 file." + info: null + example: + - "pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_library_size" + description: "Minimum library size." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells_per_gene" + description: "Minimum number of cells per gene." + info: null + default: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--keep_feature_types" + description: "Specify which feature types will never be filtered out" + info: null + example: + - "Antibody Capture" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--verbose" + description: "Increase verbosity" + info: null + direction: "input" + dest: "par" + resources: + - type: "r_script" + path: "script.R" + is_executable: true + description: "Filter a 10x h5 dataset.\n" + usage: "filter_10xh5 \\\n --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\ + \n --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\\n --min_library_size\ + \ 1000 --min_cells_per_gene 300\n" + test_resources: + - type: "r_script" + path: "run_test.R" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: 
"eddelbuettel/r2u:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libhdf5-dev python3-pip python3-dev" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + - type: "r" + cran: + - "testthat" + - "anndata" + - "hdf5r" + bioc_force_install: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/process_10xh5/filter_10xh5" + executable: 
"/home/runner/work/openpipeline/openpipeline/target/nextflow/process_10xh5/filter_10xh5/filter_10xh5" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/process_10xh5/filter_10xh5/main.nf b/target/nextflow/process_10xh5/filter_10xh5/main.nf new file mode 100644 index 00000000000..593bc46b812 --- /dev/null +++ b/target/nextflow/process_10xh5/filter_10xh5/main.nf @@ -0,0 +1,2642 @@ +// filter_10xh5 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "filter_10xh5", + "namespace" : "process_10xh5", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : 
"file", + "name" : "--input", + "description" : "An h5 file from the 10x genomics website.", + "example" : [ + "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output h5 file.", + "example" : [ + "pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_library_size", + "description" : "Minimum library size.", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_cells_per_gene", + "description" : "Minimum number of cells per gene.", + "default" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--keep_feature_types", + "description" : "Specify which feature types will never be filtered out", + "example" : [ + "Antibody Capture" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--verbose", + "description" : "Increase verbosity", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "r_script", + "path" : "script.R", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/" + } + ], + "description" : "Filter a 10x h5 dataset.\n", + "usage" : "filter_10xh5 \\\\\n --input pbmc_1k_protein_v3_raw_feature_bc_matrix.h5 \\\\\n --output pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5 \\\\\n 
--min_library_size 1000 --min_cells_per_gene 300\n", + "test_resources" : [ + { + "type" : "r_script", + "path" : "run_test.R", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "eddelbuettel/r2u:22.04", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libhdf5-dev python3-pip python3-dev" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + }, + { + "type" : "r", + "cran" : [ + "testthat", + "anndata", + "hdf5r" + ], + "bioc_force_install" : false + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : 
"memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/process_10xh5/filter_10xh5/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/process_10xh5/filter_10xh5", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "input" = $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "min_library_size" = $( if [ ! -z ${VIASH_PAR_MIN_LIBRARY_SIZE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_LIBRARY_SIZE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "min_cells_per_gene" = $( if [ ! 
-z ${VIASH_PAR_MIN_CELLS_PER_GENE+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_PAR_MIN_CELLS_PER_GENE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "keep_feature_types" = $( if [ ! -z ${VIASH_PAR_KEEP_FEATURE_TYPES+x} ]; then echo -n "strsplit('"; echo -n "$VIASH_PAR_KEEP_FEATURE_TYPES" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "', split = ':')[[1]]"; else echo NULL; fi ), + "verbose" = $( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo -n "as.logical(toupper('"; echo -n "$VIASH_PAR_VERBOSE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'))"; else echo NULL; fi ) +) +meta <- list( + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + +if (par\\$verbose) cat("Loading dependencies\\\\n") +requireNamespace("hdf5r", quietly = TRUE) + +if (par\\$verbose) cat("Opening h5 file\\\\n") +h5 <- hdf5r::H5File\\$new(par\\$input, mode = "r") + +if (par\\$verbose) cat("Reading data in memory\\\\n") +features__all_tag_keys <- h5[["matrix/features/_all_tag_keys"]][] + +features <- data.frame( + feature_type = h5[["matrix/features/feature_type"]][], + genome = h5[["matrix/features/genome"]][], + id = h5[["matrix/features/id"]][], + name = h5[["matrix/features/name"]][] +) + +mat <- Matrix::sparseMatrix( + i = h5[["matrix/indices"]][], + p = h5[["matrix/indptr"]][], + x = h5[["matrix/data"]][], + dims = h5[["matrix/shape"]][], + index1 = FALSE, + dimnames = list( + features\\$id, + h5[["matrix/barcodes"]][] + ) +) + +if (par\\$verbose) cat("Filtering out cells with library size < ", par\\$min_library_size, "\\\\n", sep = "") +library_size <- Matrix::colSums(mat) +mat2 <- mat[, library_size >= 
par\\$min_library_size, drop = FALSE] + +if (par\\$verbose) cat("Filtering genes with num cells < ", par\\$min_cells_per_gene, "\\\\n", sep = "") +num_cells <- Matrix::rowSums(mat2 > 0) +mat3 <- mat2[num_cells >= par\\$min_cells_per_gene | features\\$feature_type %in% par\\$keep_feature_types, , drop = FALSE] +features2 <- features[match(rownames(mat3), features\\$id), , drop = FALSE] + +# helper fun +set_with_type <- function(path, value) { + orig_dtype <- h5[[path]]\\$get_type() + orig_chunk <- h5[[path]]\\$chunk_dims + if (is.na(orig_chunk)) orig_chunk <- "auto" + h5new\\$create_dataset(path, value, dtype = orig_dtype, chunk_dims = orig_chunk) +} + +# create new file +if (par\\$verbose) cat("Saving h5 file at '", par\\$output, "'\\\\n", sep = "") +h5new <- hdf5r::H5File\\$new(par\\$output, mode = "w") +zz <- h5new\\$create_group("matrix") +zz <- h5new\\$create_group("matrix/features") + +set_with_type("matrix/features/feature_type", features2\\$feature_type) +set_with_type("matrix/features/genome", features2\\$genome) +set_with_type("matrix/features/id", features2\\$id) +set_with_type("matrix/features/name", features2\\$name) +set_with_type("matrix/features/_all_tag_keys", features__all_tag_keys) +set_with_type("matrix/indices", mat3@i) +set_with_type("matrix/indptr", mat3@p) +set_with_type("matrix/data", as.integer(mat3@x)) +set_with_type("matrix/shape", dim(mat3)) +set_with_type("matrix/barcodes", colnames(mat3)) + +for (attname in hdf5r::h5attr_names(h5)) { + h5new\\$create_attr(attname, hdf5r::h5attr(h5, attname)) +} +h5new\\$close_all() +h5\\$close_all() +VIASHMAIN +Rscript "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/process_10xh5_filter_10xh5", + "tag" : 
"0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. 
+ // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/process_10xh5/filter_10xh5/nextflow.config b/target/nextflow/process_10xh5/filter_10xh5/nextflow.config new file mode 100644 index 
00000000000..08ba7c0cadd --- /dev/null +++ b/target/nextflow/process_10xh5/filter_10xh5/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'filter_10xh5' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Filter a 10x h5 dataset.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: 
mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml b/target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml new file mode 100644 index 00000000000..62f6a24296e --- /dev/null +++ b/target/nextflow/process_10xh5/filter_10xh5/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: "pbmc_1k_protein_v3_raw_feature_bc_matrix.h5" +# output: "$id.$key.output.h5" +min_library_size: 0 +min_cells_per_gene: 0 +# keep_feature_types: ["Antibody Capture"] +verbose: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json b/target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json new file mode 100644 index 00000000000..3bce03774d3 --- /dev/null +++ b/target/nextflow/process_10xh5/filter_10xh5/nextflow_schema.json @@ -0,0 +1,124 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "filter_10xh5", +"description": "Filter a 10x h5 dataset.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { 
+ + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. An h5 file from the 10x genomics website", + "help_text": "Type: `file`, required, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix.h5`. An h5 file from the 10x genomics website." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5`. Output h5 file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5`, example: `pbmc_1k_protein_v3_raw_feature_bc_matrix_filtered.h5`. Output h5 file." + , + "default": "$id.$key.output.h5" + } + + + , + "min_library_size": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. Minimum library size", + "help_text": "Type: `integer`, default: `0`. Minimum library size." + , + "default": "0" + } + + + , + "min_cells_per_gene": { + "type": + "integer", + "description": "Type: `integer`, default: `0`. Minimum number of cells per gene", + "help_text": "Type: `integer`, default: `0`. Minimum number of cells per gene." + , + "default": "0" + } + + + , + "keep_feature_types": { + "type": + "string", + "description": "Type: List of `string`, example: `Antibody Capture`, multiple_sep: `\":\"`. Specify which feature types will never be filtered out", + "help_text": "Type: List of `string`, example: `Antibody Capture`, multiple_sep: `\":\"`. Specify which feature types will never be filtered out" + + } + + + , + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Increase verbosity", + "help_text": "Type: `boolean_true`, default: `false`. Increase verbosity" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml b/target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml new file mode 100644 index 00000000000..8cb769d646f --- /dev/null +++ b/target/nextflow/qc/calculate_qc_metrics/.config.vsh.yaml @@ -0,0 +1,235 @@ +functionality: + name: "calculate_qc_metrics" + namespace: "qc" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + info: null + example: + - "raw_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--var_qc_metrics" + description: "Keys to select a boolean (containing only True or False) column\ + \ from .var.\nFor each cell, calculate the proportion of total values for\ + \ genes which are labeled 'True', \ncompared to the total sum of the values\ + \ for all genes.\n" + info: null + example: + - "ercc,highly_variable,mitochondrial" + required: false + direction: "input" + 
multiple: true + multiple_sep: "," + dest: "par" + - type: "boolean" + name: "--var_qc_metrics_fill_na_value" + description: "Fill any 'NA' values found in the columns specified with --var_qc_metrics\ + \ to 'True' or 'False'.\nas False.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--top_n_vars" + description: "Number of top vars to be used to calculate cumulative proportions.\n\ + If not specified, proportions are not calculated. `--top_n_vars 20,50` finds\n\ + cumulative proportion to the 20th and 50th most expressed vars.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: "," + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Add basic quality control metrics to an .h5mu file.\n\nThe metrics\ + \ are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they\ + \ have slightly different names:\n\nVar metrics (name in this component -> name\ + \ in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs\ + \ -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts\ + \ -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n\ + \ - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics}\ + \ -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars ->\ + \ pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n\ + \ \n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scikit-learn~=1.2.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + 
- "scanpy~=1.9.5" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/calculate_qc_metrics" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/calculate_qc_metrics/calculate_qc_metrics" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/qc/calculate_qc_metrics/main.nf b/target/nextflow/qc/calculate_qc_metrics/main.nf new file mode 100644 index 00000000000..b5074e7c89b --- /dev/null +++ b/target/nextflow/qc/calculate_qc_metrics/main.nf @@ -0,0 +1,2739 @@ +// calculate_qc_metrics 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "calculate_qc_metrics", + "namespace" : "qc", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer", + "example" : [ + "raw_counts" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--var_qc_metrics", + "description" : "Keys to 
select a boolean (containing only True or False) column from .var.\nFor each cell, calculate the proportion of total values for genes which are labeled 'True', \ncompared to the total sum of the values for all genes.\n", + "example" : [ + "ercc,highly_variable,mitochondrial" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ",", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--var_qc_metrics_fill_na_value", + "description" : "Fill any 'NA' values found in the columns specified with --var_qc_metrics to 'True' or 'False'.\nas False.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--top_n_vars", + "description" : "Number of top vars to be used to calculate cumulative proportions.\nIf not specified, proportions are not calculated. `--top_n_vars 20,50` finds\ncumulative proportion to the 20th and 50th most expressed vars.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ",", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/" + }, + { + "type" 
: "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Add basic quality control metrics to an .h5mu file.\n\nThe metrics are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they have slightly different names:\n\nVar metrics (name in this component -> name in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics} -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars -> pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n \n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + 
"scikit-learn~=1.2.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "midmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/qc/calculate_qc_metrics/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/calculate_qc_metrics", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys 
+from mudata import read_h5mu +from scipy.sparse import issparse, isspmatrix_coo, csr_matrix +from sklearn.utils.sparsefuncs import mean_variance_axis +import numpy as np + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'var_qc_metrics': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS//\\'/\\'\\"\\'\\"r\\'}'.split(',')"; else echo None; fi ), + 'var_qc_metrics_fill_na_value': $( if [ ! -z ${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE+x} ]; then echo "r'${VIASH_PAR_VAR_QC_METRICS_FILL_NA_VALUE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'top_n_vars': $( if [ ! -z ${VIASH_PAR_TOP_N_VARS+x} ]; then echo "list(map(int, r'${VIASH_PAR_TOP_N_VARS//\\'/\\'\\"\\'\\"r\\'}'.split(',')))"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(): + input_data = read_h5mu(par["input"]) + modality_data = input_data.mod[par["modality"]] + var = modality_data.var + layer = modality_data.X if not par['layer'] else modality_data.layers[par['layer']] + if not issparse(layer): + raise NotImplementedError("Expected layer to be in sparse format.") + if isspmatrix_coo(layer): + layer = csr_matrix(layer) + layer.eliminate_zeros() + + # var statistics + num_nonzero_obs = layer.getnnz(axis=0) + obs_mean, _ = mean_variance_axis(layer, axis=0) + pct_dropout = (1 - num_nonzero_obs / layer.shape[0]) * 100 + total_counts_obs = np.ravel(layer.sum(axis=0)) + + # obs statistics + num_nonzero_vars = layer.getnnz(axis=1) + total_counts_var = np.ravel(layer.sum(axis=1)) + + top_metrics = {} + if par["top_n_vars"]: + par["top_n_vars"] = sorted(par["top_n_vars"]) + distributions = get_top_from_csr_matrix(layer, par["top_n_vars"]) + top_metrics = {distribution_size: distribution * 100 + for distribution_size, distribution + in zip(par["top_n_vars"], distributions.T)} + + total_expr_qc = {} + pct_expr_qc = {} + if par["var_qc_metrics"]: + for qc_metric in par["var_qc_metrics"]: + if not qc_metric in var: + raise ValueError(f"Value for --var_qc_metrics, {qc_metric} " + f"not found in 
.var for modality {par['modality']}") + qc_column = var[qc_metric] + if qc_column.isna().any(): + if par["var_qc_metrics_fill_na_value"] is None: + raise ValueError(f"The .var column '{qc_metric}', selected by '--var_qc_metrics', contains NA values. " + "It is ambiguous whether or not to include these values in the static calulation. " + "You can explicitly map the NA values to 'False' or 'True using '--var_qc_metrics_fill_na_value'") + else: + qc_column = qc_column.fillna(par['var_qc_metrics_fill_na_value'], inplace=False) + qc_column = qc_column.values + if set(np.unique(qc_column)) - {True, False}: + raise ValueError(f"Column {qc_metric} in .var for modality {par['modality']} " + f"must only contain boolean values") + + total_expr_qc[qc_metric] = np.ravel(layer[:, qc_column].sum(axis=1)) + pct_expr_qc[qc_metric] = total_expr_qc[qc_metric] / total_counts_var * 100 + + # Write all of the calculated statistics + modality_data.var = modality_data.var.assign( + **{"pct_dropout": pct_dropout, + "num_nonzero_obs": num_nonzero_obs, + "obs_mean": obs_mean, + "total_counts": total_counts_obs}) + + modality_data.obs = modality_data.obs.assign( + **({"num_nonzero_vars": num_nonzero_vars, + "total_counts": total_counts_var} | \\\\ + {f"pct_{qc_metric}": col for qc_metric, col in pct_expr_qc.items()} | \\\\ + {f"total_counts_{qc_metrix}": col for qc_metrix, col in total_expr_qc.items()}) | \\\\ + {f"pct_of_counts_in_top_{n_top}_vars": col for n_top, col in top_metrics.items()}) + + input_data.write(par["output"], compression=par["output_compression"]) + +def get_top_from_csr_matrix(matrix, top_n_genes): + # csr matrices stores a 3D matrix in a format such that data for individual cells + # are stored in 1 array. Another array (indptr) here stores the ranges of indices + # to select from the data-array (.e.g. data[indptr[0]:indptr[1]] for row 0) for each row. 
+ # Another array 'indices' maps each element of data to a column + # (data and indices arrays have the same length) + top_n_genes = np.array(top_n_genes).astype(np.int64) + assert np.all(top_n_genes[:-1] <= top_n_genes[1:]), "top_n_genes must be sorted" + row_indices, data = matrix.indptr, matrix.data + number_of_rows, max_genes_to_parse = row_indices.size-1, top_n_genes[-1] + top_data = np.zeros((number_of_rows, max_genes_to_parse), + dtype=data.dtype) + # Loop over each row to create a dense matrix without the 0 counts, + # but not for the whole matrix, only store the genes up until + # the largest number of top n genes. + for row_number in range(number_of_rows): + row_start_index, row_end_index = row_indices[row_number], row_indices[row_number+1] + row_data = data[row_start_index:row_end_index] # all non-zero counts for an row + try: + # There are less genes with counts in the row than the + # maximum number of genes we would like to select + # all these genes are in the top genes, just store them + top_data[row_number, :row_end_index-row_start_index] = row_data + except ValueError: + # Store the counts for the top genes + top_data[row_number, :] = np.partition(row_data, -max_genes_to_parse)[-max_genes_to_parse:] + + # Partition works from smallest to largest, but we want largest + # so do smallest to largest first (but with reversed indices) + top_data = np.partition(top_data, max_genes_to_parse - top_n_genes) + # And then switch the order around + top_data = np.flip(top_data, axis=1) + + cumulative = top_data.cumsum(axis=1, dtype=np.float64)[:,top_n_genes-1] + return cumulative / np.array(matrix.sum(axis=1)) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + 
"registry" : "ghcr.io", + "image" : "openpipelines-bio/qc_calculate_qc_metrics", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "midmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. 
+ // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def 
inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? 
arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
            'example': 'my_params.yaml',
            'multiple': false,
            'hidden': true
          ],
        ]
      ]
    ]
  ]
]

  return processConfig(mergeMap(config, localConfig))
}

// helper functions for generating help //

// based on io.viash.helpers.Format.wordWrap
/**
 * Greedily wrap a string into lines of at most `maxLength` characters.
 *
 * @param str       The text to wrap; it is split on single whitespace characters.
 * @param maxLength Maximum line length. A single word longer than this is kept
 *                  on a line of its own.
 *
 * @return A list of lines. Empty when `str` contains nothing but whitespace.
 */
def formatWordWrap(str, maxLength) {
  def words = str.split("\\s").toList()
  if (words.isEmpty()) {
    return []
  }

  // Seed the first line with the first word, so that the output does not
  // start with a separator (the same approach as paragraphWrap below).
  def line = words[0]
  def lines = []
  for (word in words.drop(1)) {
    if (line.length() + word.length() + 1 <= maxLength) {
      line = line + " " + word
    } else {
      lines.add(line)
      line = word
    }
  }
  lines.add(line)
  return lines
}

// based on Format.paragraphWrap
/**
 * Wrap every newline-separated paragraph of a string to at most `maxLength` characters.
 *
 * @param str       The text to wrap; paragraphs are separated by "\n".
 * @param maxLength Maximum line length.
 *
 * @return A list of lines; every paragraph contributes at least one (possibly empty) line.
 */
def paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    // a whitespace-only paragraph splits into zero words: emit an empty line
    // instead of failing on an empty list
    def line = words.isEmpty() ? "" : words[0]
    for (word in words.drop(1)) {
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    outLines.add(line)
  }
  return outLines
}

/**
 * Render the help text of a single argument.
 *
 * @param param An argument map as found in `config.functionality.allArguments`
 *              (this function reads plainName, type, required, multiple, direction,
 *              must_exist, default, example, multiple_sep, min, max, choices
 *              and description).
 *
 * @return A multi-line string starting with a newline.
 */
def generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    // null-safe: an argument without a direction is not an output
    ["output", param.direction?.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  // render a default / example value (scalar or list) as one string
  def valueToString = { value ->
    if (value == null) {
      null
    } else if (value instanceof List) {
      value.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      value.toString()
    }
  }
  def dflt = valueToString(param.default)
  def example = valueToString(param.example)
  def min = param.min?.toString()
  def max = param.max?.toString()

  // String.replace is a literal replacement. The regex based replaceAll treats a
  // backslash in the replacement as an escape character, which drops it.
  def escapeChoice = { choice ->
    def escaped = choice.replace("\n", "\\n").replace("\"", "\\\"")
    escaped.contains(",") || escaped != choice ? "\"" + escaped + "\"" : escaped
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    // keep every property on one line: show a newline as the two characters \n
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

// Based on Helper.generateHelp() in Helper.scala
/**
 * Render the complete help message of a component.
 *
 * @param config A parsed Viash config; `config.functionality` is read for
 *               name, version, description, usage, allArgumentGroups and allArguments.
 *
 * @return The help message as a single string.
 */
def generateHelp(config) {
  def fun = config.functionality

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    // a group may list an argument by name (String) or contain the argument itself
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

/**
 * Print the help message and exit with status 0 when the 'help' parameter exists.
 *
 * @param config A parsed Viash config.
 */
def helpMessage(config) {
  if (paramExists("help")) {
    def mergedConfig = addGlobalParams(config)
    def helpStr = generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

/**
 * Guess the format of the 'param_list' parameter.
 *
 * @param params The parameter map.
 *
 * @return One of "none" (absent or null), "asis" (not a String), "csv", "json",
 *         "yaml" (chosen by file extension) or "yaml_blob" (any other String).
 */
def _guessParamListFormat(params) {
  if (!params.containsKey("param_list") || params.param_list == null) {
    "none"
  } else {
    def param_list = params.param_list

    if (param_list !instanceof String) {
      "asis"
    } else if (param_list.endsWith(".csv")) {
      "csv"
    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
      "json"
    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
      "yaml"
    } else {
      "yaml_blob"
    }
  }
}

// script-level flag shared by the deprecated functions below, so that the
// deprecation warning is printed at most once
viashChannelDeprecationWarningPrinted = false

/**
 * Deprecated: turn the pipeline parameters into a list of processed argument sets.
 *
 * @param params The parameter map; may contain a 'param_list' entry.
 * @param config A parsed Viash config.
 *
 * @return A list of maps, one per argument set. Each contains an 'id' and
 *         the ids are unique across the list.
 */
def paramsToList(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  // fetch default params from functionality
  def defaultArgs = config.functionality.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  // fetch overrides in params
  def paramArgs = config.functionality.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }

  // check multi input params
  // objects should be closures and not functions, thanks to FunctionDef
  def multiParamFormat = _guessParamListFormat(params)

  // every reader returns [ file the list was read from (or null), list of argument sets ]
  def multiOptionFunctions = [
    "csv": {[it, readCsv(it)]},
    "json": {[it, readJson(it)]},
    "yaml": {[it, readYaml(it)]},
    "yaml_blob": {[null, readYamlBlob(it)]},
    "asis": {[null, it]},
    "none": {[null, [[:]]]}
  ]
  assert multiOptionFunctions.containsKey(multiParamFormat):
    "Format of provided --param_list not recognised.\n" +
    "You can use '--param_list_format' to manually specify the format.\n" +
    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"

  // fetch multi param inputs
  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
  // todo: add try catch
  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
  def paramList = multiOptionOut[1]
  def multiFile = multiOptionOut[0]

  // data checks
  assert paramList instanceof List: "--param_list should contain a list of maps"
  for (value in paramList) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // combine parameters
  def processedParams = paramList.collect{ multiParam ->
    // combine params
    def combinedArgs = defaultArgs + paramArgs + multiParam

    if (workflow.stubRun) {
      // if stub run, explicitly add an id if missing
      combinedArgs = [id: "stub"] + combinedArgs
    } else {
      // else check whether required arguments exist
      config.functionality.allArguments
        .findAll { it.required }
        .forEach { par ->
          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
        }
    }

    // process arguments
    def inputs = config.functionality.allArguments
      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
      .collectEntries { par ->
        // declared locally so that the value does not leak into the script binding
        def parData
        // split on 'multiple_sep'
        if (par.multiple) {
          parData = combinedArgs[par.plainName]
          if (parData instanceof List) {
            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
          } else if (parData instanceof String) {
            parData = parData.split(par.multiple_sep)
          } else if (parData == null) {
            parData = []
          } else {
            parData = [ parData ]
          }
        } else {
          parData = [ combinedArgs[par.plainName] ]
        }

        // flatten
        parData = parData.flatten()

        // cast types
        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
          parData = parData.collect{path ->
            if (path !instanceof String) {
              path
            } else if (multiFile) {
              file(getChild(multiFile, path))
            } else {
              file(path)
            }
          }.flatten()
        } else if (par.type == "integer") {
          parData = parData.collect{it as Integer}
        } else if (par.type == "double") {
          parData = parData.collect{it as Double}
        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
          // "false" as Boolean is true (a non-empty String is truthy), so parse it explicitly
          parData = parData.collect{
            it instanceof CharSequence && it.toString().equalsIgnoreCase("false") ? false : (it as Boolean)
          }
        }
        // simplify list to value if need be
        if (!par.multiple) {
          assert parData.size() == 1 :
            "Error: argument ${par.plainName} has too many values.\n" +
            " Expected amount: 1. Found: ${parData.size()}"
          parData = parData[0]
        }

        // return pair
        [ par.plainName, parData ]
      }
      // remove parameters which were explicitly set to null
      // NOTE(review): on a Map this closure receives the entry, which is never null,
      // so nothing seems to be removed here -- confirm the intent before changing it.
      .findAll{ par -> par != null }
  }


  // check processed params
  processedParams.forEach { args ->
    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
  }
  def ppIds = processedParams.collect{it.id}
  // unique(false) works on a copy, so the message below still lists the duplicated ids
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"

  processedParams
}

/**
 * Deprecated: like paramsToList, but wrapped in a channel using Channel.fromList.
 */
def paramsToChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  Channel.fromList(paramsToList(params, config))
}

/**
 * Deprecated: like paramsToChannel, but every event is mapped to [id, argument set].
 */
def viashChannel(params, config) {
  if (!viashChannelDeprecationWarningPrinted) {
    viashChannelDeprecationWarningPrinted = true
    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
                       "Please use a combination of channelFromParams and preprocessInputs.")
  }
  paramsToChannel(params, config)
    | map{tup -> [tup.id, tup]}
}

/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Parameters that are not found in the config are returned unaltered.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
        "Error: argument ${parName} has too many values.\n" +
        " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets.
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
// TODO: unit test processAuto
/**
 * Validate and normalise the 'auto' settings of a module.
 *
 * Entries with a null value are dropped first. Each of the four recognised
 * settings must then be present and hold a Boolean.
 *
 * @param auto Map of auto settings.
 * @return a Map restricted to the keys simplifyInput, simplifyOutput,
 *         transcript and publish.
 * @throws AssertionError (Groovy assert) if a recognised key is missing or
 *         is not a Boolean.
 */
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // declared with 'def': a bare assignment inside a script method would
  // create a variable in the script binding instead of a local
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key)
    assert auto[key] instanceof Boolean
  }

  return auto.subMap(expectedKeys)
}
/**
 * Merge the arguments given to a module with the module defaults and
 * validate the result.
 *
 * Steps, in order:
 *  - overlay `args` on `thisDefaultProcessArgs`;
 *  - resolve and validate `key` (a Closure is applied to the functionality name);
 *  - validate `directives` through processDirectives and `auto` through processAuto;
 *  - if `auto.publish` is set and no publishDir directive is present, derive one
 *    from params.publish_dir / params.publishDir;
 *  - if `auto.transcript` is set, append a publishDir entry for the transcripts;
 *  - on a stub run, remove the publishDir, cpus, memory and label directives;
 *  - check that map, mapId, mapData, mapPassthrough and filter are Closures when set;
 *  - normalise `fromState` and `toState` (List or Map forms) into Closures.
 *
 * @param args Map of user supplied process arguments.
 * @return the merged and normalised process arguments Map.
 * @throws AssertionError (Groovy assert) when a validation fails.
 */
def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${processArgs['directives'].getClass()}"
  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])

  // check whether auto exists and apply defaults
  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])

  // auto define publish, if so desired
  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
    //   " Example: params.publish_dir = \"./output/\""
    def publishDir =
      params.containsKey("publish_dir") ? params.publish_dir :
      params.containsKey("publishDir") ? params.publishDir :
      null

    if (publishDir != null) {
      processArgs.directives.publishDir = [[
        path: publishDir,
        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
        mode: "copy"
      ]]
    }
  }

  // auto define transcript, if so desired
  if (processArgs.auto.transcript == true) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
    //   " Example: params.transcripts_dir = \"./transcripts/\""
    def transcriptsDir =
      params.containsKey("transcripts_dir") ? params.transcripts_dir :
      params.containsKey("transcriptsDir") ? params.transcriptsDir :
      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
      null
    if (transcriptsDir != null) {
      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
      def transcriptsPublishDir = [
        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
        mode: "copy"
      ]
      // start from the existing publishDir entries, or from an empty list when none are set
      // (the former nested ternary contained a branch that could never be taken)
      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
    }
  }

  // if this is a stubrun, remove certain directives?
  if (workflow.stubRun) {
    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
  }

  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
    if (processArgs.containsKey(nam) && processArgs[nam]) {
      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
    }
  }

  // check fromState
  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
  def fromState = processArgs["fromState"]
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  if (fromState) {
    // if fromState is a List, convert to map
    if (fromState instanceof List) {
      // check whether fromstate is a list[string]
      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
      fromState = fromState.collectEntries{[it, it]}
    }

    // if fromState is a map, convert to closure
    if (fromState instanceof Map) {
      // check whether fromstate is a map[string, string]
      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
      def fromStateMap = fromState.clone()
      // turn the map into a closure to be used later on
      fromState = { it ->
        def state = it[1]
        assert state instanceof Map : "Error in module '$key': the state is not a Map"
        def data = fromStateMap.collectEntries{newkey, origkey ->
          // check whether all values of fromState are in state
          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
          [newkey, state[origkey]]
        }
        data
      }
    }

    processArgs["fromState"] = fromState
  }

  // check toState
  def toState = processArgs["toState"]

  // default: the new state is the second element of the tuple passed to toState
  if (toState == null) {
    toState = { tup -> tup[1] }
  }

  // toState should be a closure, map[string, string], or list[string]
  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"

  // if toState is a List, convert to map
  if (toState instanceof List) {
    // check whether toState is a list[string]
    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
    toState = toState.collectEntries{[it, it]}
  }

  // if toState is a map, convert to closure
  if (toState instanceof Map) {
    // check whether toState is a map[string, string]
    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
    def toStateMap = toState.clone()
    // turn the map into a closure to be used later on
    toState = { it ->
      def output = it[1]
      def state = it[2]
      assert output instanceof Map : "Error in module '$key': the output is not a Map"
      assert state instanceof Map : "Error in module '$key': the state is not a Map"
      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
        // check whether all values of toState are in output
        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
        [newkey, output[origkey]]
      }
      // entries taken from the output override same-named entries of the previous state
      state + extraEntries
    }
  }

  processArgs["toState"] = toState

  // return output
  return processArgs
}
/**
 * Generate the source of a Nextflow process for this module, evaluate it at
 * runtime and return the resulting process.
 *
 * The process name is "<key>_process", with a numeric suffix appended when a
 * process with that prefix is already registered in the current ScriptMeta.
 * Directives are rendered to text and file arguments of the functionality
 * become path inputs/outputs. The module script is embedded in the script
 * section, preceded by exports of the VIASH_META_* and VIASH_PAR_* variables.
 *
 * @param processArgs Map of process arguments; the entries `key`, `directives`
 *        and `auto` are read here.
 * @return the process object retrieved from ScriptMeta under the generated name.
 */
def processFactory(Map processArgs) {
  // autodetect process key
  def wfKey = processArgs["key"]
  def procKeyPrefix = "${wfKey}_process"
  def meta = ScriptMeta.current()
  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
  // the bare prefix counts as number 0, "<prefix>N" as number N
  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
  def newNumber = (numbers + [-1]).max() + 1

  def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"

  if (newNumber > 0) {
    // build a single message: text passed after a comma is handed to the logger
    // as a separate argument and does not end up in the printed warning
    log.warn "Key for module '${wfKey}' is duplicated.\n" +
      "If you run a component multiple times in the same workflow,\n" +
      "it's recommended you set a unique key for every call,\n" +
      "for example: ${wfKey}.run(key: \"foo\")."
  }

  // subset directives and convert to list of tuples
  def drctv = processArgs.directives

  // TODO: unit test the two commands below
  // convert publish array into tags
  // declared before it is assigned so that the closure body can call itself
  // for List values and nested Map values
  def valueToStr
  valueToStr = { val ->
    // ignore closures
    if (val instanceof CharSequence) {
      // strings of the form "{ ... }" are emitted unquoted, all others are quoted
      if (!val.matches('^[{].*[}]$')) {
        '"' + val + '"'
      } else {
        val
      }
    } else if (val instanceof List) {
      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
    } else if (val instanceof Map) {
      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
    } else {
      val.inspect()
    }
  }

  // multiple entries allowed: label, publishdir
  def drctvStrs = drctv.collect { key, value ->
    if (key in ["label", "publishDir"]) {
      value.collect{ val ->
        if (val instanceof Map) {
          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
        } else if (val == null) {
          ""
        } else {
          "\n$key " + valueToStr(val)
        }
      }.join()
    } else if (value instanceof Map) {
      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
    } else {
      "\n$key " + valueToStr(value)
    }
  }.join()

  def inputPaths = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "input" }
    .collect { ', path(viash_par_' + it.plainName + ')' }
    .join()

  def outputPaths = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" }
    .collect { par ->
      // insert dummy into every output (see nextflow-io/nextflow#2678)
      if (!par.multiple) {
        ', path{[".exitcode", args.' + par.plainName + ']}'
      } else {
        ', path{[".exitcode"] + args.' + par.plainName + '}'
      }
    }
    .join()

  // TODO: move this functionality somewhere else?
  if (processArgs.auto.transcript) {
    outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}'
  } else {
    outputPaths = outputPaths + ', path{[".exitcode"]}'
  }

  // create dirs for output files (based on BashWrapper.createParentFiles)
  def createParentStr = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" && it.create_parent }
    .collect { par ->
      "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
    }
    .join("\n")

  // construct inputFileExports
  def inputFileExports = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
    .collect { par ->
      // local to this closure ('def') so that it is not written to the script binding
      def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")"
      "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
    }

  // NOTE: if using docker, use /tmp instead of tmpDir!
  def tmpDir = java.nio.file.Paths.get(
    System.getenv('NXF_TEMP') ?:
    System.getenv('VIASH_TEMP') ?:
    System.getenv('VIASH_TMPDIR') ?:
    System.getenv('VIASH_TEMPDIR') ?:
    System.getenv('VIASH_TMP') ?:
    System.getenv('TEMP') ?:
    System.getenv('TMPDIR') ?:
    System.getenv('TEMPDIR') ?:
    System.getenv('TMP') ?:
    '/tmp'
  ).toAbsolutePath()

  // construct stub
  def stub = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" }
    .collect { par ->
      "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
    }
    .join("\n")

  // escape script
  def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"')

  // publishdir assert
  def assertStr = processArgs.auto.publish || processArgs.auto.transcript ?
    """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ :
    ""

  // generate process string
  def procStr =
  """nextflow.enable.dsl=2
  |
  |process $procKey {$drctvStrs
  |input:
  | tuple val(id)$inputPaths, val(args), path(resourcesDir)
  |output:
  | tuple val("\$id")$outputPaths, optional: true
  |stub:
  |\"\"\"
  |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; }
  |$stub
  |\"\"\"
  |script:$assertStr
  |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') }
  |def parInject = args
  | .findAll{key, value -> value != null}
  | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""}
  | .join("\\n")
  |\"\"\"
  |# meta exports
  |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}"
  |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}"
  |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}"
  |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME"
  |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
  |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" }
  |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" }
  |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then
  | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 ))
  | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 ))
  | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 ))
  | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 ))
  | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 ))
  |fi
  |
  |# meta synonyms
  |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR"
  |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR"
  |
  |# create output dirs if need be
  |function mkdir_parent {
  | for file in "\\\$@"; do
  | mkdir -p "\\\$(dirname "\\\$file")"
  | done
  |}
  |$createParentStr
  |
  |# argument exports${inputFileExports.join()}
  |\$parInject
  |
  |# process script
  |${escapedScript}
  |\"\"\"
  |}
  |""".stripMargin()

  // TODO: print on debug
  // if (processArgs.debug == true) {
  //   println("######################\n$procStr\n######################")
  // }

  // create runtime process
  def ownerParams = new ScriptBinding.ParamsMap()
  def binding = new ScriptBinding().setParams(ownerParams)
  def module = new IncludeDef.Module(name: procKey)
  def scriptParser = new ScriptParser(session)
    .setModule(true)
    .setBinding(binding)
  scriptParser.scriptPath = ScriptMeta.current().getScriptPath()
  def moduleScript = scriptParser.runScript(procStr)
    .getScript()

  // register module in meta
  meta.addModule(moduleScript, module.name, module.alias)

  // retrieve and return process from meta
  return meta.getProcess(procKey)
}
/**
 * Optional debugging step, used as a stage in the channel pipelines of
 * workflowFactory.
 *
 * @param processArgs process arguments; only `debug` and `key` are read.
 * @param debugKey    label included in the printed message (the callers in
 *                    this file pass "input", "processed" and "output").
 * @return the result of `view { ... }` when processArgs.debug is truthy,
 *         otherwise the result of `map { it }`.
 */
def debug(processArgs, debugKey) {
  if (processArgs.debug) {
    // print every element, labelled with the module key and the stage name
    view { "process '${processArgs.key}' $debugKey tuple: $it" }
  } else {
    // identity mapping: elements are passed on unchanged
    map { it }
  }
}
/**
 * Build the Nextflow workflow that wraps this module.
 *
 * The workflow takes a channel of tuples [id, data, ...passthrough] and:
 *  1. applies the optional map / mapId / mapData / mapPassthrough closures,
 *     validates the tuple and applies `renameKeys`;
 *  2. applies the optional `filter` closure;
 *  3. applies `fromState` to derive the module arguments from the state;
 *  4. combines default, params, `processArgs.args` and data values (later
 *     sources override earlier ones), checks required arguments and input
 *     files, and runs the generated process;
 *  5. collects the output files per output argument, dropping the dummy
 *     '.exitcode' entry;
 *  6. joins the output with the pre-fromState tuples by id and applies `toState`.
 *
 * The returned workflow also gets a `run` method (which calls
 * workflowFactory again with new arguments) and a `config` property.
 *
 * @param args Map of process arguments, validated by processProcessArgs.
 * @return the workflow, cloned under the name given by the `key` argument.
 */
def workflowFactory(Map args) {
  def processArgs = processProcessArgs(args)
  def key = processArgs["key"]
  def meta = ScriptMeta.current()

  def workflowKey = key

  // created lazily, the first time the workflow body is evaluated
  def processObj = null

  workflow workflowInstance {
    take:
    input_

    main:
    if (processObj == null) {
      processObj = processFactory(processArgs)
    }

    mid1_ = input_
      | debug(processArgs, "input")
      | map { tuple ->
        tuple = tuple.clone()

        if (processArgs.map) {
          tuple = processArgs.map(tuple)
        }
        if (processArgs.mapId) {
          tuple[0] = processArgs.mapId(tuple[0])
        }
        if (processArgs.mapData) {
          tuple[1] = processArgs.mapData(tuple[1])
        }
        if (processArgs.mapPassthrough) {
          tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2))
        }

        // check tuple
        assert tuple instanceof List :
          "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
        assert tuple.size() >= 2 :
          "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: tuple.size() == ${tuple.size()}"

        // check id field
        assert tuple[0] instanceof CharSequence :
          "Error in module '${key}': first element of tuple in channel should be a String\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: ${tuple[0]}"

        // match file to input file
        if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
          def inputFiles = thisConfig.functionality.allArguments
            .findAll { it.type == "file" && it.direction == "input" }

          assert inputFiles.size() == 1 :
              "Error in module '${key}' id '${tuple[0]}'.\n" +
              " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
              " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"

          // wrap the anonymous file(s) in a map keyed by the single file input's name
          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
        }

        // check data field
        assert tuple[1] instanceof Map :
          "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

        // rename keys of data field in tuple
        if (processArgs.renameKeys) {
          assert processArgs.renameKeys instanceof Map :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}"
          assert tuple[1] instanceof Map :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

          // TODO: allow renameKeys to be a function?
          processArgs.renameKeys.each { newKey, oldKey ->
            assert newKey instanceof CharSequence :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
            assert oldKey instanceof CharSequence :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
            assert tuple[1].containsKey(oldKey) :
              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
            tuple[1].put(newKey, tuple[1][oldKey])
          }
          tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
        }
        tuple
      }

    if (processArgs.filter) {
      mid2_ = mid1_
        | filter{processArgs.filter(it)}
    } else {
      mid2_ = mid1_
    }

    if (processArgs.fromState) {
      mid3_ = mid2_
        | map{
          // only [id, state] is handed to fromState; passthrough elements are
          // recovered later through the join with mid2_
          def new_data = processArgs["fromState"](it.take(2))
          [it[0], new_data]
        }
    } else {
      mid3_ = mid2_
    }

    out0_ = mid3_
      | debug(processArgs, "processed")
      | map { tuple ->
        def id = tuple[0]
        def data = tuple[1]

        // fetch default params from functionality
        def defaultArgs = thisConfig.functionality.allArguments
          .findAll { it.containsKey("default") }
          .collectEntries { [ it.plainName, it.default ] }

        // fetch overrides in params
        def paramArgs = thisConfig.functionality.allArguments
          .findAll { par ->
            def argKey = key + "__" + par.plainName
            params.containsKey(argKey) && params[argKey] != "viash_no_value"
          }
          .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] }

        // fetch overrides in data
        def dataArgs = thisConfig.functionality.allArguments
          .findAll { data.containsKey(it.plainName) }
          .collectEntries { [ it.plainName, data[it.plainName] ] }

        // combine params
        def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs

        // remove arguments with explicit null values
        combinedArgs.removeAll{it.value == null}

        if (workflow.stubRun) {
          // add id if missing
          combinedArgs = [id: 'stub'] + combinedArgs
        } else {
          // check whether required arguments exist
          thisConfig.functionality.allArguments
            .forEach { par ->
              if (par.required) {
                assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
              }
            }
        }

        // TODO: check whether parameters have the right type

        // process input files separately
        def inputPaths = thisConfig.functionality.allArguments
          .findAll { it.type == "file" && it.direction == "input" }
          .collect { par ->
            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
            def inputFiles = []
            if (val == null) {
              inputFiles = []
            } else if (val instanceof List) {
              inputFiles = val
            } else if (val instanceof Path) {
              inputFiles = [ val ]
            } else {
              inputFiles = []
            }
            if (!workflow.stubRun) {
              // throw error when an input file doesn't exist
              inputFiles.each{ file ->
                assert file.exists() :
                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
                  " Required input file does not exist.\n" +
                  " Path: '$file'.\n" +
                  " Expected input file to exist"
              }
            }
            inputFiles
          }

        // remove input files
        def argsExclInputFiles = thisConfig.functionality.allArguments
          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
          .collectEntries { par ->
            def parName = par.plainName
            def val = combinedArgs[parName]
            if (par.multiple && val instanceof Collection) {
              val = val.join(par.multiple_sep)
            }
            if (par.direction == "output" && par.type == "file") {
              // fill in the $id and $key placeholders of output file names
              val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
            }
            [parName, val]
          }

        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
      }
      | processObj
      | map { output ->
        def outputFiles = thisConfig.functionality.allArguments
          .findAll { it.type == "file" && it.direction == "output" }
          .indexed()
          .collectEntries{ index, par ->
            // 'def' keeps this variable local to the closure; a bare assignment
            // would go through the script binding shared by all channel items
            def out = output[index + 1]
            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
            // parenthesised on purpose: '!out instanceof List' negates 'out' first
            // and then tests a Boolean against List, which is never true
            if (!(out instanceof List) || out.size() <= 1) {
              if (par.multiple) {
                out = []
              } else {
                assert !par.required :
                    "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
                    " Required output file is missing"
                out = null
              }
            } else if (out.size() == 2 && !par.multiple) {
              out = out[1]
            } else {
              out = out.drop(1)
            }
            [ par.plainName, out ]
          }

        // drop null outputs
        outputFiles.removeAll{it.value == null}

        if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) {
          outputFiles = outputFiles.values()[0]
        }

        [ output[0], outputFiles ]
      }

    // join the output [id, output] with the previous state [id, state, ...]
    out1_ = out0_.join(mid2_, failOnDuplicate: true)
      // input tuple format: [id, output, prev_state, ...]
      // output tuple format: [id, new_state, ...]
      | map{
        def new_state = processArgs["toState"](it)
        [it[0], new_state] + it.drop(3)
      }
      | debug(processArgs, "output")


    emit:
    out1_
  }

  def wf = workflowInstance.cloneWithName(workflowKey)

  // add factory function
  wf.metaClass.run = { runArgs ->
    workflowFactory(runArgs)
  }
  // add config to module for later introspection
  wf.metaClass.config = thisConfig

  return wf
}
// initialise default workflow
// (built with an empty argument map, i.e. with the module defaults only)
myWfInstance = workflowFactory([:])

// add workflow to environment
ScriptMeta.current().addDefinition(myWfInstance)

// anonymous workflow for running this module as a standalone
workflow {
  // NOTE: mergedConfig refers to the same object as thisConfig at this point,
  // so the insertion of the id argument below is made on thisConfig's own
  // argument list as well
  def mergedConfig = thisConfig
  // copy of params, so that a default id can be added to it below
  def mergedParams = [:] + params

  // add id argument if it's not already in the config
  if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) {
    def idArg = [
      'name': '--id',
      'required': false,
      'type': 'string',
      'description': 'A unique id for every entry.',
      'multiple': false
    ]
    mergedConfig.functionality.arguments.add(0, idArg)
    // re-run processConfig on the extended config
    mergedConfig = processConfig(mergedConfig)
  }
  // fall back to the id "run" when none was supplied
  if (!mergedParams.containsKey("id")) {
    mergedParams.id = "run"
  }

  helpMessage(mergedConfig)

  // pipeline: build the input channel from the parameters, preprocess it,
  // run the module with auto.publish enabled, and print input and output tuples
  channelFromParams(mergedParams, mergedConfig)
    | preprocessInputs("config": mergedConfig)
    | view { "input: $it" }
    | myWfInstance.run(
      auto: [ publish: true ]
    )
    | view { "output: $it" }
}
00000000000..7a79e470d77 --- /dev/null +++ b/target/nextflow/qc/calculate_qc_metrics/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'calculate_qc_metrics' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Add basic quality control metrics to an .h5mu file.\n\nThe metrics are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they have slightly different names:\n\nVar metrics (name in this component -> name in scanpy):\n - pct_dropout -> pct_dropout_by_{expr_type}\n - num_nonzero_obs -> n_cells_by_{expr_type}\n - obs_mean -> mean_{expr_type}\n - total_counts -> total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -> n_genes_by_{expr_type}\n - pct_{var_qc_metrics} -> pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics} -> total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars -> pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -> total_{expr_type}\n \n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + 
shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml b/target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml new file mode 100644 index 00000000000..90ae6c7000e --- /dev/null +++ b/target/nextflow/qc/calculate_qc_metrics/nextflow_params.yaml @@ -0,0 +1,15 @@ +# Inputs +input: # please fill in - example: "input.h5mu" +modality: "rna" +# layer: "raw_counts" +# var_qc_metrics: ["ercc", "highly_variable", "mitochondrial"] +# var_qc_metrics_fill_na_value: true +# top_n_vars: [123] + +# Outputs +# output: 
"$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json b/target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json new file mode 100644 index 00000000000..f034d5ca2ca --- /dev/null +++ b/target/nextflow/qc/calculate_qc_metrics/nextflow_schema.json @@ -0,0 +1,158 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "calculate_qc_metrics", +"description": "Add basic quality control metrics to an .h5mu file.\n\nThe metrics are comparable to what scanpy.pp.calculate_qc_metrics output,\nalthough they have slightly different names:\n\nVar metrics (name in this component -\u003e name in scanpy):\n - pct_dropout -\u003e pct_dropout_by_{expr_type}\n - num_nonzero_obs -\u003e n_cells_by_{expr_type}\n - obs_mean -\u003e mean_{expr_type}\n - total_counts -\u003e total_{expr_type}\n\nObs metrics:\n - num_nonzero_vars -\u003e n_genes_by_{expr_type}\n - pct_{var_qc_metrics} -\u003e pct_{expr_type}_{qc_var}\n - total_counts_{var_qc_metrics} -\u003e total_{expr_type}_{qc_var}\n - pct_of_counts_in_top_{top_n_vars}_vars -\u003e pct_{expr_type}_in_top_{n}_{var_type}\n - total_counts -\u003e total_{expr_type}\n \n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "layer": { + "type": + "string", + "description": "Type: `string`, example: `raw_counts`. 
", + "help_text": "Type: `string`, example: `raw_counts`. " + + } + + + , + "var_qc_metrics": { + "type": + "string", + "description": "Type: List of `string`, example: `ercc,highly_variable,mitochondrial`, multiple_sep: `\",\"`. Keys to select a boolean (containing only True or False) column from ", + "help_text": "Type: List of `string`, example: `ercc,highly_variable,mitochondrial`, multiple_sep: `\",\"`. Keys to select a boolean (containing only True or False) column from .var.\nFor each cell, calculate the proportion of total values for genes which are labeled \u0027True\u0027, \ncompared to the total sum of the values for all genes.\n" + + } + + + , + "var_qc_metrics_fill_na_value": { + "type": + "boolean", + "description": "Type: `boolean`. Fill any \u0027NA\u0027 values found in the columns specified with --var_qc_metrics to \u0027True\u0027 or \u0027False\u0027", + "help_text": "Type: `boolean`. Fill any \u0027NA\u0027 values found in the columns specified with --var_qc_metrics to \u0027True\u0027 or \u0027False\u0027.\nas False.\n" + + } + + + , + "top_n_vars": { + "type": + "string", + "description": "Type: List of `integer`, multiple_sep: `\",\"`. Number of top vars to be used to calculate cumulative proportions", + "help_text": "Type: List of `integer`, multiple_sep: `\",\"`. Number of top vars to be used to calculate cumulative proportions.\nIf not specified, proportions are not calculated. `--top_n_vars 20,50` finds\ncumulative proportion to the 20th and 50th most expressed vars.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." 
+ , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/qc/calculate_qc_metrics/setup_logger.py b/target/nextflow/qc/calculate_qc_metrics/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/qc/calculate_qc_metrics/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/qc/fastqc/.config.vsh.yaml b/target/nextflow/qc/fastqc/.config.vsh.yaml new file mode 100644 index 00000000000..d04e4308891 --- /dev/null +++ b/target/nextflow/qc/fastqc/.config.vsh.yaml @@ -0,0 +1,156 @@ +functionality: + name: "fastqc" + namespace: "qc" + version: "0.12.4" + arguments: + - type: "string" + name: "--mode" + alternatives: + - "-m" + description: "The mode in which the component works. Can be either files or dir." 
+ info: null + default: + - "files" + required: false + choices: + - "files" + - "dir" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Directory containing input fastq files." + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output directory to write reports to." + info: null + example: + - "qc" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--threads" + alternatives: + - "-t" + description: "Specifies the number of files which can be processed simultaneously.\ + \ Each thread will be allocated 250MB of\nmemory.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.\ + \ This component can take one or more files (by means of shell globbing) or a\ + \ complete directory.\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq/cellranger_tiny_fastq" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + 
- "fastqc" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "midmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/fastqc" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/fastqc/fastqc" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/qc/fastqc/main.nf b/target/nextflow/qc/fastqc/main.nf new file mode 100644 index 00000000000..2294d984ab1 --- /dev/null +++ b/target/nextflow/qc/fastqc/main.nf @@ -0,0 +1,2512 @@ +// fastqc 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. 
+// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "fastqc", + "namespace" : "qc", + "version" : "0.12.4", + "arguments" : [ + { + "type" : "string", + "name" : "--mode", + "alternatives" : [ + "-m" + ], + "description" : "The mode in which the component works. Can be either files or dir.", + "default" : [ + "files" + ], + "required" : false, + "choices" : [ + "files", + "dir" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Directory containing input fastq files.", + "example" : [ + "fastq_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output directory to write reports to.", + "example" : [ + "qc" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--threads", + "alternatives" : [ + "-t" + ], + "description" : "Specifies the number of files which can be processed simultaneously. 
Each thread will be allocated 250MB of\nmemory.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/" + } + ], + "description" : "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "fastqc" + ], + "interactive" : false + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowcpu", + "midmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : 
"memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/qc/fastqc/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/fastqc", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_MODE+x} ]; then echo "${VIASH_PAR_MODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mode='&'#" ; else echo "# par_mode="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_THREADS+x} ]; then echo "${VIASH_PAR_THREADS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_threads='&'#" ; else echo "# par_threads="; fi ) +$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +mkdir -p "\\$par_output" + +if [ "\\$par_mode" == "dir" ]; then + par_input="\\$par_input/*.fastq.gz" +fi + +eval fastqc \\${par_threads:+--threads \\$par_threads} -o "\\$par_output" "\\$par_input" +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/qc_fastqc", + "tag" : "0.12.0" + }, + "label" : [ + "lowcpu", + "midmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. 
+ // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The mutating unique() would remove the
+  // duplicates from ppIds itself, so the message below would no longer show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only values of arguments that the config declares as input files are touched,
+ * and only when they are still of type String; all other values are returned as is.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameter values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing it to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    // a missing direction is treated as "input", as in _castParamTypes
+    def isInputFile = argSettings &&
+      argSettings.type == "file" &&
+      (argSettings.direction != null ? argSettings.direction : "input") == "input"
+    if (isInputFile) {
+      // only String values are resolved; anything else is passed through untouched
+      def resolve = { path ->
+        path !instanceof String ? path : file(getChild(relativeTo, path))
+      }
+      parValue = parValue instanceof Collection ? parValue.collect(resolve) : resolve(parValue)
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the parameter sets it describes.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first guess the format of 'param_list' from its value
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected param_list format.
+  // Every parser returns [file, parameter sets]; 'file' is the value of param_list
+  // for the file based formats and null otherwise.
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relative to the
+  // location of the param_list file. These paths are resolved against that location.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps both lookups local to this closure instead of writing them to the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so that single and multiple values are cast the same way
+    // NOTE(review): values of parameters that are not in the config are wrapped here as well
+    // and never unwrapped below -- confirm that this is intended.
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      // only input files that are still Strings are turned into file objects
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config; 'publishDir' and
+ *                    'publish_dir' are accepted in addition.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of each set is its id,
+ *                      the second element is the map of parameters. Any further elements
+ *                      are passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    // everything after [id, parameters] is carried along untouched
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the param_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *                 provides a list of arguments that can be split up into multiple events
+ *                 in the output channel. Possible formats of param_list are: a csv file,
+ *                 a json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *                 have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // an id inside the param_list entry takes precedence over the global id
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the value from 'param_list' wins.
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll states the filter directly; 'def' and '?.' keep the config lookup local
+    // and safe for parameters that have no entry in the config.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) are unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *                 provides a list of arguments that can be split up into multiple events
+ *                 in the output channel. Possible formats of param_list are: a csv file,
+ *                 a json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *                 have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the parameter sets local instead of writing them to the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameter sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name,
+  // run through the same split/cast steps as regular input.
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+
+  // Split/cast the incoming parameter sets
+  def inputSets = applyConfig(params, config)
+
+  // Lay the input values over the defaults; elements after [id, values] are kept as is
+  def merged = inputSets.collect { tuple ->
+    def id = tuple[0]
+    def values = tuple[1]
+    [id, defaults + values] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/qc/fastqc/nextflow.config b/target/nextflow/qc/fastqc/nextflow.config new file mode 100644 index 00000000000..f4090c86c99 --- /dev/null +++ 
b/target/nextflow/qc/fastqc/nextflow.config @@ -0,0 +1,107 @@ +manifest { + name = 'fastqc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 
128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/qc/fastqc/nextflow_params.yaml b/target/nextflow/qc/fastqc/nextflow_params.yaml new file mode 100644 index 00000000000..7492ca67288 --- /dev/null +++ b/target/nextflow/qc/fastqc/nextflow_params.yaml @@ -0,0 +1,9 @@ +# Arguments +mode: "files" +input: # please fill in - example: "fastq_dir/" +# output: "$id.$key.output.output" +# threads: 123 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/qc/fastqc/nextflow_schema.json b/target/nextflow/qc/fastqc/nextflow_schema.json new file mode 100644 index 00000000000..a282d4e8632 --- /dev/null +++ b/target/nextflow/qc/fastqc/nextflow_schema.json @@ -0,0 +1,104 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "fastqc", +"description": "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. 
This component can take one or more files (by means of shell globbing) or a complete directory.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "mode": { + "type": + "string", + "description": "Type: `string`, default: `files`, choices: ``files`, `dir``. The mode in which the component works", + "help_text": "Type: `string`, default: `files`, choices: ``files`, `dir``. The mode in which the component works. Can be either files or dir.", + "enum": ["files", "dir"] + + , + "default": "files" + } + + + , + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `fastq_dir/`. Directory containing input fastq files", + "help_text": "Type: `file`, required, example: `fastq_dir/`. Directory containing input fastq files." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `qc/`. Output directory to write reports to", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `qc/`. Output directory to write reports to." + , + "default": "$id.$key.output.output" + } + + + , + "threads": { + "type": + "integer", + "description": "Type: `integer`. Specifies the number of files which can be processed simultaneously", + "help_text": "Type: `integer`. Specifies the number of files which can be processed simultaneously. Each thread will be allocated 250MB of\nmemory.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/qc/multiqc/.config.vsh.yaml b/target/nextflow/qc/multiqc/.config.vsh.yaml new file mode 100644 index 00000000000..236a878b939 --- /dev/null +++ b/target/nextflow/qc/multiqc/.config.vsh.yaml @@ -0,0 +1,140 @@ +functionality: + name: "multiqc" + namespace: "qc" + version: "0.12.4" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Inputs for MultiQC." + info: null + example: + - "input.txt" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Create report in the specified output directory." + info: null + example: + - "report" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "MultiQC aggregates results from bioinformatics analyses across many\ + \ samples into a single report.\nIt searches a given directory for analysis logs\ + \ and compiles a HTML report. 
It's a general use tool, perfect for summarising\ + \ the output from numerous bioinformatics tools.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/10x_5k_anticmv/fastqc/" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "multiqc" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "singlecpu" + - "lowmem" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: 
"docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/multiqc" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/multiqc/multiqc" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/qc/multiqc/main.nf b/target/nextflow/qc/multiqc/main.nf new file mode 100644 index 00000000000..8475dc0c71d --- /dev/null +++ b/target/nextflow/qc/multiqc/main.nf @@ -0,0 +1,2493 @@ +// multiqc 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+ +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "multiqc", + "namespace" : "qc", + "version" : "0.12.4", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Inputs for MultiQC.", + "example" : [ + "input.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Create report in the specified output directory.", + "example" : [ + "report" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/" + } + ], + "description" : "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. 
It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/" + }, + { + "type" : "file", + "path" : "resources_test/10x_5k_anticmv/fastqc/", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "multiqc" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + 
"mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/qc/multiqc/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/qc/multiqc", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import subprocess + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! 
-z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +# Run MultiQC +subprocess.run(["multiqc", "-o", par["output"]] + par["input"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/qc_multiqc", + "tag" : "0.12.0" + }, + "label" : [ + "singlecpu", + "lowmem" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+            'example': 'my_params.yaml',
+            'multiple': false,
+            'hidden': true
+          ],
+        ]
+      ]
+    ]
+  ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+/**
+ * Greedily wrap a string into lines of at most maxLength characters.
+ * Based on io.viash.helpers.Format.wordWrap.
+ *
+ * @param str The text to wrap; it is split on single whitespace characters.
+ * @param maxLength The maximum length of a line. A single word longer than
+ *                  this is placed on a line of its own.
+ *
+ * @return A list of lines.
+ */
+def formatWordWrap(str, maxLength) {
+  def words = str.split("\\s").toList()
+
+  // start from the first word so that the first line has no leading space
+  def line = words.pop()
+  def lines = []
+  while(!words.isEmpty()) {
+    def word = words.pop()
+    if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  lines.add(line)
+  return lines
+}
+
+/**
+ * Wrap every newline-separated paragraph of a string separately.
+ * Based on Format.paragraphWrap.
+ *
+ * @param str The text to wrap.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list of lines, paragraphs in their original order.
+ */
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    def word = null
+    def line = words.pop()
+    while(!words.isEmpty()) {
+      word = words.pop()
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    // the loop only ends once all words are consumed: flush the last line
+    outLines.add(line)
+  }
+  return outLines
+}
+
+/**
+ * Render the help text of a single argument.
+ *
+ * @param param A Map describing the argument (an element of the config's arguments).
+ *
+ * @return The argument name followed by its properties and wrapped description.
+ */
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      example = param.example.toString()
+    }
+  }
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  // String.replace() is literal. With replaceAll() a backslash in the
+  // replacement is an escape character, which silently dropped the
+  // backslashes that were meant to be emitted here.
+  def escapeChoice = { choice ->
+    def s1 = choice.replace("\n", "\\n")
+    def s2 = s1.replace("\"", "\\\"")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    // show newlines inside a value as a literal backslash-n
+    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+/**
+ * Render the complete help message of a component.
+ * Based on Helper.generateHelp() in Helper.scala.
+ *
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The help message: name and version, description, usage and
+ *         one section per argument group.
+ */
+def generateHelp(config) {
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // arguments may be listed by name: look those up, and drop unknown names
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+/**
+ * Print the help message and exit when the 'help' parameter was passed.
+ *
+ * @param config A Map of the Viash configuration.
+ */
+def helpMessage(config) {
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+/**
+ * Guess the format of the 'param_list' parameter.
+ *
+ * @param params Input parameters from nextflow.
+ *
+ * @return One of "none", "asis", "csv", "json", "yaml" or "yaml_blob".
+ */
+def _guessParamListFormat(params) {
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+// shared by the deprecated functions below so that the warning is printed once
+viashChannelDeprecationWarningPrinted = false
+
+/**
+ * Deprecated: turn nextflow parameters into a list of argument maps.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A list of maps, one per parameter set, each containing an 'id'.
+ */
+def paramsToList(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has been deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
+  def paramList = multiOptionOut[1]
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        // declared locally: an undeclared name would end up in the script binding
+        def parData
+        // split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      // NOTE(review): a one-argument closure on a Map receives the entry,
+      // which is never null, so this looks like a no-op; confirm the intent
+      // before changing this deprecated function.
+      .findAll{ par -> par != null }
+  }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+/**
+ * Deprecated: like paramsToList, but returns a nextflow channel.
+ */
+def paramsToChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has been deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+/**
+ * Deprecated: like paramsToChannel, but emits [id, arguments] tuples.
+ */
+def viashChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has been deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ *         their separator.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(parameterSettings.multiple_sep)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) leaves ppIds untouched, so that the message below still
+  // lists the duplicated ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared locally: an undeclared name would end up in the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // values that are not a String are passed through as is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' argument of the nextflow parameters based on
+ * settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] tuples
+  // by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // both declared locally: undeclared names would end up in the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *   ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; the first element of a set is its id,
+ *                      the second its parameter Map, any further elements are kept as is.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to initialise
+ *   a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *   csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *   which correspond to the different arguments of this pipeline. A json or a yaml
+ *   file should be a list of maps, each of which has keys corresponding to the
+ *   arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *   When passing a csv, json or yaml, relative path names are relativized to the
+ *   location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll states the filter directly instead of returning null from
+    // collectEntries. The safe navigation covers parameters that are not
+    // part of the config (e.g. publish_dir), for which no settings exist.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default", null) != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // declared locally: an undeclared name would end up in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Run both the defaults and the incoming parameter sets through the config
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+  def inputSets = applyConfig(params, config)
+
+  // Values from the input win over the defaults; elements after
+  // [id, parameters] are appended unchanged
+  def merged = inputSets.collect { tuple ->
+    [tuple[0], defaults + tuple[1]] + tuple.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ *   sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions:
+ *   ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and has all required keys.
+ *
+ * @param map The map to check.
+ * @param expectedKeys The keys that are allowed in the map.
+ * @param requiredKeys The keys that must be present in the map.
+ * @param mapName Name of the map, only used in the error messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; no 'key' variable exists in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+/**
+ * Validate the nextflow process directives and normalise the ones that
+ * accept several notations.
+ *
+ * @param drctv A Map of directives; entries with a null value are dropped.
+ *
+ * @return The validated directives.
+ */
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = drctv["container"]
+      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
+      def part1 =
+        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
+        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
+        m.registry ? m.registry + "/" :
+        ""
+      def part2 = m.image
+      def part3 = m.tag ? ":" + m.tag : ":latest"
+      drctv["container"] = part1 + part2 + part3
+    }
+  }
+
+  /* DIRECTIVE containerOptions
+    accepted examples:
+    - "--foo bar"
+    - ["--foo bar", "-f b"]
+  */
+  if (drctv.containsKey("containerOptions")) {
+    if (drctv["containerOptions"] instanceof List) {
+      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
+    }
+    assert drctv["containerOptions"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cpus
+    accepted examples:
+    - 1
+    - 10
+  */
+  if (drctv.containsKey("cpus")) {
+    assert drctv["cpus"] instanceof Integer
+  }
+
+  /* DIRECTIVE disk
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("disk")) {
+    assert drctv["disk"] instanceof CharSequence
+    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE echo
+    accepted examples:
+    - true
+    - false
+  */
+  if (drctv.containsKey("echo")) {
+    assert drctv["echo"] instanceof Boolean
+  }
+
+  /* DIRECTIVE errorStrategy
+    accepted examples:
+    - "terminate"
+    - "finish"
+  */
+  if (drctv.containsKey("errorStrategy")) {
+    assert drctv["errorStrategy"] instanceof CharSequence
+    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
+  }
+
+  /* DIRECTIVE executor
+    accepted examples:
+    - "local"
+    - "sge"
+  */
+  if (drctv.containsKey("executor")) {
+    assert drctv["executor"] instanceof CharSequence
+    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
+  }
+
+  /* DIRECTIVE machineType
+    accepted examples:
+    - "n1-highmem-8"
+  */
+  if (drctv.containsKey("machineType")) {
+    assert drctv["machineType"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE maxErrors
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxErrors")) {
+    assert drctv["maxErrors"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxForks
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxForks")) {
+    assert drctv["maxForks"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxRetries
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxRetries")) {
+    assert drctv["maxRetries"] instanceof Integer
+  }
+
+  /* DIRECTIVE memory
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("memory")) {
+    assert drctv["memory"] instanceof CharSequence
+    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE module
+    accepted examples:
+    - "ncbi-blast/2.2.27"
+    - "ncbi-blast/2.2.27:t_coffee/10.0"
+    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
+  */
+  if (drctv.containsKey("module")) {
+    if (drctv["module"] instanceof List) {
+      drctv["module"] = drctv["module"].join(":")
+    }
+    assert drctv["module"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE penv
+    accepted examples:
+    - "smp"
+  */
+  if (drctv.containsKey("penv")) {
+    assert drctv["penv"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE pod
+    accepted examples:
+    - [ label: "key", value: "val" ]
+    - [ annotation: "key", value: "val" ]
+    - [ env: "key", value: "val" ]
+    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
+  */
+  if (drctv.containsKey("pod")) {
+    if (drctv["pod"] instanceof Map) {
+      drctv["pod"] = [ drctv["pod"] ]
+    }
+    assert drctv["pod"] instanceof List
+    drctv["pod"].forEach { pod ->
+      assert pod instanceof Map
+      // TODO: should more checks be added?
+      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
+      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
+    }
+  }
+
+  /* DIRECTIVE publishDir
+    accepted examples:
+    - []
+    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
+    - "/path/to/dir"
+      is transformed to [[ path: "/path/to/dir" ]]
+    - [ path: "/path/to/dir", mode: "cache" ]
+      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
+  */
+  // TODO: should we also look at params["publishDir"]?
+  if (drctv.containsKey("publishDir")) {
+    def pblsh = drctv["publishDir"]
+
+    // check different options
+    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence
+
+    // turn into list if not already so
+    // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work.
+    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]
+
+    // check elements of publishDir
+    pblsh = pblsh.collect{ elem ->
+      // turn into map if not already so
+      elem = elem instanceof CharSequence ? [ path: elem ] : elem
+
+      // check types and keys
+      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
+      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")
+
+      // check elements in map
+      assert elem.containsKey("path")
+      assert elem["path"] instanceof CharSequence
+      if (elem.containsKey("mode")) {
+        assert elem["mode"] instanceof CharSequence
+        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
+      }
+      if (elem.containsKey("overwrite")) {
+        assert elem["overwrite"] instanceof Boolean
+      }
+      if (elem.containsKey("pattern")) {
+        assert elem["pattern"] instanceof CharSequence
+      }
+      if (elem.containsKey("saveAs")) {
+        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
+      }
+      if (elem.containsKey("enabled")) {
+        assert elem["enabled"] instanceof Boolean
+      }
+
+      // return final result
+      elem
+    }
+    // store final directive
+    drctv["publishDir"] = pblsh
+  }
+
+  /* DIRECTIVE queue
+    accepted examples:
+    - "long"
+    - "short,long"
+    - ["short", "long"]
+  */
+  if (drctv.containsKey("queue")) {
+    if (drctv["queue"] instanceof List) {
+      drctv["queue"] = drctv["queue"].join(",")
+    }
+    assert drctv["queue"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE label
+    accepted examples:
+    - "big_mem"
+    - "big_cpu"
+    - ["big_mem", "big_cpu"]
+  */
+  if (drctv.containsKey("label")) {
+    if (drctv["label"] instanceof CharSequence) {
+      drctv["label"] = [ drctv["label"] ]
+    }
+    assert drctv["label"] instanceof List
+    drctv["label"].forEach { label ->
+      assert label instanceof CharSequence
+      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
+      // ^ does not allow closures
+    }
+  }
+
+  /* DIRECTIVE scratch
+    accepted examples:
+    - true
+    - "/path/to/scratch"
+    - '$MY_PATH_TO_SCRATCH'
+    - "ram-disk"
+  */
+  if (drctv.containsKey("scratch")) {
+    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE storeDir
+    accepted examples:
+    - "/path/to/storeDir"
+  */
+  if (drctv.containsKey("storeDir")) {
+    assert drctv["storeDir"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE stageInMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageInMode")) {
+    assert drctv["stageInMode"] instanceof CharSequence
+    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
+  }
+
+  /* DIRECTIVE stageOutMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageOutMode")) {
+    assert drctv["stageOutMode"] instanceof CharSequence
+    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
+  }
+
+  /* DIRECTIVE tag
+    accepted examples:
+    - "foo"
+    - '$id'
+  */
+  if (drctv.containsKey("tag")) {
+    assert drctv["tag"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * @param auto A Map of settings; entries with a null value are dropped.
+ *
+ * @return A Map holding only the expected keys, each of which must be
+ *         present and a Boolean.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // declared locally: an undeclared name would end up in the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/qc/multiqc/nextflow.config b/target/nextflow/qc/multiqc/nextflow.config new file mode 100644 index 00000000000..85ce6d27df4 --- /dev/null 
+++ b/target/nextflow/qc/multiqc/nextflow.config @@ -0,0 +1,107 @@ +manifest { + name = 'multiqc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\'s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb 
{ memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/qc/multiqc/nextflow_params.yaml b/target/nextflow/qc/multiqc/nextflow_params.yaml new file mode 100644 index 00000000000..f9dd0ac813d --- /dev/null +++ b/target/nextflow/qc/multiqc/nextflow_params.yaml @@ -0,0 +1,7 @@ +# Arguments +input: # please fill in - example: ["input.txt"] +# output: "$id.$key.output.output" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/qc/multiqc/nextflow_schema.json b/target/nextflow/qc/multiqc/nextflow_schema.json new file mode 100644 index 00000000000..0e0dd93540b --- /dev/null +++ b/target/nextflow/qc/multiqc/nextflow_schema.json @@ -0,0 +1,81 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "multiqc", +"description": "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. 
It\u0027s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `input.txt`, multiple_sep: `\":\"`. Inputs for MultiQC", + "help_text": "Type: List of `file`, required, example: `input.txt`, multiple_sep: `\":\"`. Inputs for MultiQC." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `report`. Create report in the specified output directory", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `report`. Create report in the specified output directory." + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/query/cellxgene_census/.config.vsh.yaml b/target/nextflow/query/cellxgene_census/.config.vsh.yaml new file mode 100644 index 00000000000..7dccd30c5af --- /dev/null +++ b/target/nextflow/query/cellxgene_census/.config.vsh.yaml @@ -0,0 +1,260 @@ +functionality: + name: "cellxgene_census" + namespace: "query" + version: "0.12.4" + authors: + - name: "Matthias Beyens" + info: + role: "Contributor" + links: + github: "MatthiasBeyens" + orcid: "0000-0003-3304-0706" + email: "matthias.beyens@gmail.com" + linkedin: "mbeyens" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Dries De Maeyer" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + argument_groups: + - name: "Inputs" + description: "Arguments related to the input (aka query) dataset." + arguments: + - type: "string" + name: "--input_database" + description: "Full input database S3 prefix URL. Default: CellxGene Census" + info: null + example: + - "s3://" + default: + - "CellxGene" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality to store the output in." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cellxgene_release" + description: "CellxGene Census release date. 
More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html" + info: null + default: + - "2023-05-15" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Query" + description: "Arguments related to the query." + arguments: + - type: "string" + name: "--species" + description: "Specie(s) of interest. If not specified, Homo Sapiens will be\ + \ queried." + info: null + example: + - "homo_sapiens" + default: + - "homo_sapiens" + required: false + choices: + - "homo_sapiens" + - "mus_musculus" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cell_query" + description: "The query for selecting the cells as defined by the cellxgene\ + \ census schema." + info: null + example: + - "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136',\ + \ 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--cells_filter_columns" + description: "The query for selecting the cells as defined by the cellxgene\ + \ census schema." + info: null + example: + - "dataset_id" + - "tissue" + - "assay" + - "disease" + - "cell_type" + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--min_cells_filter_columns" + description: "Minimum of amount of summed cells_filter_columns cells" + info: null + example: + - 100.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + description: "Output arguments." + arguments: + - type: "file" + name: "--output" + description: "Output h5mu file." 
+ info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Query CellxGene Census or user-specified TileDBSoma object, and eventually\ + \ fetch cell and gene metadata or/and expression counts." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "cellxgene-census~=1.2.0" + - "obonet~=1.0.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: 
"memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/query/cellxgene_census" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/query/cellxgene_census/cellxgene_census" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/query/cellxgene_census/main.nf b/target/nextflow/query/cellxgene_census/main.nf new file mode 100644 index 00000000000..dc6eeb5c733 --- /dev/null +++ b/target/nextflow/query/cellxgene_census/main.nf @@ -0,0 +1,2803 @@ +// cellxgene_census 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Matthias Beyens +// * Dries De Maeyer (author) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "cellxgene_census", + "namespace" : "query", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Matthias Beyens", + "info" : { + "role" : "Contributor", + "links" : { + "github" : "MatthiasBeyens", + "orcid" : "0000-0003-3304-0706", + "email" : "matthias.beyens@gmail.com", + "linkedin" : "mbeyens" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Dries De Maeyer", + "roles" : [ + "author" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "description" : "Arguments related to the input (aka query) dataset.", + "arguments" : [ + { + "type" : "string", + "name" : "--input_database", + "description" : "Full input database S3 prefix URL. 
Default: CellxGene Census", + "example" : [ + "s3://" + ], + "default" : [ + "CellxGene" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "Which modality to store the output in.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--cellxgene_release", + "description" : "CellxGene Census release date. More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html", + "default" : [ + "2023-05-15" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Query", + "description" : "Arguments related to the query.", + "arguments" : [ + { + "type" : "string", + "name" : "--species", + "description" : "Specie(s) of interest. 
If not specified, Homo Sapiens will be queried.", + "example" : [ + "homo_sapiens" + ], + "default" : [ + "homo_sapiens" + ], + "required" : false, + "choices" : [ + "homo_sapiens", + "mus_musculus" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--cell_query", + "description" : "The query for selecting the cells as defined by the cellxgene census schema.", + "example" : [ + "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136', 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--cells_filter_columns", + "description" : "The query for selecting the cells as defined by the cellxgene census schema.", + "example" : [ + "dataset_id", + "tissue", + "assay", + "disease", + "cell_type" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--min_cells_filter_columns", + "description" : "Minimum of amount of summed cells_filter_columns cells", + "example" : [ + 100.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "description" : "Output arguments.", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output h5mu file.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + 
"resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch cell and gene metadata or/and expression counts.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "cellxgene-census~=1.2.0", + "obonet~=1.0.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "midcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 
32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/query/cellxgene_census/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/query/cellxgene_census", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import os +import cellxgene_census +import mudata as mu +import anndata as ad + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_database': $( if [ ! -z ${VIASH_PAR_INPUT_DATABASE+x} ]; then echo "r'${VIASH_PAR_INPUT_DATABASE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cellxgene_release': $( if [ ! -z ${VIASH_PAR_CELLXGENE_RELEASE+x} ]; then echo "r'${VIASH_PAR_CELLXGENE_RELEASE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'species': $( if [ ! 
-z ${VIASH_PAR_SPECIES+x} ]; then echo "r'${VIASH_PAR_SPECIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cell_query': $( if [ ! -z ${VIASH_PAR_CELL_QUERY+x} ]; then echo "r'${VIASH_PAR_CELL_QUERY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cells_filter_columns': $( if [ ! -z ${VIASH_PAR_CELLS_FILTER_COLUMNS+x} ]; then echo "r'${VIASH_PAR_CELLS_FILTER_COLUMNS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'min_cells_filter_columns': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS+x} ]; then echo "float(r'${VIASH_PAR_MIN_CELLS_FILTER_COLUMNS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +### VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def connect_census(input_database, release): + """ + Connect to CellxGene Census or user-provided TileDBSoma object + """ + if input_database != "CellxGene": + raise NotImplementedError( + "Custom census database is not implemented yet!" 
+ ) + + logger.info( + "Initializing %s release %s", + input_database, release + ) + return cellxgene_census.open_soma( + census_version = release + ) + + +def get_anndata(census_connection, cell_query, species): + logger.info( + "Getting gene expression data based on %s query.", + cell_query + ) + return cellxgene_census.get_anndata( + census = census_connection, + obs_value_filter = cell_query, + organism = species + ) + + +def add_cellcensus_metadata_obs(census_connection, query_data): + logger.info( + "Adding extented metadata to gene expression data." + ) + census_datasets = census_connection["census_info"]["datasets"].read().concat().to_pandas() + + query_data.obs.dataset_id = query_data.obs.dataset_id.astype("category") + + dataset_info = census_datasets[census_datasets.dataset_id.isin(query_data.obs.dataset_id.cat.categories)]\\\\ + [['collection_id', 'collection_name', 'collection_doi', 'dataset_id', 'dataset_title']]\\\\ + .reset_index(drop=True)\\\\ + .apply(lambda x: x.astype('category')) + + return query_data.obs.merge( + dataset_info, on='dataset_id', how = 'left' + ) + + +def cellcensus_cell_filter(query_data, cells_filter_columns, min_cells_filter_columns): + t0 = query_data.shape + query_data = query_data[ + query_data.obs.groupby(cells_filter_columns)["soma_joinid"].transform('count') >= min_cells_filter_columns + ] + t1 = query_data.shape + logger.info( + 'Removed %s cells based on %s min_cells_filter_columns of %s cells_filter_columns.' 
+ % ((t0[0] - t1[0]), min_cells_filter_columns, cells_filter_columns) + ) + return query_data + + +def write_mudata(mdata, output_location, compression): + logger.info("Writing %s", output_location) + mdata.write_h5mu( + output_location, + compression=compression + ) + + +def main(): + + # start dev + logger.info('cells_filter_columns: %s' % par["cells_filter_columns"]) + logger.info('min_cells_filter_columns: %s' % par["min_cells_filter_columns"]) + # end dev + + census_connection = connect_census( + par["input_database"], + par["cellxgene_release"] + ) + + query_data = get_anndata( + census_connection, + par["cell_query"], + par["species"] + ) + + query_data.obs = add_cellcensus_metadata_obs( + census_connection, + query_data + ) + + census_connection.close() + del census_connection + + if par["cells_filter_columns"]: + if not par["min_cells_filter_columns"]: + raise NotImplementedError( + "You specified cells_filter_columns, thus add min_cells_filter_columns!" + ) + query_data = cellcensus_cell_filter( + query_data, + par["cells_filter_columns"], + par["min_cells_filter_columns"] + ) + + query_data.var_names = query_data.var["feature_id"] + query_data.var["gene_symbol"] = query_data.var["feature_name"] + + # Create empty mudata file + mdata = mu.MuData({par["modality"]: ad.AnnData()}) + + write_mudata( + mdata, + par["output"], + par["output_compression"] + ) + + mu.write_h5ad(par["output"], data=query_data, mod=par["modality"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/query_cellxgene_census", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "midcpu" + ], + "tag" : "$id" +}'''), + // auto 
settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // The id is the first element of every parameter set.
+  def ppIds = parameterSets.collect{ it[0] }
+  // Compare against a set rather than calling unique(): unique() removes the duplicates
+  // from ppIds itself, so the message below would no longer show the offending ids.
+  assert ppIds.size() == ppIds.toSet().size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relatively to the provided path. Values of other parameters, and
+ *         values that are not Strings, are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of leaking it into the script binding
+    def argSettings = config.functionality.allArguments.find{ it.plainName == parName }
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      // Only String values are still unresolved paths; anything else is passed through as is.
+      def resolve = { path -> path !instanceof String ? path : file(getChild(relativeTo, path)) }
+      // The same rule applies to a single value and to every element of a collection.
+      parValue = parValue instanceof Collection ? parValue.collect(resolve) : resolve(parValue)
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the parameter sets passed through the 'param_list' nextflow parameter
+ * based on settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns a pair: [location of the param_list file (or null), parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  // (plain get(key): the two-argument Groovy get(key, default) would insert the default into the map)
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id"), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to the closure instead of storing them in the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null);
+    // a safe property read is used because get("type", null) would write a missing key into the config
+    def parType = paramSettings?.type
+    // wrap single values so that the casts below can treat every parameter as a list
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): parameters without settings (e.g. publish_dir) skip this step and therefore
+    // stay wrapped in a list - confirm that callers expect this.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config ('publishDir' and
+ *                    'publish_dir' are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+      assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of each set is its id,
+ *                      the second element the Map of parameter values; any further
+ *                      elements are passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  // plain get(key): the two-argument Groovy get(key, default) would insert 'id' into params
+  def globalID = params.get("id")
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a key clash the value from 'param_list' wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll states the filter directly instead of relying on collectEntries to cope with a
+    // closure that returns null. The safe navigation covers parameters that have no settings
+    // in the config (e.g. publish_dir), and the plain get(key) leaves the config untouched.
+    combinedArgsValues = combinedArgsValues.findAll{ paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.get("default") != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the parameter sets local instead of storing them in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the default value of every argument that declares one, keyed by plain name
+  def argsWithDefault = config.functionality.allArguments.findAll { arg -> arg.containsKey("default") }
+  def rawDefaults = argsWithDefault.collectEntries { arg -> [ arg.plainName, arg.default ] }
+
+  // Defaults go through the same splitting and casting as the provided values
+  def defaults = applyConfigToOneParameterSet(rawDefaults, config)
+
+  // Split, cast and validate the incoming parameter sets
+  def processedSets = applyConfig(params, config)
+
+  // Lay the provided values over the defaults; tuple elements beyond
+  // the id and the values are appended unchanged
+  def withDefaults = processedSets.collect { paramSet ->
+    [paramSet[0], defaults + paramSet[1]] + paramSet.drop(2)
+  }
+  _checkUniqueIds(withDefaults)
+
+  return withDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only contains expected keys and contains all required keys.
+ *
+ * @param map The object to check; must be a Map.
+ * @param expectedKeys The keys that are allowed to occur in the map.
+ * @param requiredKeys The keys that have to occur in the map.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; 'key' only exists inside the closure above
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether auto exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ?
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure.
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags (declared before assignment so the closure can call itself) + def valueToStr; valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); a non-List value means only the dummy file was matched + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs +
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/query/cellxgene_census/nextflow.config b/target/nextflow/query/cellxgene_census/nextflow.config new file mode 100644 index 
00000000000..8b1b584f334 --- /dev/null +++ b/target/nextflow/query/cellxgene_census/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'cellxgene_census' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch cell and gene metadata or/and expression counts.' + author = 'Matthias Beyens, Dries De Maeyer' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: 
mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/query/cellxgene_census/nextflow_params.yaml b/target/nextflow/query/cellxgene_census/nextflow_params.yaml new file mode 100644 index 00000000000..83970b54f64 --- /dev/null +++ b/target/nextflow/query/cellxgene_census/nextflow_params.yaml @@ -0,0 +1,18 @@ +# Inputs +input_database: "CellxGene" +modality: "rna" +cellxgene_release: "2023-05-15" + +# Outputs +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Query +species: "homo_sapiens" +# cell_query: "is_primary_data == True and cell_type_ontology_term_id in ['CL:0000136', 'CL:1000311', 'CL:0002616'] and suspension_type == 'cell'" +# cells_filter_columns: ["dataset_id", "tissue", "assay", "disease", "cell_type"] +# min_cells_filter_columns: 100 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/query/cellxgene_census/nextflow_schema.json b/target/nextflow/query/cellxgene_census/nextflow_schema.json new file mode 100644 index 00000000000..40eef071b53 --- /dev/null +++ 
b/target/nextflow/query/cellxgene_census/nextflow_schema.json @@ -0,0 +1,187 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "cellxgene_census", +"description": "Query CellxGene Census or user-specified TileDBSoma object, and eventually fetch cell and gene metadata or/and expression counts.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "Arguments related to the input (aka query) dataset.", + "properties": { + + + "input_database": { + "type": + "string", + "description": "Type: `string`, default: `CellxGene`, example: `s3://`. Full input database S3 prefix URL", + "help_text": "Type: `string`, default: `CellxGene`, example: `s3://`. Full input database S3 prefix URL. Default: CellxGene Census" + , + "default": "CellxGene" + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. Which modality to store the output in", + "help_text": "Type: `string`, default: `rna`. Which modality to store the output in." + , + "default": "rna" + } + + + , + "cellxgene_release": { + "type": + "string", + "description": "Type: `string`, default: `2023-05-15`. CellxGene Census release date", + "help_text": "Type: `string`, default: `2023-05-15`. CellxGene Census release date. More information: https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_data_release_info.html" + , + "default": "2023-05-15" + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "Output arguments.", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. Output h5mu file." 
+ , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. ", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "query" : { + "title": "Query", + "type": "object", + "description": "Arguments related to the query.", + "properties": { + + + "species": { + "type": + "string", + "description": "Type: `string`, default: `homo_sapiens`, example: `homo_sapiens`, choices: ``homo_sapiens`, `mus_musculus``. Specie(s) of interest", + "help_text": "Type: `string`, default: `homo_sapiens`, example: `homo_sapiens`, choices: ``homo_sapiens`, `mus_musculus``. Specie(s) of interest. If not specified, Homo Sapiens will be queried.", + "enum": ["homo_sapiens", "mus_musculus"] + + , + "default": "homo_sapiens" + } + + + , + "cell_query": { + "type": + "string", + "description": "Type: `string`, example: `is_primary_data == True and cell_type_ontology_term_id in [\u0027CL:0000136\u0027, \u0027CL:1000311\u0027, \u0027CL:0002616\u0027] and suspension_type == \u0027cell\u0027`. The query for selecting the cells as defined by the cellxgene census schema", + "help_text": "Type: `string`, example: `is_primary_data == True and cell_type_ontology_term_id in [\u0027CL:0000136\u0027, \u0027CL:1000311\u0027, \u0027CL:0002616\u0027] and suspension_type == \u0027cell\u0027`. The query for selecting the cells as defined by the cellxgene census schema." + + } + + + , + "cells_filter_columns": { + "type": + "string", + "description": "Type: List of `string`, example: `dataset_id:tissue:assay:disease:cell_type`, multiple_sep: `\":\"`. The query for selecting the cells as defined by the cellxgene census schema", + "help_text": "Type: List of `string`, example: `dataset_id:tissue:assay:disease:cell_type`, multiple_sep: `\":\"`. The query for selecting the cells as defined by the cellxgene census schema." 
+ + } + + + , + "min_cells_filter_columns": { + "type": + "number", + "description": "Type: `double`, example: `100`. Minimum of amount of summed cells_filter_columns cells", + "help_text": "Type: `double`, example: `100`. Minimum of amount of summed cells_filter_columns cells" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/query" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/query/cellxgene_census/setup_logger.py b/target/nextflow/query/cellxgene_census/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/query/cellxgene_census/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml b/target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml new file mode 100644 index 00000000000..f9850d9ec6f --- /dev/null +++ b/target/nextflow/reference/build_bdrhap_reference/.config.vsh.yaml @@ -0,0 +1,186 @@ +functionality: + name: "build_bdrhap_reference" + namespace: "reference" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" 
+ roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--genome_fasta" + description: "Reference genome fasta." + info: null + example: + - "genome_sequence.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + description: "Reference transcriptome annotation." + info: null + example: + - "transcriptome_annotation.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Star index" + info: null + example: + - "star_index.tar.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Compile a reference into a STAR index compatible with the BD Rhapsody\ + \ pipeline." 
+ test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "bdgenomics/rhapsody:1.10.1" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "pigz" + interactive: false + test_setup: + - type: "docker" + env: + - "GOPATH /root/go" + - "GOBIN /root/go/bin" + - "PATH \"${PATH}:/root/go/bin\"" + - type: "apt" + packages: + - "golang" + interactive: false + - type: "docker" + run: + - "go get golang.org/dl/go1.20.6 && go1.20.6 download && \\\ngit clone --branch\ + \ v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/ &&\ + \ go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 
5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_bdrhap_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_bdrhap_reference/build_bdrhap_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/reference/build_bdrhap_reference/main.nf b/target/nextflow/reference/build_bdrhap_reference/main.nf new file mode 100644 index 00000000000..b657f52f956 --- /dev/null +++ b/target/nextflow/reference/build_bdrhap_reference/main.nf @@ -0,0 +1,2597 @@ +// build_bdrhap_reference 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "build_bdrhap_reference", + "namespace" : "reference", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--genome_fasta", + "description" : "Reference genome fasta.", + "example" : [ + "genome_sequence.fa.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--transcriptome_gtf", + "description" : "Reference transcriptome annotation.", + "example" : [ + "transcriptome_annotation.gtf.gz" + ], + "must_exist" : 
true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Star index", + "example" : [ + "star_index.tar.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/" + } + ], + "description" : "Compile a reference into a STAR index compatible with the BD Rhapsody pipeline.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "run_test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/" + }, + { + "type" : "file", + "path" : "resources_test/reference_gencodev41_chr1", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "bdgenomics/rhapsody:1.10.1", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "pigz" + ], + "interactive" : false + } + ], + "test_setup" : [ + { + "type" : "docker", + "env" : [ + "GOPATH /root/go", + "GOBIN /root/go/bin", + "PATH \\"${PATH}:/root/go/bin\\"" + ] + }, + { + "type" : "apt", + "packages" : [ + "golang" + ], + "interactive" : false + }, + { + "type" : 
"docker", + "run" : [ + "go get golang.org/dl/go1.20.6 && go1.20.6 download && \\\\\ngit clone --branch v2.5.0 https://github.com/shenwei356/seqkit.git && \\\\\ncd seqkit/seqkit/ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" + ] + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/reference/build_bdrhap_reference/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_bdrhap_reference", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e 
+tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +meta_cpus="\\${meta_cpus:-1}" + +# process params +extra_params=( ) + +if [ ! -z "\\$meta_cpus" ]; then + extra_params+=( "--runThreadN \\$meta_cpus" ) +fi + +echo "> Unzipping input files" +unpigz -c "\\$par_genome_fasta" > "\\$tmpdir/genome.fa" +unpigz -c "\\$par_transcriptome_gtf" > "\\$tmpdir/transcriptome.gtf" + +echo "> Building star index" +mkdir "\\$tmpdir/out" +STAR \\\\ + --runMode genomeGenerate \\\\ + --genomeDir "\\$tmpdir/out" \\\\ + --genomeFastaFiles "\\$tmpdir/genome.fa" \\\\ + --sjdbGTFfile "\\$tmpdir/transcriptome.gtf" \\\\ + --sjdbOverhang 100 \\\\ + --genomeSAindexNbases 11 \\\\ + "\\${extra_params[@]}" + +echo "> Creating archive" +tar --use-compress-program="pigz -k " -cf "\\$par_output" -C "\\$tmpdir/out" . 
+VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/reference_build_bdrhap_reference", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
+ // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+  //
+  // Example: `{ id, output, state -> state + [counts: state.output] }`
+  // Default: `{ id, output, state -> output }`
+  toState: null,
+
+  // Whether or not to print debug messages
+  // Default: `false`
+  debug: false
+]
+
+// END CUSTOM CODE
+
+/////////////////////////////////////
+// Viash Workflow helper functions //
+/////////////////////////////////////
+
+import java.util.regex.Pattern
+import java.io.BufferedReader
+import java.io.FileReader
+import java.nio.file.Paths
+import java.nio.file.Files
+import groovy.json.JsonSlurper
+import groovy.text.SimpleTemplateEngine
+import org.yaml.snakeyaml.Yaml
+
+// param helpers //
+def paramExists(name) { // true when `params` has the key and its value is not the empty string
+  return params.containsKey(name) && params[name] != ""
+}
+
+def assertParamExists(name, description) { // exits with status 1 when the parameter is missing
+  if (!paramExists(name)) {
+    exit 1, "ERROR: Please provide a --${name} parameter ${description}"
+  }
+}
+
+// helper functions for reading params from file //
+def getChild(parent, child) { // resolve `child` against the directory part of `parent`
+  if (child.contains("://") || Paths.get(child).isAbsolute()) {
+    child // URLs and absolute paths are returned untouched
+  } else {
+    def parentAbsolute = Paths.get(parent).toAbsolutePath().toString()
+    parentAbsolute.replaceAll('/[^/]*$', "/") + child // drop the file name, keep the trailing slash
+  }
+}
+
+def readCsv(file_path) { // parse a CSV file into a list of maps keyed by the header row; '#' lines are skipped
+  def output = []
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+
+  // todo: allow escaped quotes in string
+  // todo: allow single quotes?
+  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
+  def removeQuote = Pattern.compile('''"(.*)"''')
+
+  def br = Files.newBufferedReader(inputFile)
+
+  def row = -1
+  def header = null
+  while (br.ready() && header == null) {
+    def line = br.readLine()
+    row++
+    if (!line.startsWith("#")) {
+      header = splitRegex.split(line, -1).collect{field ->
+        def m = removeQuote.matcher(field) // declared locally: an undeclared name would live in the script binding
+        m.find() ? m.replaceFirst('$1') : field
+      }
+    }
+  }
+  assert header != null: "CSV file should contain a header"
+
+  while (br.ready()) {
+    def line = br.readLine()
+    row++
+    if (line == null) {
+      br.close()
+      break
+    }
+
+    if (!line.startsWith("#")) {
+      def predata = splitRegex.split(line, -1)
+      def data = predata.collect{field ->
+        if (field == "") {
+          return null // empty fields are dropped from the row map below
+        }
+        def m = removeQuote.matcher(field)
+        if (m.find()) {
+          return m.replaceFirst('$1')
+        } else {
+          return field
+        }
+      }
+      assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
+
+      def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
+      output.add(dataMap)
+    }
+  }
+  br.close() // the loop normally ends via br.ready(), so close here as well (closing twice is harmless)
+  output
+}
+
+def readJsonBlob(str) { // parse a JSON string
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parseText(str)
+}
+
+def readJson(file_path) { // parse a JSON file given as a path string or a Path
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parse(inputFile)
+}
+
+def readYamlBlob(str) { // parse a YAML string
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(str)
+}
+
+def readYaml(file_path) { // parse a YAML file given as a path string or a Path
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(inputFile)
+}
+
+// helper functions for reading a viash config in groovy //
+
+// based on how Functionality.scala is implemented
+def processArgument(arg) { // fills in defaults on the argument map in place and returns it
+  arg.multiple = arg.multiple != null ? arg.multiple : false
+  arg.required = arg.required != null ? arg.required : false
+  arg.direction = arg.direction != null ? arg.direction : "input"
+  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":"
+  arg.plainName = arg.name.replaceAll("^-*", "")
+
+  if (arg.type == "file") {
+    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
+    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
+  }
+
+  if (arg.type == "file" && arg.direction == "output") {
+    def mult = arg.multiple ? "_*" : ""
+    def extSearch = ""
+    if (arg.default != null) {
+      extSearch = arg.default
+    } else if (arg.example != null) {
+      extSearch = arg.example
+    }
+    if (extSearch instanceof List) {
+      extSearch = extSearch[0]
+    }
+    def extSearchResult = extSearch.find("\\.[^\\.]+\$") // file extension of the default/example, if any
+    def ext = extSearchResult != null ? extSearchResult : ""
+    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
+  }
+
+  if (!arg.multiple) {
+    if (arg.default != null && arg.default instanceof List) {
+      arg.default = arg.default[0]
+    }
+    if (arg.example != null && arg.example instanceof List) {
+      arg.example = arg.example[0]
+    }
+  }
+
+  if (arg.type == "boolean_true") {
+    arg.default = false
+  }
+  if (arg.type == "boolean_false") {
+    arg.default = true
+  }
+
+  arg
+}
+
+// based on how Functionality.scala is implemented
+def processArgumentGroup(argumentGroups, name, arguments) { // returns a list holding zero or one group named `name`
+  def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet()
+
+  // Check if 'arguments' is in 'argumentGroups'.
+  def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))}
+
+  // Check whether an argument group of 'name' exists.
+  def existing = argumentGroups.find{gr -> name == gr.name}
+
+  // if there are no arguments missing from the argument group, just return the existing group (if any)
+  if (argumentsNotInGroup.isEmpty()) {
+    return existing == null ? [] : [existing]
+
+  // if there are missing arguments and there is an existing group, add the missing arguments to it
+  } else if (existing != null) {
+    def newEx = existing.clone() // shallow copy: build a new arguments list so `existing` is not modified
+    newEx.arguments = existing.arguments + argumentsNotInGroup.findAll{it !instanceof String}
+    return [newEx]
+
+  // else create a new group
+  } else {
+    def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}]
+    return [newEx]
+  }
+}
+
+// based on how Functionality.scala is implemented
+def processConfig(config) { // normalises the config map in place and returns it
+  // TODO: assert .functionality etc.
+  if (config.functionality.inputs) {
+    System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.")
+  }
+  if (config.functionality.outputs) {
+    System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.")
+  }
+
+  // set defaults for inputs
+  config.functionality.inputs =
+    (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg ->
+      arg.type = arg.type != null ? arg.type : "file"
+      arg.direction = "input"
+      processArgument(arg)
+    }
+  // set defaults for outputs
+  config.functionality.outputs =
+    (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg ->
+      arg.type = arg.type != null ? arg.type : "file"
+      arg.direction = "output"
+      processArgument(arg)
+    }
+  // set defaults for arguments
+  config.functionality.arguments =
+    (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg ->
+      processArgument(arg)
+    }
+  // set defaults for argument_group arguments
+  config.functionality.argument_groups =
+    (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp ->
+      grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg ->
+        arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg)
+      }
+      grp
+    }
+
+  // create combined arguments list
+  config.functionality.allArguments =
+    config.functionality.inputs +
+    config.functionality.outputs +
+    config.functionality.arguments +
+    config.functionality.argument_groups.collectMany{ group ->
+      group.arguments.findAll{ it !instanceof String }
+    }
+
+  // add missing argument groups (based on Functionality::allArgumentGroups())
+  def argGroups = config.functionality.argument_groups
+  def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs)
+  def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs)
+  def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments)
+  def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name)))
+  config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered
+
+  config
+}
+
+def readConfig(file) { // reads and normalises a viash config; defaults to config.vsh.yaml in projectDir
+  def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml")
+  processConfig(config)
+}
+
+// recursively merge two maps
+def mergeMap(Map lhs, Map rhs) { // maps are merged recursively, collections concatenated, other values overwritten by rhs
+  return rhs.inject(lhs.clone()) { map, entry ->
+    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
+      map[entry.key] = mergeMap(map[entry.key], entry.value)
+    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
+      map[entry.key] += entry.value
+    } else {
+      map[entry.key] = entry.value
+    }
+    return map
+  }
+}
+
+def addGlobalParams(config) { // returns the config extended with the Nextflow-level publish_dir / param_list arguments
+  def localConfig = [
+    "functionality" : [
+      "argument_groups": [
+        [
+          "name": "Nextflow input-output arguments",
+          "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+          "arguments" : [
+            [
+              'name': '--publish_dir',
+              'required': true,
+              'type': 'string',
+              'description': 'Path to an output directory.',
+              'example': 'output/',
+              'multiple': false
+            ],
+            [
+              'name': '--param_list',
+              'required': false,
+              'type': 'string',
+              'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
+              |
+              |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
+              |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
+              |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
+              |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
+              |
+              |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+              'example': 'my_params.yaml',
+              'multiple': false,
+              'hidden': true
+            ],
+          ]
+        ]
+      ]
+    ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+// based on io.viash.helpers.Format.wordWrap
+def formatWordWrap(str, maxLength) { // NOTE(review): `line` starts empty, so the first output line begins with a space
+  def words = str.split("\\s").toList()
+
+  def word = null
+  def line = ""
+  def lines = []
+  while(!words.isEmpty()) {
+    word = words.pop()
+    if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+    if (words.isEmpty()) {
+      lines.add(line)
+    }
+  }
+  return lines
+}
+
+// based on Format.paragraphWrap
+def paragraphWrap(str, maxLength) { // wraps each newline-separated paragraph separately; returns a list of lines
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    def word = null
+    def line = words.pop()
+    while(!words.isEmpty()) {
+      word = words.pop()
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    if (words.isEmpty()) {
+      outLines.add(line)
+    }
+  }
+  return outLines
+}
+
+def generateArgumentHelp(param) { // renders the help text of one processed argument map as a string
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      example = param.example.toString()
+    }
+  }
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  def escapeChoice = { choice -> // quote a choice when it contains a comma or needed escaping
+    def s1 = choice.replaceAll("\\n", "\\\\n")
+    def s2 = s1.replaceAll("\"", """\\\"""")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+// Based on Helper.generateHelp() in Helper.scala
+def generateHelp(config) { // builds the full help text: name/version, description, usage, argument groups
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+def helpMessage(config) { // prints the help text and exits when --help was passed
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+def _guessParamListFormat(params) { // returns one of: none, asis, csv, json, yaml, yaml_blob
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+viashChannelDeprecationWarningPrinted = false
+
+def paramsToList(params, config) { // deprecated: superseded by channelFromParams + preprocessInputs
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
+  def paramList = multiOptionOut[1]
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        def parData // declared locally (not in the script binding); split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      .findAll{ par -> par != null }
+    }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+def paramsToChannel(params, config) { // deprecated: wraps paramsToList in a channel
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+def viashChannel(params, config) { // deprecated: emits [id, paramMap] tuples
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ *         their separator.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(parameterSettings.multiple_sep)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" // unique(false) leaves ppIds intact so the message still lists the duplicates
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relatively to the provided path; all other values are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName} // local, not a script-binding global
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue)) // non-String values pass through as-is
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the resulting parameter sets.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a list of maps to a list of [id, map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName}) // local, not a script-binding global
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue] // NOTE(review): values of unknown parameters stay wrapped in this list - confirm callers expect that
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+      assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2) // any extra tuple elements are carried over untouched
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, String>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) // null for publish_dir / publishDir
+      if ( paramValue != null || parameterSettings?.default != null ) { // read-only lookup: get(key, null) would insert 'default' into the config
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  def processedParams = _paramsToParamSets(params, config) // local, not a script-binding global
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Get different parameter types (used throughout this function)
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // Apply config to default parameters
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Apply config to input parameters
+  def parsedInputParamSets = applyConfig(params, config)
+
+  // Merge two parameter sets together
+  def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet ->
+    def id = parsedInputParamSet[0]
+    def parValues = parsedInputParamSet[1]
+    def passthrough = parsedInputParamSet.drop(2) // extra tuple elements beyond [id, params]
+    def parValuesWithDefault = parsedDefaultValues + parValues // explicitly passed values override the defaults
+    [id, parValuesWithDefault] + passthrough
+  })
+  _checkUniqueIds(parsedArgs)
+
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containing the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + def wfKey = args.key != null ? args.key : "preprocessInputs" // 'def': keep local instead of assigning to the shared script binding + def config = args.config // 'def': each returned workflow captures its own config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config?.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ?
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" // report the key that is actually missing ('key' is not defined in this closure) + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] // 'def': keep local instead of assigning to the script binding + + // check whether expected keys are all present and booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) : "Missing required key '$key' in auto map" + assert auto[key] instanceof Boolean : "Expected auto argument '$key' to be a Boolean. Found: class ${auto[key].getClass()}" + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] // start from the existing publishDir entries, or an empty list when none are set + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure.
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build ONE message string: a second argument to log.warn would be treated as a format argument and not printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/reference/build_bdrhap_reference/nextflow.config b/target/nextflow/reference/build_bdrhap_reference/nextflow.config new file mode 100644 
index 00000000000..0aa24e8d240 --- /dev/null +++ b/target/nextflow/reference/build_bdrhap_reference/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'build_bdrhap_reference' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Compile a reference into a STAR index compatible with the BD Rhapsody pipeline.' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } 
+ withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml b/target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml new file mode 100644 index 00000000000..827860782eb --- /dev/null +++ b/target/nextflow/reference/build_bdrhap_reference/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Arguments +genome_fasta: # please fill in - example: "genome_sequence.fa.gz" +transcriptome_gtf: # please fill in - example: "transcriptome_annotation.gtf.gz" +# output: "$id.$key.output.gz" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json b/target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json new file mode 100644 index 00000000000..c1aa06c35ea --- /dev/null +++ b/target/nextflow/reference/build_bdrhap_reference/nextflow_schema.json @@ -0,0 +1,91 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "build_bdrhap_reference", +"description": "Compile a reference into a STAR index compatible with the BD Rhapsody pipeline.", 
+"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "genome_fasta": { + "type": + "string", + "description": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta", + "help_text": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta." + + } + + + , + "transcriptome_gtf": { + "type": + "string", + "description": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation", + "help_text": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.gz`, example: `star_index.tar.gz`. Star index", + "help_text": "Type: `file`, required, default: `$id.$key.output.gz`, example: `star_index.tar.gz`. Star index" + , + "default": "$id.$key.output.gz" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml b/target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml new file mode 100644 index 00000000000..4fb149e6340 --- /dev/null +++ b/target/nextflow/reference/build_cellranger_reference/.config.vsh.yaml @@ -0,0 +1,187 @@ +functionality: + name: "build_cellranger_reference" + namespace: "reference" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--genome_fasta" + description: "Reference genome fasta." + info: null + example: + - "genome_sequence.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + description: "Reference transcriptome annotation." 
+ info: null + example: + - "transcriptome_annotation.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + description: "Output folder" + info: null + example: + - "cellranger_reference" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Build a Cell Ranger-compatible reference folder from user-supplied\ + \ genome FASTA and gene GTF files. Creates a new folder named after the genome." + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ghcr.io/data-intuitive/cellranger:7.0" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "pigz" + interactive: false + test_setup: + - type: "docker" + env: + - "GOPATH /root/go" + - "GOBIN /root/go/bin" + - "PATH \"${PATH}:/root/go/bin\"" + - type: "apt" + packages: + - "golang" + - "git" + interactive: false + - type: "docker" + run: + - "go install golang.org/dl/go1.20.6@latest && go1.20.6 download && \\\ngit clone\ + \ --branch v2.5.0 https://github.com/shenwei356/seqkit.git && \\\ncd seqkit/seqkit/\ + \ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" 
+ auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_cellranger_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_cellranger_reference/build_cellranger_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/reference/build_cellranger_reference/main.nf b/target/nextflow/reference/build_cellranger_reference/main.nf new file mode 100644 index 00000000000..b56982f534e --- /dev/null +++ b/target/nextflow/reference/build_cellranger_reference/main.nf @@ -0,0 +1,2602 @@ +// build_cellranger_reference 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. 
+// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "build_cellranger_reference", + "namespace" : "reference", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--genome_fasta", + "description" : "Reference genome fasta.", + "example" : [ + "genome_sequence.fa.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : 
"input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--transcriptome_gtf", + "description" : "Reference transcriptome annotation.", + "example" : [ + "transcriptome_annotation.gtf.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "description" : "Output folder", + "example" : [ + "cellranger_reference" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/" + } + ], + "description" : "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA and gene GTF files. 
Creates a new folder named after the genome.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "run_test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/" + }, + { + "type" : "file", + "path" : "resources_test/reference_gencodev41_chr1", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ghcr.io/data-intuitive/cellranger:7.0", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "pigz" + ], + "interactive" : false + } + ], + "test_setup" : [ + { + "type" : "docker", + "env" : [ + "GOPATH /root/go", + "GOBIN /root/go/bin", + "PATH \\"${PATH}:/root/go/bin\\"" + ] + }, + { + "type" : "apt", + "packages" : [ + "golang", + "git" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "go install golang.org/dl/go1.20.6@latest && go1.20.6 download && \\\\\ngit clone --branch v2.5.0 https://github.com/shenwei356/seqkit.git && \\\\\ncd seqkit/seqkit/ && go1.20.6 build && cp seqkit /usr/bin/ && cd ../../ && rm -rf seqkit\n" + ] + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 
8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/reference/build_cellranger_reference/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/build_cellranger_reference", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) +$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +# just to make sure +par_genome_fasta=\\`realpath \\$par_genome_fasta\\` +par_transcriptome_gtf=\\`realpath \\$par_transcriptome_gtf\\` +par_output=\\`realpath \\$par_output\\` + +# process params +extra_params=( ) + +if [ ! -z "\\$meta_cpus" ]; then + extra_params+=( "--nthreads=\\$meta_cpus" ) +fi +if [ ! -z "\\$meta_memory_gb" ]; then + # always keep 2gb for the OS itself + memory_gb=\\`python -c "print(int('\\$meta_memory_gb') - 2)"\\` + extra_params+=( "--memgb=\\$memory_gb" ) +fi + +echo "> Unzipping input files" +unpigz -c "\\$par_genome_fasta" > "\\$tmpdir/genome.fa" + +echo "> Building star index" +cd "\\$tmpdir" +cellranger mkref \\\\ + --fasta "\\$tmpdir/genome.fa" \\\\ + --genes "\\$par_transcriptome_gtf" \\\\ + --genome output \\\\ + "\\${extra_params[@]}" + +echo "> Creating archive" +tar --use-compress-program="pigz -k " -cf "\\$par_output" -C "\\$tmpdir/output" . 
+VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/reference_build_cellranger_reference", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
+ // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+  //
+  // Example: `{ id, output, state -> state + [counts: state.output] }`
+  // Default: `{ id, output, state -> output }`
+  toState: null,
+
+  // Whether or not to print debug messages
+  // Default: `false`
+  debug: false
+]
+
+// END CUSTOM CODE
+
+/////////////////////////////////////
+// Viash Workflow helper functions //
+/////////////////////////////////////
+
+import java.util.regex.Pattern
+import java.io.BufferedReader
+import java.io.FileReader
+import java.nio.file.Paths
+import java.nio.file.Files
+import groovy.json.JsonSlurper
+import groovy.text.SimpleTemplateEngine
+import org.yaml.snakeyaml.Yaml
+
+// param helpers //
+
+// Return true when `params` contains the key `name` and its value is not
+// the empty string.
+def paramExists(name) {
+  return params.containsKey(name) && params[name] != ""
+}
+
+// Stop the run with exit status 1 and an error message when the parameter
+// `name` does not exist (see paramExists). `description` is appended to the
+// message verbatim.
+def assertParamExists(name, description) {
+  if (!paramExists(name)) {
+    exit 1, "ERROR: Please provide a --${name} parameter ${description}"
+  }
+}
+
+// helper functions for reading params from file //
+
+// Resolve `child` against the location of `parent`.
+//
+// - If `child` contains "://" or is an absolute path, it is returned as is.
+// - Otherwise everything after the last "/" of the absolute form of `parent`
+//   is replaced by `child`, i.e. `child` becomes a sibling of `parent`.
+def getChild(parent, child) {
+  if (child.contains("://") || Paths.get(child).isAbsolute()) {
+    child
+  } else {
+    def parentAbsolute = Paths.get(parent).toAbsolutePath().toString()
+    parentAbsolute.replaceAll('/[^/]*$', "/") + child
+  }
+}
+
+// Read a csv file into a list of maps (one map per data row).
+//
+// - `file_path` is either a Path or something that `file()` can turn into one.
+// - Lines starting with "#" are skipped.
+// - The first non-comment line is the header; its fields become the map keys.
+// - Fields are split on commas that are not inside double quotes, and the
+//   double quotes around a quoted field are removed.
+// - Empty fields are left out of the row's map.
+//
+// Fails with an assertion error when no header is found or when a row does
+// not have as many fields as the header.
+def readCsv(file_path) {
+  def output = []
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+
+  // todo: allow escaped quotes in string
+  // todo: allow single quotes?
+  def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
+  def removeQuote = Pattern.compile('''"(.*)"''')
+
+  def br = Files.newBufferedReader(inputFile)
+
+  // the reader is closed in the finally block, so it is also released when
+  // one of the assertions below fails
+  try {
+    def row = -1
+    def header = null
+    while (br.ready() && header == null) {
+      def line = br.readLine()
+      row++
+      if (line != null && !line.startsWith("#")) {
+        header = splitRegex.split(line, -1).collect{field ->
+          // declared with def so the matcher stays local to this closure
+          // instead of ending up in the script binding
+          def m = removeQuote.matcher(field)
+          m.find() ? m.replaceFirst('$1') : field
+        }
+      }
+    }
+    assert header != null: "CSV file should contain a header"
+
+    while (br.ready()) {
+      def line = br.readLine()
+      row++
+      if (line == null) {
+        break
+      }
+
+      if (!line.startsWith("#")) {
+        def predata = splitRegex.split(line, -1)
+        def data = predata.collect{field ->
+          if (field == "") {
+            return null
+          }
+          def m = removeQuote.matcher(field)
+          if (m.find()) {
+            return m.replaceFirst('$1')
+          } else {
+            return field
+          }
+        }
+        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
+
+        // pair header names with values and drop the empty (null) fields
+        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
+        output.add(dataMap)
+      }
+    }
+  } finally {
+    br.close()
+  }
+
+  output
+}
+
+// Parse a json string with JsonSlurper and return the result.
+def readJsonBlob(str) {
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parseText(str)
+}
+
+// Parse a json file with JsonSlurper and return the result.
+// `file_path` is either a Path or something that `file()` can turn into one.
+def readJson(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def jsonSlurper = new JsonSlurper()
+  jsonSlurper.parse(inputFile)
+}
+
+// Parse a yaml string with SnakeYAML and return the result.
+def readYamlBlob(str) {
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(str)
+}
+
+// Parse a yaml file with SnakeYAML and return the result.
+// `file_path` is either a Path or something that `file()` can turn into one.
+def readYaml(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path) : file_path
+  def yamlSlurper = new Yaml()
+  yamlSlurper.load(inputFile)
+}
+
+// helper functions for reading a viash config in groovy //
+
+// based on how Functionality.scala is implemented
+//
+// Fill in the defaults of a single argument. `arg` is modified in place and
+// also returned.
+//
+// - multiple: false, required: false, direction: "input", multiple_sep: ":"
+// - plainName: the name without its leading dashes
+// - file arguments: must_exist and create_parent default to true
+// - output file arguments: the default is always replaced by
+//   "$id.$key.<plainName>", followed by "_*" when multiple and by the
+//   extension found in the previous default (or else the example)
+// - when not multiple: a list-valued default / example is reduced to its
+//   first element
+// - boolean_true arguments default to false, boolean_false to true
+def processArgument(arg) {
+  arg.multiple = arg.multiple != null ? arg.multiple : false
+  arg.required = arg.required != null ? arg.required : false
+  arg.direction = arg.direction != null ? arg.direction : "input"
+  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":"
+  arg.plainName = arg.name.replaceAll("^-*", "")
+
+  if (arg.type == "file") {
+    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
+    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
+  }
+
+  if (arg.type == "file" && arg.direction == "output") {
+    def mult = arg.multiple ? "_*" : ""
+    def extSearch = ""
+    if (arg.default != null) {
+      extSearch = arg.default
+    } else if (arg.example != null) {
+      extSearch = arg.example
+    }
+    if (extSearch instanceof List) {
+      extSearch = extSearch[0]
+    }
+    // the extension is the last "." and whatever follows it, if any
+    def extSearchResult = extSearch.find("\\.[^\\.]+\$")
+    def ext = extSearchResult != null ? extSearchResult : ""
+    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
+  }
+
+  if (!arg.multiple) {
+    if (arg.default != null && arg.default instanceof List) {
+      arg.default = arg.default[0]
+    }
+    if (arg.example != null && arg.example instanceof List) {
+      arg.example = arg.example[0]
+    }
+  }
+
+  if (arg.type == "boolean_true") {
+    arg.default = false
+  }
+  if (arg.type == "boolean_false") {
+    arg.default = true
+  }
+
+  arg
+}
+
+// based on how Functionality.scala is implemented
+//
+// Make sure every argument in `arguments` is part of an argument group.
+//
+// `argumentGroups` is the list of groups defined so far; a group's
+// `arguments` may refer to an argument by its name (a String). Returns a list
+// containing zero or one group:
+//
+// - all arguments are already referenced by some group: the existing group
+//   called `name`, or an empty list if there is none
+// - otherwise, and a group called `name` exists: a clone of that group with
+//   the unreferenced arguments appended
+// - otherwise: a new group called `name` holding the unreferenced arguments
+def processArgumentGroup(argumentGroups, name, arguments) {
+  def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet()
+
+  // Check if 'arguments' is in 'argumentGroups'.
+  def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))}
+
+  // Check whether an argument group of 'name' exists.
+  def existing = argumentGroups.find{gr -> name == gr.name}
+
+  // if there are no arguments missing from the argument group, just return the existing group (if any)
+  if (argumentsNotInGroup.isEmpty()) {
+    return existing == null ? [] : [existing]
+
+  // if there are missing arguments and there is an existing group, add the missing arguments to it
+  } else if (existing != null) {
+    // NOTE(review): if clone() is a shallow copy, addAll below also appends to
+    // the arguments list of `existing` -- confirm whether that is intended
+    def newEx = existing.clone()
+    newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String})
+    return [newEx]
+
+  // else create a new group
+  } else {
+    def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}]
+    return [newEx]
+  }
+}
+
+// based on how Functionality.scala is implemented
+def processConfig(config) {
+  // TODO: assert .functionality etc.
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) deduplicates a copy; the mutating default would drop the duplicates
+  // from ppIds itself, so the failure message could no longer show them
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only arguments that the config declares as type "file" with direction "input"
+ * are touched; all other entries are returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        // resolve every String element, leave non-String elements as they are
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a non-String scalar is passed through untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' argument of the nextflow parameters based on
+ * settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected param_list format;
+  // every parser returns [file location or null, parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; the type of every argument
+ *               is looked up in config.functionality.allArguments.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // both lookups are declared with 'def': without it they are written to the script
+    // binding and shared by every invocation of this closure
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list throughout so single and multiple values share one code path
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        "  Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed;
+  // the two publish directory spellings are accepted although they are not in the config
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets; element 0 of each set is the id,
+ *                      element 1 the parameter map, further elements are passed through.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the param_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup out of the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // parameterSettings is null for names that are accepted but not declared in the
+      // config (publish_dir / publishDir); '?.' treats those as having no default
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ */
+def channelFromParams(Map params, Map config) {
+  // declared locally; an undeclared assignment would create a script-level global
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name
+  def configDefaults = [:]
+  config.functionality.allArguments.each { arg ->
+    if (arg.containsKey("default")) {
+      configDefaults[arg.plainName] = arg.default
+    }
+  }
+
+  // Run the defaults, then the incoming sets, through the config
+  def defaultsParsed = applyConfigToOneParameterSet(configDefaults, config)
+  def inputsParsed = applyConfig(params, config)
+
+  // Layer each set's own values over the defaults; tuple elements
+  // beyond the second are appended unchanged
+  def merged = inputsParsed.collect { tup ->
+    [tup[0], defaultsParsed + tup[1]] + tup.drop(2)
+  }
+  _checkUniqueIds(merged)
+
+  return merged
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // Declared with 'def' so that each call captures its own key and config.
+  // As script-binding globals, a second call would overwrite the config that
+  // the lazily executed 'map' closure below reads.
+  def wfKey = args.key != null ? args.key : "preprocessInputs"
+  def config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map with only expected keys and all required keys.
+ *
+ * @param map          The value to check; must be a Map.
+ * @param expectedKeys Collection of keys that are allowed to occur in the map.
+ * @param requiredKeys Collection of keys that must occur in the map.
+ * @param mapName      Name used in the assertion messages (may be null/empty).
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must use 'requiredKey': 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+/**
+ * Validate and normalise a map of Nextflow process directives.
+ *
+ * Entries with a null value are dropped. Each known directive is type-checked
+ * with an assertion, and some are normalised:
+ *   - conda, containerOptions: a List is joined with " "
+ *   - module: a List is joined with ":"
+ *   - queue: a List is joined with ","
+ *   - container: a Map [registry, image, tag] becomes "registry/image:tag"
+ *   - pod: a single Map is wrapped in a List
+ *   - label: a single String is wrapped in a List
+ *   - publishDir: a String or Map is turned into a List of Maps
+ *
+ * @param drctv Map of directive name to value.
+ * @return A new map holding the validated, normalised directives.
+ */
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = drctv["container"]
+      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
+      // registry precedence: environment variable, then params, then the map itself
+      def part1 =
+        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
+        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
+        m.registry ? m.registry + "/" :
+        ""
+      def part2 = m.image
+      def part3 = m.tag ? ":" + m.tag : ":latest"
+      drctv["container"] = part1 + part2 + part3
+    }
+  }
+
+  /* DIRECTIVE containerOptions
+    accepted examples:
+    - "--foo bar"
+    - ["--foo bar", "-f b"]
+  */
+  if (drctv.containsKey("containerOptions")) {
+    if (drctv["containerOptions"] instanceof List) {
+      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
+    }
+    assert drctv["containerOptions"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cpus
+    accepted examples:
+    - 1
+    - 10
+  */
+  if (drctv.containsKey("cpus")) {
+    assert drctv["cpus"] instanceof Integer
+  }
+
+  /* DIRECTIVE disk
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("disk")) {
+    assert drctv["disk"] instanceof CharSequence
+    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE echo
+    accepted examples:
+    - true
+    - false
+  */
+  if (drctv.containsKey("echo")) {
+    assert drctv["echo"] instanceof Boolean
+  }
+
+  /* DIRECTIVE errorStrategy
+    accepted examples:
+    - "terminate"
+    - "finish"
+  */
+  if (drctv.containsKey("errorStrategy")) {
+    assert drctv["errorStrategy"] instanceof CharSequence
+    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
+  }
+
+  /* DIRECTIVE executor
+    accepted examples:
+    - "local"
+    - "sge"
+  */
+  if (drctv.containsKey("executor")) {
+    assert drctv["executor"] instanceof CharSequence
+    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
+  }
+
+  /* DIRECTIVE machineType
+    accepted examples:
+    - "n1-highmem-8"
+  */
+  if (drctv.containsKey("machineType")) {
+    assert drctv["machineType"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE maxErrors
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxErrors")) {
+    assert drctv["maxErrors"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxForks
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxForks")) {
+    assert drctv["maxForks"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxRetries
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxRetries")) {
+    assert drctv["maxRetries"] instanceof Integer
+  }
+
+  /* DIRECTIVE memory
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("memory")) {
+    assert drctv["memory"] instanceof CharSequence
+    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE module
+    accepted examples:
+    - "ncbi-blast/2.2.27"
+    - "ncbi-blast/2.2.27:t_coffee/10.0"
+    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
+  */
+  if (drctv.containsKey("module")) {
+    if (drctv["module"] instanceof List) {
+      drctv["module"] = drctv["module"].join(":")
+    }
+    assert drctv["module"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE penv
+    accepted examples:
+    - "smp"
+  */
+  if (drctv.containsKey("penv")) {
+    assert drctv["penv"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE pod
+    accepted examples:
+    - [ label: "key", value: "val" ]
+    - [ annotation: "key", value: "val" ]
+    - [ env: "key", value: "val" ]
+    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
+  */
+  if (drctv.containsKey("pod")) {
+    if (drctv["pod"] instanceof Map) {
+      drctv["pod"] = [ drctv["pod"] ]
+    }
+    assert drctv["pod"] instanceof List
+    drctv["pod"].forEach { pod ->
+      assert pod instanceof Map
+      // TODO: should more checks be added?
+      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
+      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
+    }
+  }
+
+  /* DIRECTIVE publishDir
+    accepted examples:
+    - []
+    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
+    - "/path/to/dir"
+      is transformed to [[ path: "/path/to/dir" ]]
+    - [ path: "/path/to/dir", mode: "cache" ]
+      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
+  */
+  // TODO: should we also look at params["publishDir"]?
+  if (drctv.containsKey("publishDir")) {
+    def pblsh = drctv["publishDir"]
+
+    // check different options
+    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence
+
+    // turn into list if not already so
+    // NOTE: 'if (!pblsh instanceof List)' does not work because '!' binds tighter
+    // than 'instanceof'; it would need to be '!(pblsh instanceof List)'.
+    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]
+
+    // check elements of publishDir
+    pblsh = pblsh.collect{ elem ->
+      // turn into map if not already so
+      elem = elem instanceof CharSequence ? [ path: elem ] : elem
+
+      // check types and keys
+      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
+      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")
+
+      // check elements in map
+      assert elem.containsKey("path")
+      assert elem["path"] instanceof CharSequence
+      if (elem.containsKey("mode")) {
+        assert elem["mode"] instanceof CharSequence
+        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
+      }
+      if (elem.containsKey("overwrite")) {
+        assert elem["overwrite"] instanceof Boolean
+      }
+      if (elem.containsKey("pattern")) {
+        assert elem["pattern"] instanceof CharSequence
+      }
+      if (elem.containsKey("saveAs")) {
+        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
+      }
+      if (elem.containsKey("enabled")) {
+        assert elem["enabled"] instanceof Boolean
+      }
+
+      // return final result
+      elem
+    }
+    // store final directive
+    drctv["publishDir"] = pblsh
+  }
+
+  /* DIRECTIVE queue
+    accepted examples:
+    - "long"
+    - "short,long"
+    - ["short", "long"]
+  */
+  if (drctv.containsKey("queue")) {
+    if (drctv["queue"] instanceof List) {
+      drctv["queue"] = drctv["queue"].join(",")
+    }
+    assert drctv["queue"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE label
+    accepted examples:
+    - "big_mem"
+    - "big_cpu"
+    - ["big_mem", "big_cpu"]
+  */
+  if (drctv.containsKey("label")) {
+    if (drctv["label"] instanceof CharSequence) {
+      drctv["label"] = [ drctv["label"] ]
+    }
+    assert drctv["label"] instanceof List
+    drctv["label"].forEach { label ->
+      assert label instanceof CharSequence
+      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
+      // ^ does not allow closures
+    }
+  }
+
+  /* DIRECTIVE scratch
+    accepted examples:
+    - true
+    - "/path/to/scratch"
+    - '$MY_PATH_TO_SCRATCH'
+    - "ram-disk"
+  */
+  if (drctv.containsKey("scratch")) {
+    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE storeDir
+    accepted examples:
+    - "/path/to/storeDir"
+  */
+  if (drctv.containsKey("storeDir")) {
+    assert drctv["storeDir"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE stageInMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageInMode")) {
+    assert drctv["stageInMode"] instanceof CharSequence
+    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
+  }
+
+  /* DIRECTIVE stageOutMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageOutMode")) {
+    assert drctv["stageOutMode"] instanceof CharSequence
+    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
+  }
+
+  /* DIRECTIVE tag
+    accepted examples:
+    - "foo"
+    - '$id'
+  */
+  if (drctv.containsKey("tag")) {
+    assert drctv["tag"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * Null-valued entries are dropped first. Each of the keys 'simplifyInput',
+ * 'simplifyOutput', 'transcript' and 'publish' must then be present and hold
+ * a Boolean.
+ *
+ * @param auto Map of auto settings.
+ * @return A map restricted to the four expected keys.
+ */
+// TODO: unit test processAuto
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps this list local instead of leaking it into the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+/**
+ * Merge user supplied process arguments with the module defaults and validate them.
+ *
+ * Steps performed:
+ *   - overlay 'args' on thisDefaultProcessArgs;
+ *   - resolve and validate 'key' (a closure is applied to the module name);
+ *   - validate 'directives' (processDirectives) and 'auto' (processAuto);
+ *   - derive publishDir entries from params when auto.publish / auto.transcript is set;
+ *   - strip publishDir, cpus, memory and label on stub runs;
+ *   - check that map, mapId, mapData, mapPassthrough and filter are closures if set;
+ *   - turn 'fromState' / 'toState' given as a List or Map into closures.
+ *
+ * @param args Map of process arguments passed to the module.
+ * @return The merged and validated process arguments.
+ */
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${processArgs['directives'].getClass()}"
+  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])
+
+  // check whether auto exists and apply defaults
+  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
+  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
+  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])
+
+  // auto define publish, if so desired
+  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
+    //   " Example: params.publish_dir = \"./output/\""
+    def publishDir =
+      params.containsKey("publish_dir") ? params.publish_dir :
+      params.containsKey("publishDir") ? params.publishDir :
+      null
+
+    if (publishDir != null) {
+      processArgs.directives.publishDir = [[
+        path: publishDir,
+        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // append to the existing publishDir list, or start a new one
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${processArgs[nam].getClass()}"
+    }
+  }
+
+  // check fromState
+  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
+  def fromState = processArgs["fromState"]
+  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
+    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
+  if (fromState) {
+    // if fromState is a List, convert to map
+    if (fromState instanceof List) {
+      // check whether fromstate is a list[string]
+      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
+      fromState = fromState.collectEntries{[it, it]}
+    }
+
+    // if fromState is a map, convert to closure
+    if (fromState instanceof Map) {
+      // check whether fromstate is a map[string, string]
+      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
+      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
+      def fromStateMap = fromState.clone()
+      // turn the map into a closure to be used later on
+      fromState = { it ->
+        def state = it[1]
+        assert state instanceof Map : "Error in module '$key': the state is not a Map"
+        def data = fromStateMap.collectEntries{newkey, origkey ->
+          // check whether all values of fromState are in state
+          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
+          [newkey, state[origkey]]
+        }
+        data
+      }
+    }
+
+    processArgs["fromState"] = fromState
+  }
+
+  // check toState
+  def toState = processArgs["toState"]
+
+  if (toState == null) {
+    // by default the module output becomes the new state
+    toState = { tup -> tup[1] }
+  }
+
+  // toState should be a closure, map[string, string], or list[string]
+  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
+    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"
+
+  // if toState is a List, convert to map
+  if (toState instanceof List) {
+    // check whether toState is a list[string]
+    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
+    toState = toState.collectEntries{[it, it]}
+  }
+
+  // if toState is a map, convert to closure
+  if (toState instanceof Map) {
+    // check whether toState is a map[string, string]
+    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
+    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
+    def toStateMap = toState.clone()
+    // turn the map into a closure to be used later on
+    toState = { it ->
+      def output = it[1]
+      def state = it[2]
+      assert output instanceof Map : "Error in module '$key': the output is not a Map"
+      assert state instanceof Map : "Error in module '$key': the state is not a Map"
+      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
+        // check whether all values of toState are in output
+        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
+        [newkey, output[origkey]]
+      }
+      state + extraEntries
+    }
+  }
+
+  processArgs["toState"] = toState
+
+  // return output
+  return processArgs
+}
+
+/**
+ * Generate the source of a Nextflow process for this module, evaluate it at
+ * runtime and return the resulting process.
+ *
+ * The process name is derived from processArgs.key; when a process with that
+ * prefix already exists in the current script a number is appended and a
+ * warning is logged.
+ *
+ * @param processArgs Process arguments as returned by processProcessArgs().
+ * @return The process registered under the generated name.
+ */
+def processFactory(Map processArgs) {
+  // autodetect process key
+  def wfKey = processArgs["key"]
+  def procKeyPrefix = "${wfKey}_process"
+  def meta = ScriptMeta.current()
+  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
+  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
+  def newNumber = (numbers + [-1]).max() + 1
+
+  def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // the message parts are concatenated: passing them as separate arguments
+    // would make the logger treat the second one as an (unused) format argument
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // declared before assignment so that the closure can call itself recursively
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.' + par.plainName + '}'
+      }
+    }
+    .join()
+
+  // TODO: move this functionality somewhere else?
+  if (processArgs.auto.transcript) {
+    outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}'
+  } else {
+    outputPaths = outputPaths + ', path{[".exitcode"]}'
+  }
+
+  // create dirs for output files (based on BashWrapper.createParentFiles)
+  def createParentStr = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" && it.create_parent }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
+    }
+    .join("\n")
+
+  // construct inputFileExports
+  def inputFileExports = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
+    .collect { par ->
+      // 'def' keeps this helper value local to the closure
+      def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")"
+      "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
+    }
+
+  // NOTE: if using docker, use /tmp instead of tmpDir!
+  def tmpDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?:
+    System.getenv('VIASH_TMPDIR') ?:
+    System.getenv('VIASH_TEMPDIR') ?:
+    System.getenv('VIASH_TMP') ?:
+    System.getenv('TEMP') ?:
+    System.getenv('TMPDIR') ?:
+    System.getenv('TEMPDIR') ?:
+    System.getenv('TMP') ?:
+    '/tmp'
+  ).toAbsolutePath()
+
+  // construct stub
+  def stub = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
+    }
+    .join("\n")
+
+  // escape script
+  def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"')
+
+  // publishdir assert
+  def assertStr = processArgs.auto.publish || processArgs.auto.transcript ?
+    """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ :
+    ""
+
+  // generate process string
+  def procStr =
+  """nextflow.enable.dsl=2
+  |
+  |process $procKey {$drctvStrs
+  |input:
+  | tuple val(id)$inputPaths, val(args), path(resourcesDir)
+  |output:
+  | tuple val("\$id")$outputPaths, optional: true
+  |stub:
+  |\"\"\"
+  |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; }
+  |$stub
+  |\"\"\"
+  |script:$assertStr
+  |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') }
+  |def parInject = args
+  | .findAll{key, value -> value != null}
+  | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""}
+  | .join("\\n")
+  |\"\"\"
+  |# meta exports
+  |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}"
+  |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}"
+  |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}"
+  |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME"
+  |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+  |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" }
+  |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" }
+  |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then
+  | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 ))
+  | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 ))
+  | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 ))
+  | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 ))
+  | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 ))
+  |fi
+  |
+  |# meta synonyms
+  |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR"
+  |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR"
+  |
+  |# create output dirs if need be
+  |function mkdir_parent {
+  | for file in "\\\$@"; do
+  | mkdir -p "\\\$(dirname "\\\$file")"
+  | done
+  |}
+  |$createParentStr
+  |
+  |# argument exports${inputFileExports.join()}
+  |\$parInject
+  |
+  |# process script
+  |${escapedScript}
+  |\"\"\"
+  |}
+  |""".stripMargin()
+
+  // TODO: print on debug
+  // if (processArgs.debug == true) {
+  //   println("######################\n$procStr\n######################")
+  // }
+
+  // create runtime process
+  def ownerParams = new ScriptBinding.ParamsMap()
+  def binding = new ScriptBinding().setParams(ownerParams)
+  def module = new IncludeDef.Module(name: procKey)
+  def scriptParser = new ScriptParser(session)
+    .setModule(true)
+    .setBinding(binding)
+  scriptParser.scriptPath = ScriptMeta.current().getScriptPath()
+  def moduleScript = scriptParser.runScript(procStr)
+    .getScript()
+
+  // register module in meta
+  meta.addModule(moduleScript, module.name, module.alias)
+
+  // retrieve and return process from meta
+  return meta.getProcess(procKey)
+}
+
+/**
+ * Pipeline step that prints every tuple when debugging is enabled.
+ *
+ * When processArgs.debug is truthy the tuples are passed to 'view' with a
+ * message containing the process key and debugKey; otherwise they go through
+ * an identity 'map'.
+ *
+ * @param processArgs Process arguments; 'debug' and 'key' are read.
+ * @param debugKey    Label included in the printed message.
+ */
+def debug(processArgs, debugKey) {
+  if (processArgs.debug) {
+    view { "process '${processArgs.key}' $debugKey tuple: $it" }
+  } else {
+    map { it }
+  }
+}
+
+/**
+ * Create the workflow that wraps this module's process.
+ *
+ * The workflow validates and normalises every incoming [id, data, ...] tuple,
+ * applies the optional map/filter/fromState hooks, combines default, params,
+ * processArgs.args and data values into the process arguments, runs the
+ * process, strips the dummy '.exitcode' entry from each output, and joins the
+ * result with the previous state through 'toState'.
+ *
+ * The returned workflow gets a 'run' method (calls workflowFactory again with
+ * new arguments) and a 'config' property (thisConfig).
+ *
+ * @param args Map of process arguments, see processProcessArgs().
+ * @return The workflow, named after the resolved 'key'.
+ */
+def workflowFactory(Map args) {
+  def processArgs = processProcessArgs(args)
+  def key = processArgs["key"]
+  def meta = ScriptMeta.current()
+
+  def workflowKey = key
+
+  // the process is created lazily, the first time the workflow body runs
+  def processObj = null
+
+  workflow workflowInstance {
+    take:
+    input_
+
+    main:
+    if (processObj == null) {
+      processObj = processFactory(processArgs)
+    }
+
+    mid1_ = input_
+      | debug(processArgs, "input")
+      | map { tuple ->
+        tuple = tuple.clone()
+
+        if (processArgs.map) {
+          tuple = processArgs.map(tuple)
+        }
+        if (processArgs.mapId) {
+          tuple[0] = processArgs.mapId(tuple[0])
+        }
+        if (processArgs.mapData) {
+          tuple[1] = processArgs.mapData(tuple[1])
+        }
+        if (processArgs.mapPassthrough) {
+          tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2))
+        }
+
+        // check tuple
+        assert tuple instanceof List :
+          "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
+        assert tuple.size() >= 2 :
+          "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Found: tuple.size() == ${tuple.size()}"
+
+        // check id field
+        assert tuple[0] instanceof CharSequence :
+          "Error in module '${key}': first element of tuple in channel should be a String\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Found: ${tuple[0]}"
+
+        // match file to input file
+        if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
+          def inputFiles = thisConfig.functionality.allArguments
+            .findAll { it.type == "file" && it.direction == "input" }
+
+          assert inputFiles.size() == 1 :
+            "Error in module '${key}' id '${tuple[0]}'.\n" +
+            " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
+            " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"
+
+          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
+        }
+
+        // check data field
+        assert tuple[1] instanceof Map :
+          "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"
+
+        // rename keys of data field in tuple
+        if (processArgs.renameKeys) {
+          assert processArgs.renameKeys instanceof Map :
+            "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
+            " Example: renameKeys: ['new_key': 'old_key'].\n" +
+            " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}"
+          assert tuple[1] instanceof Map :
+            "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
+            " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"
+
+          // TODO: allow renameKeys to be a function?
+          processArgs.renameKeys.each { newKey, oldKey ->
+            assert newKey instanceof CharSequence :
+              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
+              " Example: renameKeys: ['new_key': 'old_key'].\n" +
+              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
+            assert oldKey instanceof CharSequence :
+              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
+              " Example: renameKeys: ['new_key': 'old_key'].\n" +
+              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
+            assert tuple[1].containsKey(oldKey) :
+              "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" +
+              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
+            tuple[1].put(newKey, tuple[1][oldKey])
+          }
+          tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
+        }
+        tuple
+      }
+
+    if (processArgs.filter) {
+      mid2_ = mid1_
+        | filter{processArgs.filter(it)}
+    } else {
+      mid2_ = mid1_
+    }
+
+    if (processArgs.fromState) {
+      mid3_ = mid2_
+        | map{
+          def new_data = processArgs["fromState"](it.take(2))
+          [it[0], new_data]
+        }
+    } else {
+      mid3_ = mid2_
+    }
+
+    out0_ = mid3_
+      | debug(processArgs, "processed")
+      | map { tuple ->
+        def id = tuple[0]
+        def data = tuple[1]
+
+        // fetch default params from functionality
+        def defaultArgs = thisConfig.functionality.allArguments
+          .findAll { it.containsKey("default") }
+          .collectEntries { [ it.plainName, it.default ] }
+
+        // fetch overrides in params
+        def paramArgs = thisConfig.functionality.allArguments
+          .findAll { par ->
+            def argKey = key + "__" + par.plainName
+            params.containsKey(argKey) && params[argKey] != "viash_no_value"
+          }
+          .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] }
+
+        // fetch overrides in data
+        def dataArgs = thisConfig.functionality.allArguments
+          .findAll { data.containsKey(it.plainName) }
+          .collectEntries { [ it.plainName, data[it.plainName] ] }
+
+        // combine params
+        def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs
+
+        // remove arguments with explicit null values
+        combinedArgs.removeAll{it.value == null}
+
+        if (workflow.stubRun) {
+          // add id if missing
+          combinedArgs = [id: 'stub'] + combinedArgs
+        } else {
+          // check whether required arguments exist
+          thisConfig.functionality.allArguments
+            .forEach { par ->
+              if (par.required) {
+                assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+              }
+            }
+        }
+
+        // TODO: check whether parameters have the right type
+
+        // process input files separately
+        def inputPaths = thisConfig.functionality.allArguments
+          .findAll { it.type == "file" && it.direction == "input" }
+          .collect { par ->
+            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
+            def inputFiles = []
+            if (val == null) {
+              inputFiles = []
+            } else if (val instanceof List) {
+              inputFiles = val
+            } else if (val instanceof Path) {
+              inputFiles = [ val ]
+            } else {
+              inputFiles = []
+            }
+            if (!workflow.stubRun) {
+              // throw error when an input file doesn't exist
+              inputFiles.each{ file ->
+                assert file.exists() :
+                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
+                  " Required input file does not exist.\n" +
+                  " Path: '$file'.\n" +
+                  " Expected input file to exist"
+              }
+            }
+            inputFiles
+          }
+
+        // remove input files
+        def argsExclInputFiles = thisConfig.functionality.allArguments
+          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
+          .collectEntries { par ->
+            def parName = par.plainName
+            def val = combinedArgs[parName]
+            if (par.multiple && val instanceof Collection) {
+              val = val.join(par.multiple_sep)
+            }
+            if (par.direction == "output" && par.type == "file") {
+              val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
+            }
+            [parName, val]
+          }
+
+        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
+      }
+      | processObj
+      | map { output ->
+        def outputFiles = thisConfig.functionality.allArguments
+          .findAll { it.type == "file" && it.direction == "output" }
+          .indexed()
+          .collectEntries{ index, par ->
+            // 'def' keeps this value local to the closure
+            def out = output[index + 1]
+            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
+            // parenthesised: '!out instanceof List' would negate 'out' first
+            if (!(out instanceof List) || out.size() <= 1) {
+              if (par.multiple) {
+                out = []
+              } else {
+                assert !par.required :
+                  "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
+                  " Required output file is missing"
+                out = null
+              }
+            } else if (out.size() == 2 && !par.multiple) {
+              out = out[1]
+            } else {
+              out = out.drop(1)
+            }
+            [ par.plainName, out ]
+          }
+
+        // drop null outputs
+        outputFiles.removeAll{it.value == null}
+
+        if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) {
+          outputFiles = outputFiles.values()[0]
+        }
+
+        [ output[0], outputFiles ]
+      }
+
+    // join the output [id, output] with the previous state [id, state, ...]
+    out1_ = out0_.join(mid2_, failOnDuplicate: true)
+      // input tuple format: [id, output, prev_state, ...]
+      // output tuple format: [id, new_state, ...]
+      | map{
+        def new_state = processArgs["toState"](it)
+        [it[0], new_state] + it.drop(3)
+      }
+      | debug(processArgs, "output")
+
+
+    emit:
+    out1_
+  }
+
+  def wf = workflowInstance.cloneWithName(workflowKey)
+
+  // add factory function
+  wf.metaClass.run = { runArgs ->
+    workflowFactory(runArgs)
+  }
+  // add config to module for later introspection
+  wf.metaClass.config = thisConfig
+
+  return wf
+}
+
+// initialise default workflow
+myWfInstance = workflowFactory([:])
+
+// add workflow to environment
+ScriptMeta.current().addDefinition(myWfInstance)
+
+// anonymous workflow for running this module as a standalone
+workflow {
+  def mergedConfig = thisConfig
+  def mergedParams = [:] + params
+
+  // add id argument if it's not already in the config
+  if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) {
+    def idArg = [
+      'name': '--id',
+      'required': false,
+      'type': 'string',
+      'description': 'A unique id for every entry.',
+      'multiple': false
+    ]
+    mergedConfig.functionality.arguments.add(0, idArg)
+    mergedConfig = processConfig(mergedConfig)
+  }
+  if (!mergedParams.containsKey("id")) {
+    mergedParams.id = "run"
+  }
+
+  helpMessage(mergedConfig)
+
+  channelFromParams(mergedParams, mergedConfig)
+    | preprocessInputs("config": mergedConfig)
+    | view { "input: $it" }
+    | myWfInstance.run(
+      auto: [ publish: true ]
+    )
+    | view { "output: $it" }
+}
\ No newline at end of file
diff --git a/target/nextflow/reference/build_cellranger_reference/nextflow.config b/target/nextflow/reference/build_cellranger_reference/nextflow.config
new file mode 
100644 index 00000000000..9418dcf3275 --- /dev/null +++ b/target/nextflow/reference/build_cellranger_reference/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'build_cellranger_reference' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA and gene GTF files. Creates a new folder named after the genome.' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } 
+ withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml b/target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml new file mode 100644 index 00000000000..4779fa85c32 --- /dev/null +++ b/target/nextflow/reference/build_cellranger_reference/nextflow_params.yaml @@ -0,0 +1,8 @@ +# Arguments +genome_fasta: # please fill in - example: "genome_sequence.fa.gz" +transcriptome_gtf: # please fill in - example: "transcriptome_annotation.gtf.gz" +# output: "$id.$key.output.output" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/reference/build_cellranger_reference/nextflow_schema.json b/target/nextflow/reference/build_cellranger_reference/nextflow_schema.json new file mode 100644 index 00000000000..1641ad1b08c --- /dev/null +++ b/target/nextflow/reference/build_cellranger_reference/nextflow_schema.json @@ -0,0 +1,91 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": 
"build_cellranger_reference", +"description": "Build a Cell Ranger-compatible reference folder from user-supplied genome FASTA and gene GTF files. Creates a new folder named after the genome.", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "genome_fasta": { + "type": + "string", + "description": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta", + "help_text": "Type: `file`, required, example: `genome_sequence.fa.gz`. Reference genome fasta." + + } + + + , + "transcriptome_gtf": { + "type": + "string", + "description": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation", + "help_text": "Type: `file`, required, example: `transcriptome_annotation.gtf.gz`. Reference transcriptome annotation." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`, example: `cellranger_reference`. Output folder", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`, example: `cellranger_reference`. Output folder" + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/reference/make_reference/.config.vsh.yaml b/target/nextflow/reference/make_reference/.config.vsh.yaml new file mode 100644 index 00000000000..8923203c593 --- /dev/null +++ b/target/nextflow/reference/make_reference/.config.vsh.yaml @@ -0,0 +1,212 @@ +functionality: + name: "make_reference" + namespace: "reference" + version: "0.12.4" + authors: + - name: "Angela Oliveira Pisco" + roles: + - "author" + info: + role: "Contributor" + links: + github: "aopisco" + orcid: "0000-0003-0142-2355" + linkedin: "aopisco" + organizations: + - name: "Insitro" + href: "https://insitro.com" + role: "Director of Computational Biology" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + - name: "Robrecht Cannoodt" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--genome_fasta" + description: "Reference genome fasta. Example: " + info: null + example: + - "genome_fasta.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome_gtf" + description: "Reference transcriptome annotation." 
+ info: null + example: + - "transcriptome.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--ercc" + description: "ERCC sequence and annotation file." + info: null + example: + - "ercc.zip" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--subset_regex" + description: "Will subset the reference chromosomes using the given regex." + info: null + example: + - "(ERCC-00002|chr1)" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_fasta" + description: "Output genome sequence fasta." + info: null + example: + - "genome_sequence.fa.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output_gtf" + description: "Output transcriptome annotation gtf." 
+ info: null + example: + - "transcriptome_annotation.gtf.gz" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Preprocess and build a transcriptome reference.\n\nExample input files\ + \ are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n\ + \ - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n\ + \ - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "pigz" + - "seqkit" + - "curl" + - "wget" + - "unzip" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + 
mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/make_reference" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/make_reference/make_reference" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/reference/make_reference/main.nf b/target/nextflow/reference/make_reference/main.nf new file mode 100644 index 00000000000..84c4e0164ed --- /dev/null +++ b/target/nextflow/reference/make_reference/main.nf @@ -0,0 +1,2635 @@ +// make_reference 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Angela Oliveira Pisco (author) +// * Robrecht Cannoodt (author, maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "make_reference", + "namespace" : "reference", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Angela Oliveira Pisco", + "roles" : [ + "author" + ], + "info" : { + "role" : "Contributor", + "links" : { + "github" : "aopisco", + "orcid" : "0000-0003-0142-2355", + "linkedin" : "aopisco" + }, + "organizations" : [ + { + "name" : "Insitro", + "href" : "https://insitro.com", + "role" : "Director of Computational Biology" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--genome_fasta", + "description" : "Reference genome fasta. 
Example: ", + "example" : [ + "genome_fasta.fa.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--transcriptome_gtf", + "description" : "Reference transcriptome annotation.", + "example" : [ + "transcriptome.gtf.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--ercc", + "description" : "ERCC sequence and annotation file.", + "example" : [ + "ercc.zip" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--subset_regex", + "description" : "Will subset the reference chromosomes using the given regex.", + "example" : [ + "(ERCC-00002|chr1)" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output_fasta", + "description" : "Output genome sequence fasta.", + "example" : [ + "genome_sequence.fa.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output_gtf", + "description" : "Output transcriptome annotation gtf.", + "example" : [ + "transcriptome_annotation.gtf.gz" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/" + } + ], + "description" : 
"Preprocess and build a transcriptome reference.\n\nExample input files are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "run_test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "pigz", + "seqkit", + "curl", + "wget", + "unzip" + ], + "interactive" : false + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : 
"memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/reference/make_reference/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/reference/make_reference", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_GENOME_FASTA+x} ]; then echo "${VIASH_PAR_GENOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome_fasta='&'#" ; else echo "# par_genome_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_GTF+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_gtf='&'#" ; else echo "# par_transcriptome_gtf="; fi ) +$( if [ ! -z ${VIASH_PAR_ERCC+x} ]; then echo "${VIASH_PAR_ERCC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ercc='&'#" ; else echo "# par_ercc="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBSET_REGEX+x} ]; then echo "${VIASH_PAR_SUBSET_REGEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subset_regex='&'#" ; else echo "# par_subset_regex="; fi ) +$( if [ ! 
-z ${VIASH_PAR_OUTPUT_FASTA+x} ]; then echo "${VIASH_PAR_OUTPUT_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_fasta='&'#" ; else echo "# par_output_fasta="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_GTF+x} ]; then echo "${VIASH_PAR_OUTPUT_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_gtf='&'#" ; else echo "# par_output_gtf="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +# create temporary directory +tmpdir=\\$(mktemp -d "$VIASH_TEMP/\\$meta_functionality_name-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +echo "> Processing genome sequence" +genome_fasta="\\$tmpdir/genome_sequence.fa" +# curl "\\$par_genome_fasta" | gunzip > "\\$genome_fasta" +gunzip -c "\\$par_genome_fasta" > "\\$genome_fasta" + +echo "> Processing transcriptome annotation" +transcriptome_gtf="\\$tmpdir/transcriptome_annotation.gtf" +# curl "\\$par_transcriptome_gtf" | gunzip > "\\$transcriptome_gtf" +gunzip -c "\\$par_transcriptome_gtf"> "\\$transcriptome_gtf" + +if [[ ! -z \\$par_ercc ]]; then + echo "> Processing ERCC sequences" + # wget "\\$par_ercc" -O "\\$tmpdir/ercc.zip" + # unzip "\\$tmpdir/ercc.zip" -d "\\$tmpdir" + unzip "\\$par_ercc" -d "\\$tmpdir" + cat "\\$tmpdir/ERCC92.fa" >> "\\$genome_fasta" + cat "\\$tmpdir/ERCC92.gtf" >> "\\$transcriptome_gtf" +fi + +# create output & filter reference if so desired +if [[ ! 
-z \\$par_subset_regex ]]; then + echo "> Subsetting reference with regex '\\$par_subset_regex'" + awk '{print \\$1}' "\\$genome_fasta" | seqkit grep -r -p "^\\$par_subset_regex\\\\\\$" > "\\$tmpdir/genome_sequence_filtered.fa" + genome_fasta="\\$tmpdir/genome_sequence_filtered.fa" + grep -E "^\\$par_subset_regex[^A-Za-z0-9]" "\\$transcriptome_gtf" > "\\$tmpdir/transcriptome_annotation_filtered.gtf" + transcriptome_gtf="\\$tmpdir/transcriptome_annotation_filtered.gtf" + + echo + echo "Matched tags:" + cat "\\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq + echo +fi + +echo "> Gzipping outputs" +pigz -c "\\$genome_fasta" > "\\$par_output_fasta" +pigz -c "\\$transcriptome_gtf" > "\\$par_output_gtf" + +# to do: re enable +# echo "> Sanity check of outputs" +# readarray -t fasta_tags < <( cat "\\$genome_fasta" | grep '^>' | sed 's#^>##' | sed 's# .*##' | sort | uniq ) +# readarray -t transcriptome_tags < <( cat "\\$transcriptome_gtf" | cut -d\\$'\\\\t' -f1 | sort | uniq | grep '^[^#]' ) +# [ "\\${fasta_tags[*]}" == "\\${transcriptome_tags[*]}" ] || { echo "Warning: fasta tags differ from transcriptome tags"; exit 1; } +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/reference_make_reference", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. 
the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + // 'def' keeps this lookup local to the closure instead of leaking it into the script binding + def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_list are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple whose + * first element is the ID of the event and whose second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + // local variable: an undeclared assignment would create a script-global binding entry + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters.
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + // report the closure parameter; no variable named 'key' exists in this closure + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m =
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +/** + * Validate the 'auto' settings of a module. + * Entries with a null value are dropped first; every expected key must then + * be present and hold a Boolean. + * + * @param auto A Map of auto settings. + * @return A Map containing only the expected keys. + */ +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // 'def' keeps the list local instead of writing it to the script binding + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) : "Error in auto settings: missing required key '$key'" + assert auto[key] instanceof Boolean : "Error in auto settings: expected '$key' to be a Boolean. Found: class ${auto[key].getClass()}" + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // start from the existing publishDir entries (if any) and append the transcript entry + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure.
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // build ONE message string: a second argument would be taken as a logger format argument and never printed + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + // declare first, assign second, so the recursive calls inside the closure resolve to this variable + def valueToStr + valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.'
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/reference/make_reference/nextflow.config b/target/nextflow/reference/make_reference/nextflow.config new file mode 100644 index 
00000000000..efc17f8d31b --- /dev/null +++ b/target/nextflow/reference/make_reference/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'make_reference' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Preprocess and build a transcriptome reference.\n\nExample input files are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n' + author = 'Angela Oliveira Pisco, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + 
shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/reference/make_reference/nextflow_params.yaml b/target/nextflow/reference/make_reference/nextflow_params.yaml new file mode 100644 index 00000000000..f904aa639cb --- /dev/null +++ b/target/nextflow/reference/make_reference/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +genome_fasta: # please fill in - example: "genome_fasta.fa.gz" +transcriptome_gtf: # please fill in - example: "transcriptome.gtf.gz" +# ercc: "ercc.zip" +# subset_regex: "(ERCC-00002|chr1)" +# output_fasta: "$id.$key.output_fasta.gz" +# output_gtf: "$id.$key.output_gtf.gz" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/reference/make_reference/nextflow_schema.json 
b/target/nextflow/reference/make_reference/nextflow_schema.json new file mode 100644 index 00000000000..f0e22993ef6 --- /dev/null +++ b/target/nextflow/reference/make_reference/nextflow_schema.json @@ -0,0 +1,122 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "make_reference", +"description": "Preprocess and build a transcriptome reference.\n\nExample input files are:\n - `genome_fasta`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz\n - `transcriptome_gtf`: https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz\n - `ercc`: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "genome_fasta": { + "type": + "string", + "description": "Type: `file`, required, example: `genome_fasta.fa.gz`. Reference genome fasta", + "help_text": "Type: `file`, required, example: `genome_fasta.fa.gz`. Reference genome fasta. Example: " + + } + + + , + "transcriptome_gtf": { + "type": + "string", + "description": "Type: `file`, required, example: `transcriptome.gtf.gz`. Reference transcriptome annotation", + "help_text": "Type: `file`, required, example: `transcriptome.gtf.gz`. Reference transcriptome annotation." + + } + + + , + "ercc": { + "type": + "string", + "description": "Type: `file`, example: `ercc.zip`. ERCC sequence and annotation file", + "help_text": "Type: `file`, example: `ercc.zip`. ERCC sequence and annotation file." + + } + + + , + "subset_regex": { + "type": + "string", + "description": "Type: `string`, example: `(ERCC-00002|chr1)`. Will subset the reference chromosomes using the given regex", + "help_text": "Type: `string`, example: `(ERCC-00002|chr1)`. Will subset the reference chromosomes using the given regex." 
+ + } + + + , + "output_fasta": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_fasta.gz`, example: `genome_sequence.fa.gz`. Output genome sequence fasta", + "help_text": "Type: `file`, required, default: `$id.$key.output_fasta.gz`, example: `genome_sequence.fa.gz`. Output genome sequence fasta." + , + "default": "$id.$key.output_fasta.gz" + } + + + , + "output_gtf": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_gtf.gz`, example: `transcriptome_annotation.gtf.gz`. Output transcriptome annotation gtf", + "help_text": "Type: `file`, required, default: `$id.$key.output_gtf.gz`, example: `transcriptome_annotation.gtf.gz`. Output transcriptome annotation gtf." + , + "default": "$id.$key.output_gtf.gz" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/report/mermaid/.config.vsh.yaml b/target/nextflow/report/mermaid/.config.vsh.yaml new file mode 100644 index 00000000000..29c0e241526 --- /dev/null +++ b/target/nextflow/report/mermaid/.config.vsh.yaml @@ -0,0 +1,185 @@ +functionality: + name: "mermaid" + namespace: "report" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input directory" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Generated network as output." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_format" + description: "Output format for the generated image. 
By default will be inferred\ + \ from the extension \nof the file specified with --output.\n" + info: null + required: false + choices: + - "svg" + - "png" + - "pdf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--width" + description: "Width of the page" + info: null + default: + - 800 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--height" + description: "Height of the page" + info: null + default: + - 600 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--background_color" + description: "Background color for pngs/svgs (not pdfs)" + info: null + example: + - "#F0F0F0" + default: + - "white" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + - type: "file" + path: "./puppeteer-config.json" + description: "Generates a network from mermaid code.\n" + test_resources: + - type: "bash_script" + path: "test.sh" + is_executable: true + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "node:20-bullseye" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "javascript" + npm: + - "@mermaid-js/mermaid-cli" + - type: "apt" + packages: + - "chromium" + interactive: false + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: 
"memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/report/mermaid/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/report/mermaid" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/report/mermaid/mermaid" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/report/mermaid/main.nf b/target/nextflow/report/mermaid/main.nf new file mode 100644 index 00000000000..b06fc6a7aab --- /dev/null +++ b/target/nextflow/report/mermaid/main.nf @@ -0,0 +1,2554 @@ +// mermaid 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "mermaid", + "namespace" : "report", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input directory", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Generated network as output.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_format", + "description" : "Output format for the generated image. 
By default will be inferred from the extension \nof the file specified with --output.\n", + "required" : false, + "choices" : [ + "svg", + "png", + "pdf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--width", + "description" : "Width of the page", + "default" : [ + 800 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--height", + "description" : "Height of the page", + "default" : [ + 600 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--background_color", + "description" : "Background color for pngs/svgs (not pdfs)", + "example" : [ + "#F0F0F0" + ], + "default" : [ + "white" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/report/mermaid/" + }, + { + "type" : "file", + "path" : "./puppeteer-config.json", + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/report/mermaid/" + } + ], + "description" : "Generates a network from mermaid code.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/report/mermaid/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "node:20-bullseye", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + 
"chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "javascript", + "npm" : [ + "@mermaid-js/mermaid-cli" + ] + }, + { + "type" : "apt", + "packages" : [ + "chromium" + ], + "interactive" : false + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/report/mermaid/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/report/mermaid", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e 
+tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi ) +$( if [ ! -z ${VIASH_PAR_WIDTH+x} ]; then echo "${VIASH_PAR_WIDTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_width='&'#" ; else echo "# par_width="; fi ) +$( if [ ! -z ${VIASH_PAR_HEIGHT+x} ]; then echo "${VIASH_PAR_HEIGHT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_height='&'#" ; else echo "# par_height="; fi ) +$( if [ ! -z ${VIASH_PAR_BACKGROUND_COLOR+x} ]; then echo "${VIASH_PAR_BACKGROUND_COLOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_background_color='&'#" ; else echo "# par_background_color="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END +#!/bin/bash + +mmdc -p "\\$meta_resources_dir/puppeteer-config.json" \\\\ + -i "\\$par_input" \\\\ + -o "\\$par_output" \\\\ + --width "\\$par_width" \\\\ + --height "\\$par_height" \\\\ + \\${par_background_color:+--backgroundColor \\$par_background_color} \\\\ + \\${par_output_format:+--outputFormat \\$par_output_format} +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/report_mermaid", + "tag" : "0.12.0" + }, + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : 
false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. 
+ // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. A plain unique() removes the duplicates
+  // from ppIds itself, so the message below would no longer show the offending ids.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * Only values of arguments that the config declares as `type: file` with
+ * `direction: input` are touched, and only when they are still Strings;
+ * every other value is returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *   This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameter values to.
+ *
+ * @return A map of parameters where the locations of the input file parameters have been
+ *   resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of leaking into the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // values that are not Strings (e.g. already resolved paths) are passed through as-is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the parameter sets it contains.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns [ location of the param_list file (or null), parsed parameter sets ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * Parameters that are not described in the config (e.g. publish_dir) are not cast.
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Groovy's `as Boolean` applies Groovy truth to Strings, so a non-empty String such as
+  // "false" (e.g. a cell read from a csv param_list) would become true. Parse the literal
+  // values "true" and "false" explicitly and keep the plain cast for everything else.
+  def toBoolean = { value ->
+    if (value instanceof CharSequence) {
+      def lowered = value.toString().trim().toLowerCase()
+      if (lowered == "true") {
+        return true
+      }
+      if (lowered == "false") {
+        return false
+      }
+    }
+    return value as Boolean
+  }
+
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to the closure instead of leaking into the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so that single and multiple values share the same casting code
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{toBoolean(it)}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *                    also be specified in the Viash config (publishDir and
+ *                    publish_dir are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+      assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * Each parameter set is a list whose first element is the ID and whose second element
+ * is the parameter map; any further elements are passed through untouched.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
    accepted examples:
    - "1h"
    - "2days"
    - "1day 6hours 3minutes 30seconds"
  */
  if (drctv.containsKey("time")) {
    assert drctv["time"] instanceof CharSequence
    // todo: validation regex?
  }

  return drctv
}

// TODO: unit test processAuto
/**
 * Validate the 'auto' settings of a module.
 *
 * Entries with a null value are dropped first. All four expected keys
 * ('simplifyInput', 'simplifyOutput', 'transcript', 'publish') must then be
 * present and hold a Boolean.
 *
 * @param auto The 'auto' settings to validate.
 * @return A Map containing only the four expected keys.
 */
def processAuto(Map auto) {
  // remove null values
  auto = auto.findAll{k, v -> v != null}

  // declared with 'def' so the list stays local to this call instead of
  // becoming a script-level variable
  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]

  // check whether expected keys are all booleans (for now)
  for (key in expectedKeys) {
    assert auto.containsKey(key)
    assert auto[key] instanceof Boolean
  }

  return auto.subMap(expectedKeys)
}

def processProcessArgs(Map args) {
  // override defaults with args
  def processArgs = thisDefaultProcessArgs + args

  // check whether 'key' exists
  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (processArgs["key"] instanceof Closure) {
    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
  }
  def key = processArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check whether directives exists and apply defaults
  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map.
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List :
    "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"

  // if toState is a List, convert to map
  if (toState instanceof List) {
    // check whether toState is a list[string]
    assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings"
    toState = toState.collectEntries{[it, it]}
  }

  // if toState is a map, convert to closure
  if (toState instanceof Map) {
    // check whether toState is a map[string, string]
    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings"
    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings"
    def toStateMap = toState.clone()
    // turn the map into a closure to be used later on
    toState = { it ->
      def output = it[1]
      def state = it[2]
      assert output instanceof Map : "Error in module '$key': the output is not a Map"
      assert state instanceof Map : "Error in module '$key': the state is not a Map"
      def extraEntries = toStateMap.collectEntries{newkey, origkey ->
        // check whether all values of toState are in output
        assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output"
        [newkey, output[origkey]]
      }
      state + extraEntries
    }
  }

  processArgs["toState"] = toState

  // return output
  return processArgs
}

/**
 * Generate the source of a Nextflow process for this module, evaluate it
 * with a ScriptParser and return the resulting process.
 *
 * The process name is derived from the 'key' process argument. If a process
 * with that name was already registered, a number is appended and a warning
 * is logged.
 *
 * @param processArgs The process arguments; 'key', 'directives' and
 *                    'auto.transcript' / 'auto.publish' are read here.
 * @return The process registered under the generated name.
 */
def processFactory(Map processArgs) {
  // autodetect process key
  def wfKey = processArgs["key"]
  def procKeyPrefix = "${wfKey}_process"
  def meta = ScriptMeta.current()
  def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)}
  // "<prefix>" becomes 0 and "<prefix>N" becomes N
  def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()}
  def newNumber = (numbers + [-1]).max() + 1

  def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"

  if (newNumber > 0) {
    // the message is one concatenated string: a second argument would be
    // treated as a format argument by the logger and never be printed
    log.warn "Key for module '${wfKey}' is duplicated.\n" +
      "If you run a component multiple times in the same workflow,\n" +
      "it's recommended you set a unique key for every call,\n" +
      "for example: ${wfKey}.run(key: \"foo\")."
  }

  // subset directives and convert to list of tuples
  def drctv = processArgs.directives

  // TODO: unit test the two commands below
  // convert publish array into tags
  // the variable is declared before the closure is assigned, otherwise the
  // closure body cannot refer to itself for nested Lists and Maps
  def valueToStr
  valueToStr = { val ->
    // ignore closures
    if (val instanceof CharSequence) {
      if (!val.matches('^[{].*[}]$')) {
        '"' + val + '"'
      } else {
        val
      }
    } else if (val instanceof List) {
      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
    } else if (val instanceof Map) {
      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
    } else {
      val.inspect()
    }
  }

  // multiple entries allowed: label, publishdir
  def drctvStrs = drctv.collect { key, value ->
    if (key in ["label", "publishDir"]) {
      value.collect{ val ->
        if (val instanceof Map) {
          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
        } else if (val == null) {
          ""
        } else {
          "\n$key " + valueToStr(val)
        }
      }.join()
    } else if (value instanceof Map) {
      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
    } else {
      "\n$key " + valueToStr(value)
    }
  }.join()

  def inputPaths = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "input" }
    .collect { ', path(viash_par_' + it.plainName + ')' }
    .join()

  def outputPaths = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" }
    .collect { par ->
      // insert dummy into every output (see nextflow-io/nextflow#2678)
      if (!par.multiple) {
        ', path{[".exitcode", args.' + par.plainName + ']}'
      } else {
        ', path{[".exitcode"] + args.' + par.plainName + '}'
      }
    }
    .join()

  // TODO: move this functionality somewhere else?
  if (processArgs.auto.transcript) {
    outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}'
  } else {
    outputPaths = outputPaths + ', path{[".exitcode"]}'
  }

  // create dirs for output files (based on BashWrapper.createParentFiles)
  def createParentStr = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" && it.create_parent }
    .collect { par ->
      "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
    }
    .join("\n")

  // construct inputFileExports
  def inputFileExports = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
    .collect { par ->
      // 'def' keeps this helper string local to the closure
      def viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")"
      "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
    }

  // NOTE: if using docker, use /tmp instead of tmpDir!
  def tmpDir = java.nio.file.Paths.get(
    System.getenv('NXF_TEMP') ?:
    System.getenv('VIASH_TEMP') ?:
    System.getenv('VIASH_TMPDIR') ?:
    System.getenv('VIASH_TEMPDIR') ?:
    System.getenv('VIASH_TMP') ?:
    System.getenv('TEMP') ?:
    System.getenv('TMPDIR') ?:
    System.getenv('TEMPDIR') ?:
    System.getenv('TMP') ?:
    '/tmp'
  ).toAbsolutePath()

  // construct stub
  def stub = thisConfig.functionality.allArguments
    .findAll { it.type == "file" && it.direction == "output" }
    .collect { par ->
      "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
    }
    .join("\n")

  // escape script
  def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"')

  // publishdir assert
  def assertStr = processArgs.auto.publish || processArgs.auto.transcript ?
    """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ :
    ""

  // generate process string
  def procStr =
  """nextflow.enable.dsl=2
  |
  |process $procKey {$drctvStrs
  |input:
  | tuple val(id)$inputPaths, val(args), path(resourcesDir)
  |output:
  | tuple val("\$id")$outputPaths, optional: true
  |stub:
  |\"\"\"
  |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; }
  |$stub
  |\"\"\"
  |script:$assertStr
  |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') }
  |def parInject = args
  | .findAll{key, value -> value != null}
  | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""}
  | .join("\\n")
  |\"\"\"
  |# meta exports
  |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}"
  |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}"
  |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}"
  |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME"
  |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
  |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" }
  |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" }
  |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then
  | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 ))
  | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 ))
  | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 ))
  | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 ))
  | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 ))
  |fi
  |
  |# meta synonyms
  |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR"
  |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR"
  |
  |# create output dirs if need be
  |function mkdir_parent {
  | for file in "\\\$@"; do
  | mkdir -p "\\\$(dirname "\\\$file")"
  | done
  |}
  |$createParentStr
  |
  |# argument exports${inputFileExports.join()}
  |\$parInject
  |
  |# process script
  |${escapedScript}
  |\"\"\"
  |}
  |""".stripMargin()

  // TODO: print on debug
  // if (processArgs.debug == true) {
  //   println("######################\n$procStr\n######################")
  // }

  // create runtime process
  def ownerParams = new ScriptBinding.ParamsMap()
  def binding = new ScriptBinding().setParams(ownerParams)
  def module = new IncludeDef.Module(name: procKey)
  def scriptParser = new ScriptParser(session)
    .setModule(true)
    .setBinding(binding)
  scriptParser.scriptPath = ScriptMeta.current().getScriptPath()
  def moduleScript = scriptParser.runScript(procStr)
    .getScript()

  // register module in meta
  meta.addModule(moduleScript, module.name, module.alias)

  // retrieve and return process from meta
  return meta.getProcess(procKey)
}

// Returns a 'view' step that prints every tuple when processArgs.debug is
// set, and an identity 'map' step otherwise.
// NOTE(review): 'view' and 'map' are not defined in this block; presumably
// they resolve to the Nextflow channel operators. Confirm.
def debug(processArgs, debugKey) {
  if (processArgs.debug) {
    view { "process '${processArgs.key}' $debugKey tuple: $it" }
  } else {
    map { it }
  }
}

def workflowFactory(Map args) {
  def processArgs = processProcessArgs(args)
  def key = processArgs["key"]
  def meta = ScriptMeta.current()

  def workflowKey = key

  def processObj = null

  workflow workflowInstance {
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + // declare 'out' locally: an undeclared variable inside a closure is stored in the script binding and shared between concurrent invocations + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // NOTE: the instanceof test must be parenthesised; '!out instanceof List' parses as '(!out) instanceof List', which is always false, so a single (non-List) Path was never treated as a missing output + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/report/mermaid/nextflow.config b/target/nextflow/report/mermaid/nextflow.config new file mode 100644 index 00000000000..d95905ffb16 --- 
/dev/null +++ b/target/nextflow/report/mermaid/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'mermaid' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Generates a network from mermaid code.\n' + author = 'Dries De Maeyer' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + 
withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/report/mermaid/nextflow_params.yaml b/target/nextflow/report/mermaid/nextflow_params.yaml new file mode 100644 index 00000000000..6e937e2d1b0 --- /dev/null +++ b/target/nextflow/report/mermaid/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: "path/to/file" +# output: "$id.$key.output.output" +# output_format: "foo" +width: 800 +height: 600 +background_color: "white" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/report/mermaid/nextflow_schema.json b/target/nextflow/report/mermaid/nextflow_schema.json new file mode 100644 index 00000000000..3ccef175850 --- /dev/null +++ b/target/nextflow/report/mermaid/nextflow_schema.json @@ -0,0 +1,126 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "mermaid", +"description": "Generates a network from mermaid code.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input directory", + "help_text": "Type: `file`, required. 
Input directory" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Generated network as output", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Generated network as output." + , + "default": "$id.$key.output.output" + } + + + , + "output_format": { + "type": + "string", + "description": "Type: `string`, choices: ``svg`, `png`, `pdf``. Output format for the generated image", + "help_text": "Type: `string`, choices: ``svg`, `png`, `pdf``. Output format for the generated image. By default will be inferred from the extension \nof the file specified with --output.\n", + "enum": ["svg", "png", "pdf"] + + + } + + + , + "width": { + "type": + "integer", + "description": "Type: `integer`, default: `800`. Width of the page", + "help_text": "Type: `integer`, default: `800`. Width of the page" + , + "default": "800" + } + + + , + "height": { + "type": + "integer", + "description": "Type: `integer`, default: `600`. Height of the page", + "help_text": "Type: `integer`, default: `600`. Height of the page" + , + "default": "600" + } + + + , + "background_color": { + "type": + "string", + "description": "Type: `string`, default: `white`, example: `#F0F0F0`. Background color for pngs/svgs (not pdfs)", + "help_text": "Type: `string`, default: `white`, example: `#F0F0F0`. Background color for pngs/svgs (not pdfs)" + , + "default": "white" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
+ + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/report/mermaid/puppeteer-config.json b/target/nextflow/report/mermaid/puppeteer-config.json new file mode 100644 index 00000000000..7b2851c2995 --- /dev/null +++ b/target/nextflow/report/mermaid/puppeteer-config.json @@ -0,0 +1,6 @@ +{ + "executablePath": "/usr/bin/chromium", + "args": [ + "--no-sandbox" + ] +} \ No newline at end of file diff --git a/target/nextflow/transfer/publish/.config.vsh.yaml b/target/nextflow/transfer/publish/.config.vsh.yaml new file mode 100644 index 00000000000..099a2122b8d --- /dev/null +++ b/target/nextflow/transfer/publish/.config.vsh.yaml @@ -0,0 +1,125 @@ +functionality: + name: "publish" + namespace: "transfer" + version: "0.12.4" + authors: + - name: "Toni Verbeiren" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input filename" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output filename" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Publish an artifact and optionally rename with parameters" + test_resources: + - type: "bash_script" + path: "run_test.sh" + is_executable: true + info: null + status: "enabled" + 
requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transfer/publish/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transfer/publish" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transfer/publish/publish" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/transfer/publish/main.nf 
b/target/nextflow/transfer/publish/main.nf new file mode 100644 index 00000000000..8caa1b903b7 --- /dev/null +++ b/target/nextflow/transfer/publish/main.nf @@ -0,0 +1,2474 @@ +// publish 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Toni Verbeiren (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "publish", + "namespace" : "transfer", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Toni Verbeiren", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "github" : "tverbeiren", + "linkedin" : "verbeiren" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist and CEO" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input filename", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output filename", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + 
"type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transfer/publish/" + } + ], + "description" : "Publish an artifact and optionally rename with parameters", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "run_test.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transfer/publish/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + 
"cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/transfer/publish/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transfer/publish", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +parent=\\`dirname "\\$par_output"\\` +if [[ ! 
-d "\\$parent" ]]; then + mkdir -p "\\$parent" +fi + +cp -r "\\$par_input" "\\$par_output" +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/transfer_publish", + "tag" : "0.12.0" + }, + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. 
+ // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]} // ids; compared with a de-duplicated *copy* so the message below still lists every detected id
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * @param paramList A Map containing parameters to process.
+ *   This function assumes that files are still of type String; other values are left untouched.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *   using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the locations of the input file parameters have been
+ *   resolved relative to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName} // 'def' keeps this local instead of a script-wide binding variable
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected param_list format
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs; every parser returns [file-or-null, parsed content]
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relative to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration (see the readConfig() function).
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName}) // 'def': keep local, no script-wide binding variable
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be (only for arguments known to the config)
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameters to be processed. All parameters must
+ *   also be specified in the Viash config (publishDir / publish_dir are also accepted).
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *   the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ * - Split the parameter values according to their separator if
+ *   the parameter accepts multiple values
+ * - Cast the parameters to their correct types.
+ * - Assertions:
+ *   ~ Check if any unknown parameters are found
+ *   ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @return The input parameters that have been processed; elements after the first two are passed through as is.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ * - A filtering of the params which can be found in the config file.
+ * - Process the params_list argument which allows a user to initialise
+ * a Vdsl3 channel with multiple parameter sets. Possible formats are
+ * csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ * which correspond to the different arguments of this pipeline. A json or a yaml
+ * file should be a list of maps, each of which has keys corresponding to the
+ * arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ * When passing a csv, json or yaml, relative path names are relativized to the
+ * location of the parameter file.
+ * - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ * provides a list of arguments that can be split up into multiple events
+ * in the output channel possible formats of param_lists are: a csv file,
+ * json file, a yaml file or a yaml blob. Each parameters set (event) must
+ * have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ * the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element: the param_list id wins over the global id
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list' (param_list values take precedence)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) // null for e.g. publish_dir
+      if ( paramValue != null || parameterSettings?.default != null ) { // null-safe read that does not insert a 'default' key into the config
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ * provides a list of arguments that can be split up into multiple events
+ * in the output channel possible formats of param_lists are: a csv file,
+ * json file, a yaml file or a yaml blob. Each parameters set (event) must
+ * have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ * using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ * first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ * - Gather default parameters from the Viash config and make
+ * sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method).
+ * - Apply the default parameter to the input parameters.
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] // declared with 'def' so it stays closure-local instead of landing in the script binding + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); anything that is not a List with 2+ entries counts as missing + if (out !instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/transfer/publish/nextflow.config b/target/nextflow/transfer/publish/nextflow.config new file mode 100644 index 00000000000..e90f5f6cc05 --- 
/dev/null +++ b/target/nextflow/transfer/publish/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'publish' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Publish an artifact and optionally rename with parameters' + author = 'Toni Verbeiren' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { 
memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/transfer/publish/nextflow_params.yaml b/target/nextflow/transfer/publish/nextflow_params.yaml new file mode 100644 index 00000000000..ba67ce27737 --- /dev/null +++ b/target/nextflow/transfer/publish/nextflow_params.yaml @@ -0,0 +1,7 @@ +# Arguments +input: # please fill in - example: "path/to/file" +# output: "$id.$key.output.output" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/transfer/publish/nextflow_schema.json b/target/nextflow/transfer/publish/nextflow_schema.json new file mode 100644 index 00000000000..22417228cd8 --- /dev/null +++ b/target/nextflow/transfer/publish/nextflow_schema.json @@ -0,0 +1,81 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "publish", +"description": "Publish an artifact and optionally rename with parameters", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input filename", + "help_text": "Type: `file`, required. 
Input filename" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Output filename", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output filename" + , + "default": "$id.$key.output.output" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/transform/clr/.config.vsh.yaml b/target/nextflow/transform/clr/.config.vsh.yaml new file mode 100644 index 00000000000..d13d5d690e7 --- /dev/null +++ b/target/nextflow/transform/clr/.config.vsh.yaml @@ -0,0 +1,188 @@ +functionality: + name: "clr" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "prot" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + 
description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_layer" + description: "Output layer to use. By default, use X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + description: "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + - "muon~=0.1.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "midcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: 
false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/clr/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/clr" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/clr/clr" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/transform/clr/main.nf b/target/nextflow/transform/clr/main.nf new file mode 100644 index 00000000000..270b8588416 --- /dev/null +++ b/target/nextflow/transform/clr/main.nf @@ -0,0 +1,2577 @@ +// clr 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "clr", + "namespace" : "transform", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "prot" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" 
: "par" + }, + { + "type" : "string", + "name" : "--output_layer", + "description" : "Output layer to use. By default, use X.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/clr/" + } + ], + "description" : "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/clr/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5", + "muon~=0.1.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "midcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, 
+ "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/clr/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/clr", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +from muon import prot as pt +from mudata import read_h5mu + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! 
-z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + + +def main(): + input_h5mu = read_h5mu(par['input']) + modality = input_h5mu[par['modality']] + normalized_counts = pt.pp.clr(modality, inplace=False if par['output_layer'] else True) + if par['output_layer'] and not normalized_counts: + raise RuntimeError("CLR failed to return the requested output layer") + if normalized_counts: + input_h5mu[par["modality"]].layers[par['output_layer']] = normalized_counts.X + input_h5mu.write_h5mu(par['output'], compression=par["output_compression"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/transform_clr", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "midcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. 
the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // The id is the first element of every parameter set.
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy; the plain unique() would strip the
+  // duplicates from ppIds itself, so the failure message could no longer show them.
+  def uniqueIds = ppIds.unique(false)
+  assert ppIds.size() == uniqueIds.size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to a given path.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameter values to.
+ *
+ * @return A map of parameters where the locations of the input file parameters have been
+ *         resolved relative to the provided path. Values that are not Strings, and
+ *         parameters that are not input files, are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure call instead of the shared script binding.
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // Non-String values are passed through as-is, mirroring the Collection branch above.
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format;
+  // every parser returns a pair [source file or null, parsed parameter sets]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob).
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // Both lookups are declared with 'def': without it they would be stored in the
+    // script binding and shared between concurrent invocations of this closure.
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // Work on a list throughout so single and multiple values share one code path
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): parameters without settings (e.g. publish_dir) skip this step and so
+    // stay wrapped in a list - confirm that callers expect that.
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    // A parameter set is [id, parameter map, ...passthrough]; only the map is processed.
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list';
+    // on a key clash the 'param_list' value (right-hand operand) is kept
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // Declared with 'def' so it is not stored in the shared script binding.
+      // It is null for names that are not arguments of the config (e.g. publish_dir),
+      // hence the safe navigation below.
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         contains the ID of the event as first element and a parameter map as second element.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of publishing it into the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/transform/clr/nextflow.config b/target/nextflow/transform/clr/nextflow.config new file mode 100644 index 00000000000..cc6a5a3cd21 --- 
/dev/null +++ b/target/nextflow/transform/clr/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'clr' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb 
{ memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/transform/clr/nextflow_params.yaml b/target/nextflow/transform/clr/nextflow_params.yaml new file mode 100644 index 00000000000..f8ea11d94b4 --- /dev/null +++ b/target/nextflow/transform/clr/nextflow_params.yaml @@ -0,0 +1,10 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "prot" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +# output_layer: "foo" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/clr/nextflow_schema.json b/target/nextflow/transform/clr/nextflow_schema.json new file mode 100644 index 00000000000..043ac6b8144 --- /dev/null +++ b/target/nextflow/transform/clr/nextflow_schema.json @@ -0,0 +1,114 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "clr", +"description": "Perform CLR normalization on CITE-seq data (Stoeckius et al., 2017).\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. 
Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `prot`. ", + "help_text": "Type: `string`, default: `prot`. " + , + "default": "prot" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "output_layer": { + "type": + "string", + "description": "Type: `string`. Output layer to use", + "help_text": "Type: `string`. Output layer to use. By default, use X." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/transform/delete_layer/.config.vsh.yaml b/target/nextflow/transform/delete_layer/.config.vsh.yaml new file mode 100644 index 00000000000..5b6705e0e51 --- /dev/null +++ b/target/nextflow/transform/delete_layer/.config.vsh.yaml @@ -0,0 +1,196 @@ +functionality: + name: "delete_layer" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--layer" + description: "Input layer to remove" + info: null + required: true + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--missing_ok" + description: "Do not raise an error if the layer does not exist for all modalities." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/compress_h5mu.py" + - type: "file" + path: "src/utils/setup_logger.py" + description: "Delete an anndata layer from one or more modalities.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" 
+ mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/delete_layer" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/delete_layer/delete_layer" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/transform/delete_layer/compress_h5mu.py b/target/nextflow/transform/delete_layer/compress_h5mu.py new file mode 100644 index 00000000000..9d92395a573 --- /dev/null +++ b/target/nextflow/transform/delete_layer/compress_h5mu.py @@ -0,0 +1,49 @@ +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = 
output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = input_bytes.read(100) + # The metadata is padded with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\0" * (512 - nbytes)) diff --git a/target/nextflow/transform/delete_layer/main.nf b/target/nextflow/transform/delete_layer/main.nf new file mode 100644 index 00000000000..a359b2ebc80 --- /dev/null +++ b/target/nextflow/transform/delete_layer/main.nf @@ -0,0 +1,2681 @@ +// delete_layer 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "delete_layer", + "namespace" : "transform", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--layer", + "description" : "Input layer to remove", + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : 
"output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--missing_ok", + "description" : "Do not raise an error if the layer does not exist for all modalities.", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/" + }, + { + "type" : "file", + "path" : "src/utils/compress_h5mu.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Delete an anndata layer from one or more modalities.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : 
"https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/delete_layer/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/delete_layer", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : 
"https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +from mudata import read_h5ad, write_h5ad +import shutil +from pathlib import Path + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'missing_ok': $( if [ ! -z ${VIASH_PAR_MISSING_OK+x} ]; then echo "r'${VIASH_PAR_MISSING_OK//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +# START TEMPORARY WORKAROUND compress_h5mu +# reason: resources aren't available when using Nextflow fusion +# from compress_h5mu import compress_h5mu +from h5py import File as H5File +from h5py import Group, Dataset +from pathlib import Path +from typing import Union, Literal +from functools import partial + + +def compress_h5mu(input_path: Union[str, Path], + output_path: 
Union[str, Path], + compression: Union[Literal['gzip'], Literal['lzf']]): + input_path, output_path = str(input_path), str(output_path) + + def copy_attributes(in_object, out_object): + for key, value in in_object.attrs.items(): + out_object.attrs[key] = value + + def visit_path(output_h5: H5File, + compression: Union[Literal['gzip'], Literal['lzf']], + name: str, object: Union[Group, Dataset]): + if isinstance(object, Group): + new_group = output_h5.create_group(name) + copy_attributes(object, new_group) + elif isinstance(object, Dataset): + # Compression only works for non-scalar Dataset objects + # Scalar objects dont have a shape defined + if not object.compression and object.shape not in [None, ()]: + new_dataset = output_h5.create_dataset(name, data=object, compression=compression) + copy_attributes(object, new_dataset) + else: + output_h5.copy(object, name) + else: + raise NotImplementedError(f"Could not copy element {name}, " + f"type has not been implemented yet: {type(object)}") + + with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: + copy_attributes(input_h5, output_h5) + input_h5.visititems(partial(visit_path, output_h5, compression)) + + with open(input_path, "rb") as input_bytes: + # Mudata puts metadata like this in the first 512 bytes: + # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) + # See mudata/_core/io.py, read_h5mu() function + starting_metadata = input_bytes.read(100) + # The metadata is padded with extra null bytes up until 512 bytes + truncate_location = starting_metadata.find(b"\\\\x00") + starting_metadata = starting_metadata[:truncate_location] + with open(output_path, "br+") as f: + nbytes = f.write(starting_metadata) + f.write(b"\\\\0" * (512 - nbytes)) +# END TEMPORARY WORKAROUND compress_h5mu + +def main(): + input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] + + logger.info('Reading input file %s, modality %s.', 
input_file, mod_name) + mod = read_h5ad(input_file, mod=mod_name) + for layer in par['layer']: + if layer not in mod.layers: + if par['missing_ok']: + continue + raise ValueError(f"Layer '{layer}' is not present in modality {mod_name}.") + logger.info('Deleting layer %s from modality %s.', layer, mod_name) + del mod.layers[layer] + + logger.info('Writing output to %s.', par['output']) + output_file_uncompressed = output_file.with_name(output_file.stem + "_uncompressed.h5mu") \\\\ + if par["output_compression"] else output_file + shutil.copyfile(par['input'], output_file_uncompressed) + write_h5ad(filename=output_file_uncompressed, mod=mod_name, data=mod) + if par["output_compression"]: + compress_h5mu(output_file_uncompressed, output_file, compression=par["output_compression"]) + output_file_uncompressed.unlink() + + logger.info('Finished.') + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/transform_delete_layer", + "tag" : "0.12.0" + }, + "label" : [ + "midmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. 
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator. Parameters unknown to the config are returned unchanged, and a one-element collection passed to a non-multiple argument is unwrapped. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if the argument is not found in the config, do not alter its value + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters, check that multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * File values are expected to still be of type String; non-String values are returned unchanged. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the locations of the input file parameters have been + * resolved relative to the provided path. Only arguments of type "file" with direction "input" are touched. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + def argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from a List of Maps to a List of [id, Map] pairs by moving the ID to the first element
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration; its argument settings determine each cast.
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName}) // local: must not leak into the script binding
+    // don't parse parameters like publish_dir (in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) { // parameters without settings (e.g. publish_dir) have no default
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose first
+ *         element is the ID of the event and whose second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  def processedParams = _paramsToParamSets(params, config) // local: must not leak into the script binding
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey -> // every required key must be present in the map
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] // local: must not leak into the script binding
+
+  // check whether expected keys are all present and all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected auto argument '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys) // only the expected keys are returned; any other key is dropped
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/transform/delete_layer/nextflow.config b/target/nextflow/transform/delete_layer/nextflow.config new file mode 100644 index 
00000000000..974fb132735 --- /dev/null +++ b/target/nextflow/transform/delete_layer/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'delete_layer' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Delete an anndata layer from one or more modalities.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 
256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/transform/delete_layer/nextflow_params.yaml b/target/nextflow/transform/delete_layer/nextflow_params.yaml new file mode 100644 index 00000000000..b3cca00ac16 --- /dev/null +++ b/target/nextflow/transform/delete_layer/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +layer: # please fill in - example: ["foo"] +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +missing_ok: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/delete_layer/nextflow_schema.json b/target/nextflow/transform/delete_layer/nextflow_schema.json new file mode 100644 index 00000000000..09e743fd401 --- /dev/null +++ b/target/nextflow/transform/delete_layer/nextflow_schema.json @@ -0,0 +1,125 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "delete_layer", +"description": "Delete an anndata layer from one or more modalities.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": 
{ + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "layer": { + "type": + "string", + "description": "Type: List of `string`, required, multiple_sep: `\":\"`. Input layer to remove", + "help_text": "Type: List of `string`, required, multiple_sep: `\":\"`. Input layer to remove" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "missing_ok": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not raise an error if the layer does not exist for all modalities", + "help_text": "Type: `boolean_true`, default: `false`. Do not raise an error if the layer does not exist for all modalities." + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/transform/delete_layer/setup_logger.py b/target/nextflow/transform/delete_layer/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/transform/delete_layer/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/transform/log1p/.config.vsh.yaml b/target/nextflow/transform/log1p/.config.vsh.yaml new file mode 100644 index 00000000000..735bf74a31b --- /dev/null +++ b/target/nextflow/transform/log1p/.config.vsh.yaml @@ -0,0 +1,225 @@ +functionality: + name: "log1p" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - 
type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. If None, X is normalized" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_layer" + description: "Output layer to use. By default, use X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--base" + info: null + example: + - 2.0 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes\ + \ the natural logarithm unless a different base is given.\n" + test_resources: + - type: "python_script" + path: "run_test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + 
cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/log1p/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/log1p" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/log1p/log1p" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/transform/log1p/main.nf b/target/nextflow/transform/log1p/main.nf new file mode 100644 index 00000000000..8200de683ba --- /dev/null +++ b/target/nextflow/transform/log1p/main.nf @@ -0,0 +1,2668 @@ +// log1p 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries De Maeyer (maintainer) +// * Robrecht Cannoodt (contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "log1p", + "namespace" : "transform", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_layer", + "description" : "Input layer to use. 
If None, X is normalized", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_layer", + "description" : "Output layer to use. By default, use X.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--base", + "example" : [ + 2.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/log1p/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes the natural logarithm unless a different base is given.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "run_test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/log1p/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midmem", + "lowcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : 
"memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/log1p/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/log1p", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import scanpy as sc +import mudata as mu +import anndata as ad +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'base': $( if [ ! -z ${VIASH_PAR_BASE+x} ]; then echo "float(r'${VIASH_PAR_BASE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) +mdata.var_names_make_unique() + +mod = par["modality"] +logger.info("Performing log transformation on modality %s", mod) +data = mdata.mod[mod] + +# Make our own copy with not a lot of data +# this avoid excessive memory usage and accidental overwrites +input_layer = data.layers[par["input_layer"]] \\\\ + if par["input_layer"] else data.X +data_for_scanpy = ad.AnnData(X=input_layer.copy()) +sc.pp.log1p(data_for_scanpy, + base=par["base"], + layer=None, # use X + copy=False) # allow overwrites in the copy that was made + +# Scanpy will overwrite the input layer. 
+# So fetch input layer from the copy and use it to populate the output slot +if par["output_layer"]: + data.layers[par["output_layer"]] = data_for_scanpy.X +else: + data.X = data_for_scanpy.X +data.uns['log1p'] = data_for_scanpy.uns['log1p'].copy() + +logger.info("Writing to file %s", par["output"]) +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/transform_log1p", + "tag" : "0.12.0" + }, + "label" : [ + "midmem", + "lowcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. 
+ // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/transform/log1p/nextflow.config b/target/nextflow/transform/log1p/nextflow.config new file mode 100644 index 00000000000..968128e5b75 --- 
/dev/null +++ b/target/nextflow/transform/log1p/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'log1p' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Logarithmize the data matrix. Computes X = log(X + 1), where log denotes the natural logarithm unless a different base is given.\n' + author = 'Dries De Maeyer, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { 
memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/transform/log1p/nextflow_params.yaml b/target/nextflow/transform/log1p/nextflow_params.yaml new file mode 100644 index 00000000000..6a8622d8570 --- /dev/null +++ b/target/nextflow/transform/log1p/nextflow_params.yaml @@ -0,0 +1,12 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# input_layer: "foo" +# output_layer: "foo" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +# base: 2 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/log1p/nextflow_schema.json b/target/nextflow/transform/log1p/nextflow_schema.json new file mode 100644 index 00000000000..b05824b7fbb --- /dev/null +++ b/target/nextflow/transform/log1p/nextflow_schema.json @@ -0,0 +1,134 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "log1p", +"description": "Logarithmize the data matrix. 
Computes X = log(X + 1), where log denotes the natural logarithm unless a different base is given.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "input_layer": { + "type": + "string", + "description": "Type: `string`. Input layer to use", + "help_text": "Type: `string`. Input layer to use. If None, X is normalized" + + } + + + , + "output_layer": { + "type": + "string", + "description": "Type: `string`. Output layer to use", + "help_text": "Type: `string`. Output layer to use. By default, use X." + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "base": { + "type": + "number", + "description": "Type: `double`, example: `2`. ", + "help_text": "Type: `double`, example: `2`. " + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/transform/log1p/setup_logger.py b/target/nextflow/transform/log1p/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/transform/log1p/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/transform/normalize_total/.config.vsh.yaml b/target/nextflow/transform/normalize_total/.config.vsh.yaml new file mode 100644 index 00000000000..9f6bb962f90 --- /dev/null +++ b/target/nextflow/transform/normalize_total/.config.vsh.yaml @@ -0,0 +1,242 @@ +functionality: + name: "normalize_total" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries De Maeyer" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "ddemaeyer@gmail.com" + github: "ddemaeyer" + linkedin: "dries-de-maeyer-b46a814" + organizations: + - name: "Janssen Pharmaceuticals" + href: "https://www.janssen.com" + role: "Principal Scientist" + - name: "Robrecht Cannoodt" + roles: + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core 
Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--input_layer" + description: "Input layer to use. By default, X is normalized" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_layer" + description: "Output layer to use. By default, use X." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--target_sum" + description: "If None, after normalization, each observation (cell) has a total\ + \ count equal to the median of total counts for observations (cells) before\ + \ normalization." + info: null + default: + - 10000 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--exclude_highly_expressed" + description: "Exclude (very) highly expressed genes for the computation of the\ + \ normalization factor (size factor) for each cell. 
A gene is considered highly\ + \ expressed, if it has more than max_fraction of the total counts in at least\ + \ one cell. The not-excluded genes will sum up to target_sum." + info: null + direction: "input" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Normalize counts per cell.\n\nNormalize each cell by total counts\ + \ over all genes, so that every cell has the same total count after normalization.\ + \ If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True,\ + \ very highly expressed genes are excluded from the computation of the normalization\ + \ factor (size factor) for each cell. This is meaningful as these can strongly\ + \ influence the resulting normalized values for all other genes [Weinreb17].\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim-bullseye" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: 
false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/normalize_total" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/normalize_total/normalize_total" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/transform/normalize_total/main.nf b/target/nextflow/transform/normalize_total/main.nf new file mode 100644 index 00000000000..5f3c0d1859c --- /dev/null +++ b/target/nextflow/transform/normalize_total/main.nf @@ -0,0 +1,2669 @@ +// normalize_total 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. 
The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries De Maeyer (maintainer) +// * Robrecht Cannoodt (contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "normalize_total", + "namespace" : "transform", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries De Maeyer", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "ddemaeyer@gmail.com", + "github" : "ddemaeyer", + "linkedin" : "dries-de-maeyer-b46a814" + }, + "organizations" : [ + { + "name" : "Janssen Pharmaceuticals", + "href" : "https://www.janssen.com", + "role" : "Principal Scientist" + } + ] + } + }, + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "default" : [ + "rna" + ], + "required" : false, + 
"direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--input_layer", + "description" : "Input layer to use. By default, X is normalized", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_layer", + "description" : "Output layer to use. By default, use X.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--target_sum", + "description" : "If None, after normalization, each observation (cell) has a total count equal to the median of total counts for observations (cells) before normalization.", + "default" : [ + 10000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--exclude_highly_expressed", + "description" : "Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell. A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. 
The not-excluded genes will sum up to target_sum.", + "direction" : "input", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Normalize counts per cell.\n\nNormalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True, very highly expressed genes are excluded from the computation of the normalization factor (size factor) for each cell. This is meaningful as these can strongly influence the resulting normalized values for all other genes [Weinreb17].\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim-bullseye", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libhdf5-dev", + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + 
"packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "midmem", + "lowcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/normalize_total/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/normalize_total", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > 
"$tempscript" << VIASHMAIN +import sys +import scanpy as sc +import mudata as mu + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_layer': $( if [ ! -z ${VIASH_PAR_INPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_INPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_layer': $( if [ ! -z ${VIASH_PAR_OUTPUT_LAYER+x} ]; then echo "r'${VIASH_PAR_OUTPUT_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'target_sum': $( if [ ! -z ${VIASH_PAR_TARGET_SUM+x} ]; then echo "int(r'${VIASH_PAR_TARGET_SUM//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'exclude_highly_expressed': $( if [ ! -z ${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED+x} ]; then echo "r'${VIASH_PAR_EXCLUDE_HIGHLY_EXPRESSED//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) +mdata.var_names_make_unique() + +logger.info(par) + +mod = par["modality"] +logger.info("Performing total normalization on modality %s", mod) +dat = mdata.mod[mod] +if 
par['input_layer'] and not par['input_layer'] in dat.layers.keys(): + raise ValueError(f"Input layer {par['input_layer']} not found in {mod}") +output_data = sc.pp.normalize_total(dat, + layer=par["input_layer"], + copy=True if par["output_layer"] else False) + +if output_data: + dat.layers[par["output_layer"]] = output_data.X + +logger.info("Writing to file") +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/transform_normalize_total", + "tag" : "0.12.0" + }, + "label" : [ + "midmem", + "lowcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. 
+ // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+            'example': 'my_params.yaml',
+            'multiple': false,
+            'hidden': true
+          ],
+        ]
+      ]
+    ]
+  ]
+  ]
+
+  return processConfig(mergeMap(config, localConfig))
+}
+
+// helper functions for generating help //
+
+// based on io.viash.helpers.Format.wordWrap
+/**
+ * Greedily wrap a string into lines of at most maxLength characters.
+ *
+ * @param str The text to wrap; it is split on single whitespace characters.
+ * @param maxLength Maximum line length. A single word longer than this
+ *                  is kept on a line of its own.
+ *
+ * @return A list of lines.
+ */
+def formatWordWrap(str, maxLength) {
+  def words = str.split("\\s").toList()
+
+  def line = ""
+  def lines = []
+  words.each{word ->
+    if (line.isEmpty()) {
+      // start of a line: do not prepend a separator (avoids a leading space)
+      line = word
+    } else if (line.length() + word.length() + 1 <= maxLength) {
+      line = line + " " + word
+    } else {
+      lines.add(line)
+      line = word
+    }
+  }
+  lines.add(line)
+  return lines
+}
+
+// based on Format.paragraphWrap
+/**
+ * Wrap a text paragraph by paragraph (paragraphs are separated by "\n").
+ *
+ * @param str The text to wrap.
+ * @param maxLength Maximum line length.
+ *
+ * @return A list of lines; every paragraph contributes at least one line.
+ */
+def paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    // a whitespace-only paragraph splits into zero words: keep it as an empty line
+    def line = words.isEmpty() ? "" : words[0]
+    words.drop(1).each{word ->
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    outLines.add(line)
+  }
+  return outLines
+}
+
+/**
+ * Render the help entry of one argument: its `--name`, the named properties
+ * (type, default, example, choices, min, max) and the wrapped description.
+ *
+ * @param param The argument settings (a Map taken from the Viash config).
+ *
+ * @return The help text for this argument.
+ */
+def generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    // null-safe: an argument without a direction is not an output
+    ["output", param.direction?.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if (param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      example = param.example.toString()
+    }
+  }
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  // Literal replace() is used on purpose: in replaceAll() the replacement string
+  // treats a backslash as an escape character, which silently drops it.
+  def escapeChoice = { choice ->
+    def s1 = choice.replace("\n", "\\n")
+    def s2 = s1.replace("\"", "\\\"")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    // show embedded newlines as a literal backslash-n so each property stays on one line
+    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+// Based on Helper.generateHelp() in Helper.scala
+/**
+ * Build the complete help text: name and version, description, usage
+ * and one section per argument group.
+ *
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The help text.
+ */
+def generateHelp(config) {
+  def fun = config.functionality
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // group members may be given by name; names without a matching argument are dropped
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+// Print the help text and exit with status 0 when the 'help' parameter exists.
+def helpMessage(config) {
+  if (paramExists("help")) {
+    def mergedConfig = addGlobalParams(config)
+    def helpStr = generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+// Guess the format of params.param_list: "none" when absent or null, "asis" when
+// it is not a String, otherwise by file extension, falling back to "yaml_blob".
+def _guessParamListFormat(params) {
+  if (!params.containsKey("param_list") || params.param_list == null) {
+    "none"
+  } else {
+    def param_list = params.param_list
+
+    if (param_list !instanceof String) {
+      "asis"
+    } else if (param_list.endsWith(".csv")) {
+      "csv"
+    } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
+      "json"
+    } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
+      "yaml"
+    } else {
+      "yaml_blob"
+    }
+  }
+}
+
+// script-level flag so that the deprecation warning below is printed only once
+viashChannelDeprecationWarningPrinted = false
+
+/**
+ * Deprecated: turn the Nextflow params into a list of processed argument sets.
+ *
+ * @param params Input parameters, optionally containing a 'param_list'.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return A list of argument maps, each of which contains an 'id'.
+ */
+def paramsToList(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  // fetch default params from functionality
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // fetch overrides in params
+  def paramArgs = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+
+  // check multi input params
+  // objects should be closures and not functions, thanks to FunctionDef
+  def multiParamFormat = _guessParamListFormat(params)
+
+  def multiOptionFunctions = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert multiOptionFunctions.containsKey(multiParamFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'"
+
+  // fetch multi param inputs
+  def multiOptionFun = multiOptionFunctions.get(multiParamFormat)
+  // todo: add try catch
+  def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? params.param_list : "")
+  def paramList = multiOptionOut[1]
+  def multiFile = multiOptionOut[0]
+
+  // data checks
+  assert paramList instanceof List: "--param_list should contain a list of maps"
+  for (value in paramList) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // combine parameters
+  def processedParams = paramList.collect{ multiParam ->
+    // combine params
+    def combinedArgs = defaultArgs + paramArgs + multiParam
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      combinedArgs = [id: "stub"] + combinedArgs
+    } else {
+      // else check whether required arguments exist
+      config.functionality.allArguments
+        .findAll { it.required }
+        .forEach { par ->
+          assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value"
+        }
+    }
+
+    // process arguments
+    def inputs = config.functionality.allArguments
+      .findAll{ par -> combinedArgs.containsKey(par.plainName) }
+      .collectEntries { par ->
+        // declared locally so the value does not leak into the script binding
+        def parData
+        // split on 'multiple_sep'
+        if (par.multiple) {
+          parData = combinedArgs[par.plainName]
+          if (parData instanceof List) {
+            parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it }
+          } else if (parData instanceof String) {
+            parData = parData.split(par.multiple_sep)
+          } else if (parData == null) {
+            parData = []
+          } else {
+            parData = [ parData ]
+          }
+        } else {
+          parData = [ combinedArgs[par.plainName] ]
+        }
+
+        // flatten
+        parData = parData.flatten()
+
+        // cast types
+        if (par.type == "file" && ((par.direction != null ? par.direction : "input") == "input")) {
+          parData = parData.collect{path ->
+            if (path !instanceof String) {
+              path
+            } else if (multiFile) {
+              file(getChild(multiFile, path))
+            } else {
+              file(path)
+            }
+          }.flatten()
+        } else if (par.type == "integer") {
+          parData = parData.collect{it as Integer}
+        } else if (par.type == "double") {
+          parData = parData.collect{it as Double}
+        } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
+          parData = parData.collect{it as Boolean}
+        }
+        // simplify list to value if need be
+        if (!par.multiple) {
+          assert parData.size() == 1 :
+            "Error: argument ${par.plainName} has too many values.\n" +
+            " Expected amount: 1. Found: ${parData.size()}"
+          parData = parData[0]
+        }
+
+        // return pair
+        [ par.plainName, parData ]
+      }
+      // remove parameters which were explicitly set to null
+      .findAll{ par -> par != null }
+  }
+
+
+  // check processed params
+  processedParams.forEach { args ->
+    assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args"
+  }
+  def ppIds = processedParams.collect{it.id}
+  // unique(false) works on a copy, so the message still lists the duplicated ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+
+  processedParams
+}
+
+// Deprecated: same as paramsToList, but wrapped in a Nextflow channel.
+def paramsToChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  Channel.fromList(paramsToList(params, config))
+}
+
+// Deprecated: same as paramsToChannel, but emits [id, argumentMap] events.
+def viashChannel(params, config) {
+  if (!viashChannelDeprecationWarningPrinted) {
+    viashChannelDeprecationWarningPrinted = true
+    System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. " +
+                       "Please use a combination of channelFromParams and preprocessInputs.")
+  }
+  paramsToChannel(params, config)
+    | map{tup -> [tup.id, tup]}
+}
+
+/**
+ * Split parameters for arguments that accept multiple values using their separator
+ *
+ * @param parValues A Map containing parameters to split.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A Map of parameters where the parameter values have been split into a list using
+ *         their separator. Parameters that are not found in the config are returned unaltered.
+ */
+Map _splitParams(Map parValues, Map config){
+  def parsedParamValues = parValues.collectEntries { parName, parValue ->
+    def parameterSettings = config.functionality.allArguments.find({it.plainName == parName})
+
+    if (!parameterSettings) {
+      // if argument is not found, do not alter
+      return [parName, parValue]
+    }
+    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
+      } else if (parValue instanceof String) {
+        parValue = parValue.split(parameterSettings.multiple_sep)
+      } else if (parValue == null) {
+        parValue = []
+      } else {
+        parValue = [ parValue ]
+      }
+      parValue = parValue.flatten()
+    }
+    // For all parameters check if multiple values are only passed for
+    // arguments that allow it. Quietly simplify lists of length 1.
+    if (!parameterSettings.multiple && parValue instanceof Collection) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  }
+  return parsedParamValues
+}
+
+/**
+ * Check if the ids are unique across parameter sets
+ *
+ * @param parameterSets a list of parameter sets.
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  // the id is the first element of every parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) works on a copy, so the message still lists the duplicated ids
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path. Values that are not Strings, and
+ *         parameters that are not input files, are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the settings local to this closure instead of the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    // an argument without an explicit direction is treated as an input
+    def isInputFile = argSettings &&
+      argSettings.type == "file" &&
+      (argSettings.direction ?: "input") == "input"
+    if (isInputFile) {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a value that is not a String is passed through as-is
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ *         Arguments declared without 'multiple' are reduced to a single value.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these local to the closure instead of leaking into the script binding
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // plain property read: the two-argument Map.get() would insert a missing key into the config
+    def parType = paramSettings ? paramSettings.type : null
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their seperator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their corect types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+      assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their seperator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their corect types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    // elements after the id and the parameter map are passed through untouched
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to to initialise
+ *     a Vsdl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list) */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+
+    // Add regular parameters together with parameters passed with 'param_list'
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null.
+    // findAll is used instead of a collectEntries closure that returns null for
+    // dropped entries; the settings lookup is null-safe because parameters such
+    // as publish_dir are not declared in the config, and it reads 'default'
+    // without inserting that key into the config.
+    combinedArgsValues = combinedArgsValues.findAll{paramName, paramValue ->
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      paramValue != null || parameterSettings?.default != null
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionaly contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that contains
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the defaults declared in the config, keyed by argument name
+  def declaredDefaults = [:]
+  config.functionality.allArguments.each { argument ->
+    if (argument.containsKey("default")) {
+      declaredDefaults[argument.plainName] = argument.default
+    }
+  }
+
+  // The defaults get the same splitting and casting as regular input
+  def defaults = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Process the input and put the defaults underneath the provided values;
+  // any tuple elements after the parameter map are passed through as is
+  def withDefaults = applyConfig(params, config).collect { tuple ->
+    [tuple[0], defaults + tuple[1]] + tuple.drop(2)
+  }
+  _checkUniqueIds(withDefaults)
+
+  return withDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containg the the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def', i.e. as script-level
+  // variables; presumably so the workflow body below can see them - confirm before changing.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events so that they can be processed (and their ids checked) together
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a value is a Map that only contains expected keys and
+ * contains all required keys.
+ *
+ * @param map The value to check.
+ * @param expectedKeys The keys that are allowed in the map.
+ * @param requiredKeys The keys that must be present in the map.
+ * @param mapName Name of the map, only used in the error messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is missing; the closure parameter is 'requiredKey'
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+/**
+ * Validate a map of Nextflow process directives and normalise a few of them
+ * (for example: lists are joined into strings, a container map is turned
+ * into an image name, publishDir becomes a list of maps).
+ *
+ * @param drctv A Map of directives; entries with a null value are ignored.
+ *
+ * @return A new Map containing the validated and normalised directives.
+ */
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = drctv["container"]
+      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
+      def part1 =
+        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
+        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
+        m.registry ? m.registry + "/" :
+        ""
+      def part2 = m.image
+      def part3 = m.tag ? ":" + m.tag : ":latest"
+      drctv["container"] = part1 + part2 + part3
+    }
+  }
+
+  /* DIRECTIVE containerOptions
+    accepted examples:
+    - "--foo bar"
+    - ["--foo bar", "-f b"]
+  */
+  if (drctv.containsKey("containerOptions")) {
+    if (drctv["containerOptions"] instanceof List) {
+      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
+    }
+    assert drctv["containerOptions"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cpus
+    accepted examples:
+    - 1
+    - 10
+  */
+  if (drctv.containsKey("cpus")) {
+    assert drctv["cpus"] instanceof Integer
+  }
+
+  /* DIRECTIVE disk
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("disk")) {
+    assert drctv["disk"] instanceof CharSequence
+    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE echo
+    accepted examples:
+    - true
+    - false
+  */
+  if (drctv.containsKey("echo")) {
+    assert drctv["echo"] instanceof Boolean
+  }
+
+  /* DIRECTIVE errorStrategy
+    accepted examples:
+    - "terminate"
+    - "finish"
+  */
+  if (drctv.containsKey("errorStrategy")) {
+    assert drctv["errorStrategy"] instanceof CharSequence
+    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
+  }
+
+  /* DIRECTIVE executor
+    accepted examples:
+    - "local"
+    - "sge"
+  */
+  if (drctv.containsKey("executor")) {
+    assert drctv["executor"] instanceof CharSequence
+    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
+  }
+
+  /* DIRECTIVE machineType
+    accepted examples:
+    - "n1-highmem-8"
+  */
+  if (drctv.containsKey("machineType")) {
+    assert drctv["machineType"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE maxErrors
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxErrors")) {
+    assert drctv["maxErrors"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxForks
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxForks")) {
+    assert drctv["maxForks"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxRetries
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxRetries")) {
+    assert drctv["maxRetries"] instanceof Integer
+  }
+
+  /* DIRECTIVE memory
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("memory")) {
+    assert drctv["memory"] instanceof CharSequence
+    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE module
+    accepted examples:
+    - "ncbi-blast/2.2.27"
+    - "ncbi-blast/2.2.27:t_coffee/10.0"
+    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
+  */
+  if (drctv.containsKey("module")) {
+    if (drctv["module"] instanceof List) {
+      drctv["module"] = drctv["module"].join(":")
+    }
+    assert drctv["module"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE penv
+    accepted examples:
+    - "smp"
+  */
+  if (drctv.containsKey("penv")) {
+    assert drctv["penv"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE pod
+    accepted examples:
+    - [ label: "key", value: "val" ]
+    - [ annotation: "key", value: "val" ]
+    - [ env: "key", value: "val" ]
+    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
+  */
+  if (drctv.containsKey("pod")) {
+    if (drctv["pod"] instanceof Map) {
+      drctv["pod"] = [ drctv["pod"] ]
+    }
+    assert drctv["pod"] instanceof List
+    drctv["pod"].forEach { pod ->
+      assert pod instanceof Map
+      // TODO: should more checks be added?
+      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
+      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
+    }
+  }
+
+  /* DIRECTIVE publishDir
+    accepted examples:
+    - []
+    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
+    - "/path/to/dir"
+      is transformed to [[ path: "/path/to/dir" ]]
+    - [ path: "/path/to/dir", mode: "cache" ]
+      is transformed to [[ path: "/path/to/dir", mode: "cache" ]]
+  */
+  // TODO: should we also look at params["publishDir"]?
+  if (drctv.containsKey("publishDir")) {
+    def pblsh = drctv["publishDir"]
+
+    // check different options
+    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence
+
+    // turn into list if not already so
+    // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work.
+    pblsh = pblsh instanceof List ? pblsh : [ pblsh ]
+
+    // check elements of publishDir
+    pblsh = pblsh.collect{ elem ->
+      // turn into map if not already so
+      elem = elem instanceof CharSequence ? [ path: elem ] : elem
+
+      // check types and keys
+      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
+      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")
+
+      // check elements in map
+      assert elem.containsKey("path")
+      assert elem["path"] instanceof CharSequence
+      if (elem.containsKey("mode")) {
+        assert elem["mode"] instanceof CharSequence
+        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
+      }
+      if (elem.containsKey("overwrite")) {
+        assert elem["overwrite"] instanceof Boolean
+      }
+      if (elem.containsKey("pattern")) {
+        assert elem["pattern"] instanceof CharSequence
+      }
+      if (elem.containsKey("saveAs")) {
+        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'"
+      }
+      if (elem.containsKey("enabled")) {
+        assert elem["enabled"] instanceof Boolean
+      }
+
+      // return final result
+      elem
+    }
+    // store final directive
+    drctv["publishDir"] = pblsh
+  }
+
+  /* DIRECTIVE queue
+    accepted examples:
+    - "long"
+    - "short,long"
+    - ["short", "long"]
+  */
+  if (drctv.containsKey("queue")) {
+    if (drctv["queue"] instanceof List) {
+      drctv["queue"] = drctv["queue"].join(",")
+    }
+    assert drctv["queue"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE label
+    accepted examples:
+    - "big_mem"
+    - "big_cpu"
+    - ["big_mem", "big_cpu"]
+  */
+  if (drctv.containsKey("label")) {
+    if (drctv["label"] instanceof CharSequence) {
+      drctv["label"] = [ drctv["label"] ]
+    }
+    assert drctv["label"] instanceof List
+    drctv["label"].forEach { label ->
+      assert label instanceof CharSequence
+      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
+      // ^ does not allow closures
+    }
+  }
+
+  /* DIRECTIVE scratch
+    accepted examples:
+    - true
+    - "/path/to/scratch"
+    - '$MY_PATH_TO_SCRATCH'
+    - "ram-disk"
+  */
+  if (drctv.containsKey("scratch")) {
+    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE storeDir
+    accepted examples:
+    - "/path/to/storeDir"
+  */
+  if (drctv.containsKey("storeDir")) {
+    assert drctv["storeDir"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE stageInMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageInMode")) {
+    assert drctv["stageInMode"] instanceof CharSequence
+    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
+  }
+
+  /* DIRECTIVE stageOutMode
+    accepted examples:
+    - "copy"
+    - "link"
+  */
+  if (drctv.containsKey("stageOutMode")) {
+    assert drctv["stageOutMode"] instanceof CharSequence
+    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
+  }
+
+  /* DIRECTIVE tag
+    accepted examples:
+    - "foo"
+    - '$id'
+  */
+  if (drctv.containsKey("tag")) {
+    assert drctv["tag"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings: every expected key must be present and Boolean.
+ *
+ * @param auto A Map of settings; entries with a null value are ignored.
+ *
+ * @return A Map that only contains the expected keys.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local instead of storing it in the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key)
+    assert auto[key] instanceof Boolean
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/transform/normalize_total/nextflow.config b/target/nextflow/transform/normalize_total/nextflow.config new file mode 100644 index 
00000000000..204717adda0 --- /dev/null +++ b/target/nextflow/transform/normalize_total/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'normalize_total' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Normalize counts per cell.\n\nNormalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True, very highly expressed genes are excluded from the computation of the normalization factor (size factor) for each cell. This is meaningful as these can strongly influence the resulting normalized values for all other genes [Weinreb17].\n' + author = 'Dries De Maeyer, Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + 
singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/transform/normalize_total/nextflow_params.yaml b/target/nextflow/transform/normalize_total/nextflow_params.yaml new file mode 100644 index 00000000000..fbd8c75f1ec --- /dev/null +++ b/target/nextflow/transform/normalize_total/nextflow_params.yaml @@ -0,0 +1,13 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# input_layer: "foo" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +# output_layer: "foo" +target_sum: 10000 +exclude_highly_expressed: false + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/normalize_total/nextflow_schema.json 
b/target/nextflow/transform/normalize_total/nextflow_schema.json new file mode 100644 index 00000000000..9a1389e85dc --- /dev/null +++ b/target/nextflow/transform/normalize_total/nextflow_schema.json @@ -0,0 +1,146 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "normalize_total", +"description": "Normalize counts per cell.\n\nNormalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.\n\nIf exclude_highly_expressed=True, very highly expressed genes are excluded from the computation of the normalization factor (size factor) for each cell. This is meaningful as these can strongly influence the resulting normalized values for all other genes [Weinreb17].\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. ", + "help_text": "Type: `string`, default: `rna`. " + , + "default": "rna" + } + + + , + "input_layer": { + "type": + "string", + "description": "Type: `string`. Input layer to use", + "help_text": "Type: `string`. Input layer to use. By default, X is normalized" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. 
The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "output_layer": { + "type": + "string", + "description": "Type: `string`. Output layer to use", + "help_text": "Type: `string`. Output layer to use. By default, use X." + + } + + + , + "target_sum": { + "type": + "integer", + "description": "Type: `integer`, default: `10000`. If None, after normalization, each observation (cell) has a total count equal to the median of total counts for observations (cells) before normalization", + "help_text": "Type: `integer`, default: `10000`. If None, after normalization, each observation (cell) has a total count equal to the median of total counts for observations (cells) before normalization." + , + "default": "10000" + } + + + , + "exclude_highly_expressed": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell", + "help_text": "Type: `boolean_true`, default: `false`. Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell. A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum." + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/transform/normalize_total/setup_logger.py b/target/nextflow/transform/normalize_total/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/transform/normalize_total/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/transform/regress_out/.config.vsh.yaml b/target/nextflow/transform/regress_out/.config.vsh.yaml new file mode 100644 index 00000000000..4fb5f0809e3 --- /dev/null +++ b/target/nextflow/transform/regress_out/.config.vsh.yaml @@ -0,0 +1,195 @@ +functionality: + name: "regress_out" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + roles: + - "maintainer" + - "contributor" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + 
name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "Which modality (one or more) to run this component on." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--obs_keys" + description: "Which .obs keys to regress on." + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Regress out (mostly) unwanted sources of variation.\nUses simple linear\ + \ regression. 
This is inspired by Seurat's regressOut function in R [Satija15].\ + \ \nNote that this function tends to overcorrect in certain circumstances as described\ + \ in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + 
cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/regress_out" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/regress_out/regress_out" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/transform/regress_out/main.nf b/target/nextflow/transform/regress_out/main.nf new file mode 100644 index 00000000000..f8fe1b348e9 --- /dev/null +++ b/target/nextflow/transform/regress_out/main.nf @@ -0,0 +1,2613 @@ +// regress_out 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (maintainer, contributor) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "regress_out", + "namespace" : "transform", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer", + "contributor" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Input h5mu file", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" 
: "Which modality (one or more) to run this component on.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--obs_keys", + "description" : "Which .obs keys to regress on.", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Regress out (mostly) unwanted sources of variation.\nUses simple linear regression. This is inspired by Seurat's regressOut function in R [Satija15]. \nNote that this function tends to overcorrect in certain circumstances as described in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.10-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + 
"type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "lowcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/regress_out/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/regress_out", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + 
"git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import scanpy as sc +import mudata as mu +import multiprocessing +import sys + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'obs_keys': $( if [ ! -z ${VIASH_PAR_OBS_KEYS+x} ]; then echo "r'${VIASH_PAR_OBS_KEYS//\\'/\\'\\"\\'\\"r\\'}'.split(':')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +logger.info("Reading input mudata") +mdata = mu.read_h5mu(par["input"]) +mdata.var_names_make_unique() + +if ( + par["obs_keys"] is not None + and len(par["obs_keys"]) > 0 +): + mod = par["modality"] + logger.info("Regress out variables on modality %s", mod) + data = mdata.mod[mod] + + sc.pp.regress_out( + data, + keys=par["obs_keys"], + n_jobs=multiprocessing.cpu_count() - 1 + ) + +logger.info("Writing to file") +mdata.write_h5mu(filename=par["output"], compression=par["output_compression"]) +VIASHMAIN +python 
-B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/transform_regress_out", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "lowcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = 
Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. 
+ def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? 
config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy; the in-place unique() would strip the
+  // duplicates from ppIds before the failure message below gets to print them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * Only arguments that the config declares with type "file" and direction "input"
+ * are processed, and only values that are still Strings are resolved. Any other
+ * value is returned unchanged.
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a non-String scalar is passed through untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config 
+ * and return a nextflow channel.
+ * 
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * 
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ * 
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element 
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from 
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Defaults declared in the config, keyed by plain argument name, are run
+  // through the same processing as the values supplied by the caller.
+  def declaredDefaults = config.functionality.allArguments.findAll { arg ->
+    arg.containsKey("default")
+  }.collectEntries { arg ->
+    [ arg.plainName, arg.default ]
+  }
+  def processedDefaults = applyConfigToOneParameterSet(declaredDefaults, config)
+
+  // Process the incoming tuples, then lay each tuple's values over the defaults
+  // (entries of the right-hand map win), keeping any passthrough elements.
+  def mergedSets = applyConfig(params, config).collect { tuple ->
+    def id = tuple[0]
+    def values = tuple[1]
+    [id, processedDefaults + values] + tuple.drop(2)
+  }
+
+  _checkUniqueIds(mergedSets)
+  return mergedSets
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of 
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make 
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the 
+ * first element of the tuples being a unique id of the parameter set, 
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take: 
+    input_ch
+
+    main:
+    assert config instanceof Map : 
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only uses expected keys and contains every required key.
+ *
+ * @param map The object to check; the first assertion requires it to be a Map.
+ * @param expectedKeys Keys that are allowed to occur in the map.
+ * @param requiredKeys Keys that must be present in the map.
+ * @param mapName Name of the map, only used to build the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val -> 
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey -> 
+    // the message must name the closure parameter; 'key' is not defined in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ] 
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/transform/regress_out/nextflow.config b/target/nextflow/transform/regress_out/nextflow.config new file mode 100644 index 
00000000000..d23086c2fbb --- /dev/null +++ b/target/nextflow/transform/regress_out/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'regress_out' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Regress out (mostly) unwanted sources of variation.\nUses simple linear regression. This is inspired by Seurat\'s regressOut function in R [Satija15]. \nNote that this function tends to overcorrect in certain circumstances as described in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + 
withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/transform/regress_out/nextflow_params.yaml b/target/nextflow/transform/regress_out/nextflow_params.yaml new file mode 100644 index 00000000000..0988f9dcdf5 --- /dev/null +++ b/target/nextflow/transform/regress_out/nextflow_params.yaml @@ -0,0 +1,10 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" +modality: "rna" +# obs_keys: ["foo"] + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/regress_out/nextflow_schema.json b/target/nextflow/transform/regress_out/nextflow_schema.json new file mode 100644 index 00000000000..5bc7d2c6cac --- /dev/null +++ b/target/nextflow/transform/regress_out/nextflow_schema.json @@ -0,0 +1,114 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "regress_out", 
+"description": "Regress out (mostly) unwanted sources of variation.\nUses simple linear regression. This is inspired by Seurat\u0027s regressOut function in R [Satija15]. \nNote that this function tends to overcorrect in certain circumstances as described in issue theislab/scanpy#526.\nSee https://github.com/theislab/scanpy/issues/526.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file" + + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. Which modality (one or more) to run this component on", + "help_text": "Type: `string`, default: `rna`. Which modality (one or more) to run this component on." + , + "default": "rna" + } + + + , + "obs_keys": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\":\"`. Which ", + "help_text": "Type: List of `string`, multiple_sep: `\":\"`. Which .obs keys to regress on." 
+ + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/transform/regress_out/setup_logger.py b/target/nextflow/transform/regress_out/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/transform/regress_out/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/transform/scale/.config.vsh.yaml b/target/nextflow/transform/scale/.config.vsh.yaml new file mode 100644 index 00000000000..28fd067b84c --- /dev/null +++ b/target/nextflow/transform/scale/.config.vsh.yaml @@ -0,0 +1,205 @@ +functionality: + name: "scale" + namespace: "transform" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file." 
+ info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--modality" + description: "List of modalities to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "double" + name: "--max_value" + description: "Clip (truncate) to this value after scaling. Does not clip by default." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--zero_center" + description: "If False, omit zero-centering variables, which allows to handle\ + \ sparse input efficiently." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output h5mu file." + info: null + default: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." 
+ info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + description: "Scale data to unit variance and zero mean.\n" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/pbmc_1k_protein_v3" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.10-slim-bullseye" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scanpy~=1.9.5" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: 
"memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +- type: "native" + id: "native" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/scale" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/scale/scale" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/transform/scale/main.nf b/target/nextflow/transform/scale/main.nf new file mode 100644 index 00000000000..868ca4bd15b --- /dev/null +++ b/target/nextflow/transform/scale/main.nf @@ -0,0 +1,2625 @@ +// scale 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "scale", + "namespace" : "transform", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Input h5mu file.", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "List of modalities to process.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "double", + "name" : "--max_value", + "description" : "Clip (truncate) to this value after scaling. 
Does not clip by default.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--zero_center", + "description" : "If False, omit zero-centering variables, which allows to handle sparse input efficiently.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output h5mu file.", + "default" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/scaling/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "description" : "Scale data to unit variance and zero mean.\n", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/transform/scaling/" + }, + { + "type" : "file", + "path" : "resources_test/pbmc_1k_protein_v3", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : 
"docker", + "id" : "docker", + "image" : "python:3.10-slim-bullseye", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libhdf5-dev", + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scanpy~=1.9.5" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowmem", + "lowcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : 
"docker" + }, + { + "type" : "native", + "id" : "native" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/transform/scaling/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/transform/scale", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +from mudata import read_h5mu +import scanpy + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'max_value': $( if [ ! -z ${VIASH_PAR_MAX_VALUE+x} ]; then echo "float(r'${VIASH_PAR_MAX_VALUE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'zero_center': $( if [ ! -z ${VIASH_PAR_ZERO_CENTER+x} ]; then echo "r'${VIASH_PAR_ZERO_CENTER//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! 
-z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +def main(): + logger.info(f'Reading .h5mu file: {par["input"]}') + mudata = read_h5mu(par["input"]) + mod = par["modality"] + data = mudata.mod[mod] + + logger.info("Scaling modality: %s", mod) + scanpy.pp.scale(data, + zero_center=par["zero_center"], + max_value=par["max_value"]) + + logger.info("Writing to %s", par["output"]) + mudata.write_h5mu(filename=par["output"], compression=par["output_compression"]) + logger.info("Finished") + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/transform_scale", + "tag" : "0.12.0" + }, + "label" : [ + "lowmem", + "lowcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + 
map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
+ // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? 
+ def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? 
arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? 
[] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. + if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
+  // the id is the first element of each parameter set
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) de-duplicates a copy. The mutating unique() would strip the
+  // duplicates from ppIds itself, so the failure message could never show them.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters have been
+ *         resolved relatively to the provided path.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // declared with 'def' so the lookup result stays local to this closure
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    // only input file arguments hold paths that need resolving
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // values that are not Strings are passed through untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // each parser returns a pair: [location of the param_list file (or null), parsed content]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g. a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps both lookups local; without it they are written to the
+    // script binding, which is shared by every caller of this function
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    // plain property access is used because Groovy's get(key, default) inserts
+    // the default into the map when the key is absent
+    def parType = paramSettings ? paramSettings.type : null
+    // work on a list so single and multiple values share the casting code below
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? paramSettings.direction : "input") == "input")) {
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    // NOTE(review): values of parameters without settings (paramSettings == null)
+    // are not unwrapped here and stay a list -- confirm this is intended
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+        "Error: argument ${parName} has too many values.\n" +
+        " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config.
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  // (the publish directory names are accepted although they are not config arguments)
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *       the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    // anything after the id and the parameter map is handed through unchanged
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a name clash the value from 'param_list' wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local instead of writing it to the script binding
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      // Read the default through plain, null-safe property access: Groovy's
+      // get(key, default) would insert a 'default' entry into the config map,
+      // and names missing from the config have no settings at all.
+      if ( paramValue != null || parameterSettings?.default != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel. Possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // local on purpose: an undeclared name would become a script-level global
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *                must be a unique id of the parameter set, and the second element
+ *                must contain the parameters themselves. Optional extra elements
+ *                of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Collect the defaults declared in the config, keyed by plain argument name
+  def configDefaults = [:]
+  config.functionality.allArguments.each { arg ->
+    if (arg.containsKey("default")) {
+      configDefaults[arg.plainName] = arg.default
+    }
+  }
+
+  // Run the defaults through the same split/cast/check steps as user input
+  def defaults = applyConfigToOneParameterSet(configDefaults, config)
+
+  // Process every incoming parameter set, then lay its values over the defaults.
+  // Tuple layout: [id, parameter map, ...passthrough elements]
+  def withDefaults = applyConfig(params, config).collect { tuple ->
+    [tuple[0], defaults + tuple[1]] + tuple.drop(2)
+  }
+  _checkUniqueIds(withDefaults)
+
+  withDefaults
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *      ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *              to a parsed config (see readConfig()). Optionally, a
+ *              'key' key can be provided which can be used to create a unique
+ *              name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+/**
+ * Assert that a map only holds expected keys and holds every required key.
+ *
+ * @param map The value to check; must be a Map.
+ * @param expectedKeys The keys the map is allowed to contain.
+ * @param requiredKeys The keys the map must contain.
+ * @param mapName Name of the map, only used in the assertion messages.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // interpolate the closure parameter: no variable called 'key' exists in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+/**
+ * Validate the 'auto' settings of a module.
+ *
+ * @param auto A Map of auto settings. After entries with a null value are
+ *             dropped, it must contain a Boolean for each of 'simplifyInput',
+ *             'simplifyOutput', 'transcript' and 'publish'.
+ *
+ * @return A Map containing only the four expected keys.
+ */
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // declared with 'def' so the list stays local instead of leaking into the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected auto setting '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber"
+
+  if (newNumber > 0) {
+    // Build ONE message string. Passing the hint as a second argument makes the
+    // logger treat it as a format argument, and since the message contains no
+    // '{}' placeholder the hint would never be printed.
+    log.warn "Key for module '${wfKey}' is duplicated.\n" +
+      "If you run a component multiple times in the same workflow,\n" +
+      "it's recommended you set a unique key for every call,\n" +
+      "for example: ${wfKey}.run(key: \"foo\")."
+  }
+
+  // subset directives and convert to list of tuples
+  def drctv = processArgs.directives
+
+  // TODO: unit test the two commands below
+  // convert publish array into tags
+  // Declared before it is assigned so that the closure can call itself for
+  // nested Lists and Maps; a closure cannot see a variable that is still
+  // being declared by its own initialiser.
+  def valueToStr
+  valueToStr = { val ->
+    // ignore closures
+    if (val instanceof CharSequence) {
+      // strings of the form '{ ... }' are emitted verbatim, all others are quoted
+      if (!val.matches('^[{].*[}]$')) {
+        '"' + val + '"'
+      } else {
+        val
+      }
+    } else if (val instanceof List) {
+      "[" + val.collect{valueToStr(it)}.join(", ") + "]"
+    } else if (val instanceof Map) {
+      "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]"
+    } else {
+      val.inspect()
+    }
+  }
+
+  // multiple entries allowed: label, publishdir
+  def drctvStrs = drctv.collect { key, value ->
+    if (key in ["label", "publishDir"]) {
+      // one directive line per element
+      value.collect{ val ->
+        if (val instanceof Map) {
+          "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+        } else if (val == null) {
+          ""
+        } else {
+          "\n$key " + valueToStr(val)
+        }
+      }.join()
+    } else if (value instanceof Map) {
+      "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ")
+    } else {
+      "\n$key " + valueToStr(value)
+    }
+  }.join()
+
+  def inputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "input" }
+    .collect { ', path(viash_par_' + it.plainName + ')' }
+    .join()
+
+  def outputPaths = thisConfig.functionality.allArguments
+    .findAll { it.type == "file" && it.direction == "output" }
+    .collect { par ->
+      // insert dummy into every output (see nextflow-io/nextflow#2678)
+      if (!par.multiple) {
+        ', path{[".exitcode", args.' + par.plainName + ']}'
+      } else {
+        ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" }
+          .collect { par ->
+            def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : []
+            // normalise the value to a list of files; anything that is
+            // neither a List nor a Path yields no input files
+            def inputFiles = []
+            if (val == null) {
+              inputFiles = []
+            } else if (val instanceof List) {
+              inputFiles = val
+            } else if (val instanceof Path) {
+              inputFiles = [ val ]
+            } else {
+              inputFiles = []
+            }
+            if (!workflow.stubRun) {
+              // throw error when an input file doesn't exist
+              inputFiles.each{ file ->
+                assert file.exists() :
+                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
+                  " Required input file does not exist.\n" +
+                  " Path: '$file'.\n" +
+                  " Expected input file to exist"
+              }
+            }
+            inputFiles
+          }
+
+        // remove input files
+        def argsExclInputFiles = thisConfig.functionality.allArguments
+          .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) }
+          .collectEntries { par ->
+            def parName = par.plainName
+            def val = combinedArgs[parName]
+            if (par.multiple && val instanceof Collection) {
+              val = val.join(par.multiple_sep)
+            }
+            if (par.direction == "output" && par.type == "file") {
+              // fill in the '$id' and '$key' placeholders of output file names
+              val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
+            }
+            [parName, val]
+          }
+
+        [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ]
+      }
+      | processObj
+      | map { output ->
+        def outputFiles = thisConfig.functionality.allArguments
+          .findAll { it.type == "file" && it.direction == "output" }
+          .indexed()
+          .collectEntries{ index, par ->
+            // 'def' keeps this value local to the closure instead of
+            // writing it to the script binding
+            def out = output[index + 1]
+            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
+            // The parentheses matter: '!out instanceof List' is parsed as
+            // '(!out) instanceof List', which is always false.
+            if (!(out instanceof List) || out.size() <= 1) {
+              if (par.multiple) {
+                out = []
+              } else {
+                assert !par.required :
+                    "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
+                    " Required output file is missing"
+                out = null
+              }
+            } else if (out.size() == 2 && !par.multiple) {
+              out = out[1]
+            } else {
+              out = out.drop(1)
+            }
+            [ par.plainName, out ]
+          }
+
+        // drop null outputs
+
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/transform/scale/nextflow.config b/target/nextflow/transform/scale/nextflow.config new file mode 100644 index 00000000000..74828936a99 --- 
/dev/null +++ b/target/nextflow/transform/scale/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'scale' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Scale data to unit variance and zero mean.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + 
withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/transform/scale/nextflow_params.yaml b/target/nextflow/transform/scale/nextflow_params.yaml new file mode 100644 index 00000000000..8ac8cc5c9f6 --- /dev/null +++ b/target/nextflow/transform/scale/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: "input.h5mu" +modality: "rna" +# max_value: 123.0 +zero_center: true +# output: "$id.$key.output.h5mu" +# output_compression: "gzip" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/transform/scale/nextflow_schema.json b/target/nextflow/transform/scale/nextflow_schema.json new file mode 100644 index 00000000000..ef05bb5baf0 --- /dev/null +++ b/target/nextflow/transform/scale/nextflow_schema.json @@ -0,0 +1,125 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "scale", +"description": "Scale data to unit variance and zero mean.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required, example: `input.h5mu`. 
Input h5mu file", + "help_text": "Type: `file`, required, example: `input.h5mu`. Input h5mu file." + + } + + + , + "modality": { + "type": + "string", + "description": "Type: `string`, default: `rna`. List of modalities to process", + "help_text": "Type: `string`, default: `rna`. List of modalities to process." + , + "default": "rna" + } + + + , + "max_value": { + "type": + "number", + "description": "Type: `double`. Clip (truncate) to this value after scaling", + "help_text": "Type: `double`. Clip (truncate) to this value after scaling. Does not clip by default." + + } + + + , + "zero_center": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. If False, omit zero-centering variables, which allows to handle sparse input efficiently", + "help_text": "Type: `boolean`, default: `true`. If False, omit zero-centering variables, which allows to handle sparse input efficiently." + , + "default": "True" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file", + "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`. Output h5mu file." + , + "default": "$id.$key.output.h5mu" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/transform/scale/setup_logger.py b/target/nextflow/transform/scale/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/transform/scale/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/velocity/scvelo/.config.vsh.yaml b/target/nextflow/velocity/scvelo/.config.vsh.yaml new file mode 100644 index 00000000000..315757ebed7 --- /dev/null +++ b/target/nextflow/velocity/scvelo/.config.vsh.yaml @@ -0,0 +1,276 @@ +functionality: + name: "scvelo" + namespace: "velocity" + version: "0.12.4" + authors: + - name: "Dries Schaumont" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" + argument_groups: + - name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Velocyto loom file." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Output directory. If it does not exist, will be created." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--output_compression" + description: "The compression format to be used on the output h5mu object." + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Filtering and normalization" + description: "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize\ + \ function)" + arguments: + - type: "integer" + name: "--min_counts" + description: "Minimum number of counts required for a gene to pass filtering\ + \ (spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_counts_u" + description: "Minimum number of counts required for a gene to pass filtering\ + \ (unspliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells" + description: "Minimum number of cells expressed required to pass filtering (spliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_cells_u" + description: "Minimum number of cells expressed required to pass filtering (unspliced)." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_shared_counts" + description: "Minimum number of counts (both unspliced and spliced) required\ + \ for a gene." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--min_shared_cells" + description: "Minimum number of cells required to be expressed (both unspliced\ + \ and spliced)." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_top_genes" + description: "Number of genes to keep." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean" + name: "--log_transform" + description: "Do not log transform counts." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - name: "Fitting parameters" + description: "Arguments for fitting the data" + arguments: + - type: "integer" + name: "--n_principal_components" + description: "Number of principal components to use for calculating moments." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "integer" + name: "--n_neighbors" + description: "Number of neighbors to use. First/second-order moments are computed\ + \ for each\ncell across its nearest neighbors, where the neighbor graph is\ + \ obtained from\neuclidean distances in PCA space.\n" + info: null + default: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "python_script" + path: "script.py" + is_executable: true + - type: "file" + path: "src/utils/setup_logger.py" + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: 
"https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + - type: "python" + user: false + packages: + - "mudata~=0.2.3" + - "anndata~=0.9.1" + - "scvelo~=0.2.5" + - "numpy~=1.23.5" + - "matplotlib<3.8.0" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "highcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/scvelo" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/scvelo/scvelo" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git 
a/target/nextflow/velocity/scvelo/main.nf b/target/nextflow/velocity/scvelo/main.nf new file mode 100644 index 00000000000..d02caa1fdfe --- /dev/null +++ b/target/nextflow/velocity/scvelo/main.nf @@ -0,0 +1,2761 @@ +// scvelo 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "scvelo", + "namespace" : "velocity", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Velocyto loom file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Output directory. 
If it does not exist, will be created.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "The compression format to be used on the output h5mu object.", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Filtering and normalization", + "description" : "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize function)", + "arguments" : [ + { + "type" : "integer", + "name" : "--min_counts", + "description" : "Minimum number of counts required for a gene to pass filtering (spliced).", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_counts_u", + "description" : "Minimum number of counts required for a gene to pass filtering (unspliced).", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_cells", + "description" : "Minimum number of cells expressed required to pass filtering (spliced).", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_cells_u", + "description" : "Minimum number of cells expressed required to pass filtering (unspliced).", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_shared_counts", + "description" : "Minimum number of counts (both unspliced and spliced) required for a gene.", + "required" : false, + "direction" : "input", + "multiple" : 
false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--min_shared_cells", + "description" : "Minimum number of cells required to be expressed (both unspliced and spliced).", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_top_genes", + "description" : "Number of genes to keep.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean", + "name" : "--log_transform", + "description" : "Do not log transform counts.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + }, + { + "name" : "Fitting parameters", + "description" : "Arguments for fitting the data", + "arguments" : [ + { + "type" : "integer", + "name" : "--n_principal_components", + "description" : "Number of principal components to use for calculating moments.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "integer", + "name" : "--n_neighbors", + "description" : "Number of neighbors to use. 
First/second-order moments are computed for each\ncell across its nearest neighbors, where the neighbor graph is obtained from\neuclidean distances in PCA space.\n", + "default" : [ + 30 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/" + }, + { + "type" : "file", + "path" : "src/utils/setup_logger.py", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/" + }, + { + "type" : "file", + "path" : "resources_test/rna_velocity/velocyto_processed/cellranger_tiny.loom", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "mudata~=0.2.3", + "anndata~=0.9.1", + "scvelo~=0.2.5", + "numpy~=1.23.5", + "matplotlib<3.8.0" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : 
"native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/velocity/scvelo/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/scvelo", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +import sys +import scvelo +import mudata +from contextlib import redirect_stdout +from pathlib import Path +import matplotlib as mpl + +## VIASH START +# The following code has been auto-generated by Viash. 
+par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'min_counts': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_counts_u': $( if [ ! -z ${VIASH_PAR_MIN_COUNTS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_COUNTS_U//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_cells': $( if [ ! -z ${VIASH_PAR_MIN_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_cells_u': $( if [ ! -z ${VIASH_PAR_MIN_CELLS_U+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELLS_U//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_shared_counts': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_COUNTS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_COUNTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'min_shared_cells': $( if [ ! -z ${VIASH_PAR_MIN_SHARED_CELLS+x} ]; then echo "int(r'${VIASH_PAR_MIN_SHARED_CELLS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_top_genes': $( if [ ! -z ${VIASH_PAR_N_TOP_GENES+x} ]; then echo "int(r'${VIASH_PAR_N_TOP_GENES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'log_transform': $( if [ ! -z ${VIASH_PAR_LOG_TRANSFORM+x} ]; then echo "r'${VIASH_PAR_LOG_TRANSFORM//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'n_principal_components': $( if [ ! -z ${VIASH_PAR_N_PRINCIPAL_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_N_PRINCIPAL_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'n_neighbors': $( if [ ! 
-z ${VIASH_PAR_N_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +meta = { + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} + +## VIASH END + +sys.path.append(meta["resources_dir"]) +# START TEMPORARY WORKAROUND setup_logger +# reason: resources aren't available when using Nextflow fusion +# from setup_logger import setup_logger +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger +# END TEMPORARY WORKAROUND setup_logger +logger = setup_logger() + +mpl.rcParams['savefig.dpi']=150 + +# Script must be wrapped into a main function because scvelo spawn subprocesses +# and this fails when the functions are not wrapped. +def main(): + # Create output directory + output_dir = Path(par['output']) + output_dir.mkdir(parents=True, exist_ok=True) + scvelo.settings.figdir = str(output_dir) + + + # Calculate the sample name + sample_name = par["output"].removesuffix(".loom") + sample_name = Path(sample_name).name + + # Read the input data + adata = scvelo.read(par['input']) + + # Save spliced vs unspliced proportions to file + with (output_dir / "proportions.txt").open('w') as target: + with redirect_stdout(target): + scvelo.utils.show_proportions(adata) + + # Plot piecharts of spliced vs unspliced proportions + scvelo.pl.proportions(adata, save=True, show=False) + + # Perform preprocessing + scvelo.pp.filter_and_normalize(adata, + min_counts=par["min_counts"], + min_counts_u=par["min_counts_u"], + min_cells=par["min_cells"], + min_cells_u=par["min_cells_u"], + min_shared_counts=par["min_shared_counts"], + min_shared_cells=par["min_shared_cells"], + n_top_genes=par["n_top_genes"], + log=par["log_transform"]) + + # Fitting + scvelo.pp.moments(adata, + 
n_pcs=par["n_principal_components"], + n_neighbors=par["n_neighbors"]) + + + # Second step in velocyto calculations + # Velocity calculation and visualization + # From the scvelo manual: + # The solution to the full dynamical model is obtained by setting mode='dynamical', + # which requires to run scv.tl.recover_dynamics(adata) beforehand + scvelo.tl.recover_dynamics(adata) + scvelo.tl.velocity(adata, mode="dynamical") + scvelo.tl.velocity_graph(adata) + scvelo.pl.velocity_graph(adata, save=str(output_dir / "scvelo_graph.pdf"), show=False) + + # Plotting + # TODO: add more here. + scvelo.pl.velocity_embedding_stream(adata, save=str(output_dir / "scvelo_embedding.pdf"), show=False) + + # Create output + ouput_data = mudata.MuData({'rna_velocity': adata}) + ouput_data.write_h5mu(output_dir / f"{sample_name}.h5mu", compression=par["output_compression"]) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/velocity_scvelo", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "highcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. 
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +/** + * Resolve the file paths in the parameters relative to given path + * + * @param paramList A Map containing parameters to process. + * This function assumes that files are still of type String. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * @param relativeTo path of a file to resolve the parameters values to. + * + * @return A map of parameters where the location of the input file parameters have been resolved + * resolved relatively to the provided path. + */ +private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) { + paramList.collectEntries { parName, parValue -> + argSettings = config.functionality.allArguments.find{it.plainName == parName} + if (argSettings && argSettings.type == "file" && argSettings.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collect({path -> + path !instanceof String ? path : file(getChild(relativeTo, path)) + }) + } else { + parValue = parValue !instanceof String ? path : file(getChild(relativeTo, parValue)) + } + } + [parName, parValue] + } +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters from nextflow. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameter sets that were parsed from the 'param_list' argument value. 
+ */ +private List> _parseParamListArguments(Map params, Map config){ + // first try to guess the format (if not set in params) + def paramListFormat = _guessParamListFormat(params) + + // get the correct parser function for the detected params_list format + def paramListParsers = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert paramListParsers.containsKey(paramListFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+ + "'yaml', 'yaml_blob', 'asis' or 'none'" + def paramListParser = paramListParsers.get(paramListFormat) + + // fetch multi param inputs + def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "") + // multiFile is null if the value passed to param_list was not a file (e.g a blob) + // If the value was indeed a file, multiFile contains the location that file (used later). 
+ def paramListFile = paramListOut[0] + def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ paramValues -> + [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}] + }) + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, paramValues -> + def splitParamValues = _splitParams(paramValues, config) + [id, splitParamValues] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListFile){ + paramSets = paramSets.collect({ id, paramValues -> + def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile) + [id, relativeParamValues] + }) + } + + return paramSets +} + +/** + * Cast parameters to the correct type as defined in the Viash config + * + * @param parValues A Map of input arguments. + * + * @return The input arguments that have been cast to the type from the viash config. + */ + +private Map _castParamTypes(Map parValues, Map config) { + // Cast the input to the correct type according to viash config + def castParValues = parValues.collectEntries({ parName, parValue -> + paramSettings = config.functionality.allArguments.find({it.plainName == parName}) + // dont parse parameters like publish_dir ( in which case paramSettings = null) + parType = paramSettings ? paramSettings.get("type", null) : null + if (parValue !instanceof Collection) { + parValue = [parValue] + } + if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) { + parValue = parValue.collect{ path -> + if (path !instanceof String) { + path + } else { + file(path) + } + } + } else if (parType == "integer") { + parValue = parValue.collect{it as Integer} + } else if (parType == "double") { + parValue = parValue.collect{it as Double} + } else if (parType == "boolean" || + parType == "boolean_true" || + parType == "boolean_false") { + parValue = parValue.collect{it as Boolean} + } + + // simplify list to value if need be + if (paramSettings && !paramSettings.multiple) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + }) + return castParValues +} + +/** + * Apply the argument settings specified in a Viash config to a single parameter set. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * + * @param paramValues A Map of parameter to be processed. All parameters must + * also be specified in the Viash config. + * @param config: A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * @return The input parameters that have been processed. + */ +Map applyConfigToOneParameterSet(Map paramValues, Map config){ + def splitParamValues = _splitParams(paramValues, config) + def castParamValues = _castParamTypes(splitParamValues, config) + + // Check if any unexpected arguments were passed + def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"] + castParamValues.each({parName, parValue -> + assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams" + }) + return castParamValues +} + +/** + * Apply the argument settings specified in a Viash config to a list of parameter sets. + * - Split the parameter values according to their seperator if + * the parameter accepts multiple values + * - Cast the parameters to their corect types. + * - Assertions: + * ~ Check if any unknown parameters are found + * ~ Check if the ID of the parameter set is unique across all sets. + * + * @return The input parameters that have been processed. + */ + +List applyConfig(List parameterSets, Map config){ + def processedparameterSets = parameterSets.collect({ parameterSet -> + def id = parameterSet[0] + def paramValues = parameterSet[1] + def passthrough = parameterSet.drop(2) + def processedSet = applyConfigToOneParameterSet(paramValues, config) + [id, processedSet] + passthrough + }) + + _checkUniqueIds(processedparameterSets) + return processedparameterSets +} + +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. 
+ * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config) + + /* process params_list arguments */ + /*********************************/ + def paramSets = _parseParamListArguments(params, config) + def parameterSetsWithConfigApplied = applyConfig(paramSets, config) + + /* combine arguments into channel */ + /**********************************/ + def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet -> + def id = paramSet[0] + def parValues = paramSet[1] + id = [id, globalID].find({it != null}) // first non-null element + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + id = id ? id : "stub" + index + } + assert id != null: "Each parameter set should have at least an ID." 
+ // Add regular parameters together with parameters passed with 'param_list' + def combinedArgsValues = globalParamsValues + parValues + + // Remove parameters which are null, if the default is also null + combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue -> + parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + if ( paramValue != null || parameterSettings.get("default", null) != null ) { + [paramName, paramValue] + } + } + [id, combinedArgsValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +/** + * Process a list of Vdsl3 formatted parameters and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. 
+ * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * @param params A list of parameter sets as Tuples. The first element of the tuples + * must be a unique id of the parameter set, and the second element + * must contain the parameters themselves. Optional extra elements + * of the tuples will be passed to the output as is. + * @param config A Map of the Viash configuration. This Map can be generated from + * the config file using the readConfig() function. + * + * @return A list of processed parameters sets as tuples. + */ + +private List _preprocessInputsList(List params, Map config) { + // Get different parameter types (used throughout this function) + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // Apply config to default parameters + def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config) + + // Apply config to input parameters + def parsedInputParamSets = applyConfig(params, config) + + // Merge two parameter sets together + def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet -> + def id = parsedInputParamSet[0] + def parValues = parsedInputParamSet[1] + def passthrough = parsedInputParamSet.drop(2) + def parValuesWithDefault = parsedDefaultValues + parValues + [id, parValuesWithDefault] + passthrough + }) + _checkUniqueIds(parsedArgs) + + return parsedArgs +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. 
+ * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + wfKey = args.key != null ? args.key : "preprocessInputs" + config = args.config + workflow preprocessInputsInstance { + take: + input_ch + + main: + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + + output_ch = input_ch + | toSortedList + | map { paramList -> _preprocessInputsList(paramList, config) } + | flatMap + emit: + output_ch + } + + return preprocessInputsInstance.cloneWithName(wfKey) +} + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +import nextflow.Nextflow +import nextflow.script.IncludeDef +import nextflow.script.ScriptBinding +import nextflow.script.ScriptMeta +import nextflow.script.ScriptParser + +// retrieve resourcesDir here to make sure the correct path is found +resourcesDir = ScriptMeta.current().getScriptPath().getParent() + +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = 
drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + + // check whether expected keys are all booleans (for now) + for (key in expectedKeys) { + assert auto.containsKey(key) + assert auto[key] instanceof Boolean + } + + return auto.subMap(expectedKeys) +} + +def processProcessArgs(Map args) { + // override defaults with args + def processArgs = thisDefaultProcessArgs + args + + // check whether 'key' exists + assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (processArgs["key"] instanceof Closure) { + processArgs["key"] = processArgs["key"](thisConfig.functionality.name) + } + def key = processArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check whether directives exists and apply defaults + assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}" + processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"]) + + // check whether directives exists and apply defaults + assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}" + processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"]) + + // auto define publish, if so desired + if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = + params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null + + if (publishDir != null) { + processArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (processArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : null ? processArgs.directives.publishDir : [] + processArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) { + if (processArgs.containsKey(nam) && processArgs[nam]) { + assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}" + } + } + + // check fromState + assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument" + def fromState = processArgs["fromState"] + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState) { + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def data = fromStateMap.collectEntries{newkey, origkey -> + // check whether all values of fromState are in state + assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state" + [newkey, state[origkey]] + } + data + } + } + + processArgs["fromState"] = fromState + } + + // check toState + def toState = processArgs["toState"] + + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr; valueToStr = { val -> + // ignore closures (declared before assignment so the recursive calls below resolve) + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/velocity/scvelo/nextflow.config b/target/nextflow/velocity/scvelo/nextflow.config new file mode 100644 index 00000000000..7d8abc5310a --- 
/dev/null +++ b/target/nextflow/velocity/scvelo/nextflow.config @@ -0,0 +1,107 @@ +manifest { + name = 'scvelo' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { 
memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/velocity/scvelo/nextflow_params.yaml b/target/nextflow/velocity/scvelo/nextflow_params.yaml new file mode 100644 index 00000000000..58cca805fd1 --- /dev/null +++ b/target/nextflow/velocity/scvelo/nextflow_params.yaml @@ -0,0 +1,24 @@ +# Inputs +input: # please fill in - example: "path/to/file" + +# Outputs +# output: "$id.$key.output.output" +# output_compression: "gzip" + +# Filtering and normalization +# min_counts: 123 +# min_counts_u: 123 +# min_cells: 123 +# min_cells_u: 123 +# min_shared_counts: 123 +# min_shared_cells: 123 +# n_top_genes: 123 +log_transform: true + +# Fitting parameters +# n_principal_components: 123 +n_neighbors: 30 + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/velocity/scvelo/nextflow_schema.json b/target/nextflow/velocity/scvelo/nextflow_schema.json new file mode 100644 index 00000000000..d6881694a4f --- /dev/null +++ b/target/nextflow/velocity/scvelo/nextflow_schema.json @@ -0,0 +1,237 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "scvelo", +"description": "No description", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + 
"properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Velocyto loom file", + "help_text": "Type: `file`, required. Velocyto loom file." + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Output directory", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Output directory. If it does not exist, will be created." + , + "default": "$id.$key.output.output" + } + + + , + "output_compression": { + "type": + "string", + "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object", + "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression format to be used on the output h5mu object.", + "enum": ["gzip", "lzf"] + + + } + + +} +}, + + + "filtering and normalization" : { + "title": "Filtering and normalization", + "type": "object", + "description": "Arguments for filtering, normalization an log transform (see scvelo.pp.filter_and_normalize function)", + "properties": { + + + "min_counts": { + "type": + "integer", + "description": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (spliced)", + "help_text": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (spliced)." + + } + + + , + "min_counts_u": { + "type": + "integer", + "description": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (unspliced)", + "help_text": "Type: `integer`. Minimum number of counts required for a gene to pass filtering (unspliced)." + + } + + + , + "min_cells": { + "type": + "integer", + "description": "Type: `integer`. Minimum number of cells expressed required to pass filtering (spliced)", + "help_text": "Type: `integer`. 
Minimum number of cells expressed required to pass filtering (spliced)." + + } + + + , + "min_cells_u": { + "type": + "integer", + "description": "Type: `integer`. Minimum number of cells expressed required to pass filtering (unspliced)", + "help_text": "Type: `integer`. Minimum number of cells expressed required to pass filtering (unspliced)." + + } + + + , + "min_shared_counts": { + "type": + "integer", + "description": "Type: `integer`. Minimum number of counts (both unspliced and spliced) required for a gene", + "help_text": "Type: `integer`. Minimum number of counts (both unspliced and spliced) required for a gene." + + } + + + , + "min_shared_cells": { + "type": + "integer", + "description": "Type: `integer`. Minimum number of cells required to be expressed (both unspliced and spliced)", + "help_text": "Type: `integer`. Minimum number of cells required to be expressed (both unspliced and spliced)." + + } + + + , + "n_top_genes": { + "type": + "integer", + "description": "Type: `integer`. Number of genes to keep", + "help_text": "Type: `integer`. Number of genes to keep." + + } + + + , + "log_transform": { + "type": + "boolean", + "description": "Type: `boolean`, default: `true`. Do not log transform counts", + "help_text": "Type: `boolean`, default: `true`. Do not log transform counts." + , + "default": "True" + } + + +} +}, + + + "fitting parameters" : { + "title": "Fitting parameters", + "type": "object", + "description": "Arguments for fitting the data", + "properties": { + + + "n_principal_components": { + "type": + "integer", + "description": "Type: `integer`. Number of principal components to use for calculating moments", + "help_text": "Type: `integer`. Number of principal components to use for calculating moments." + + } + + + , + "n_neighbors": { + "type": + "integer", + "description": "Type: `integer`, default: `30`. Number of neighbors to use", + "help_text": "Type: `integer`, default: `30`. Number of neighbors to use. 
First/second-order moments are computed for each\ncell across its nearest neighbors, where the neighbor graph is obtained from\neuclidean distances in PCA space.\n" + , + "default": "30" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/filtering and normalization" + }, + + { + "$ref": "#/definitions/fitting parameters" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/velocity/scvelo/setup_logger.py b/target/nextflow/velocity/scvelo/setup_logger.py new file mode 100644 index 00000000000..ae71eb96115 --- /dev/null +++ b/target/nextflow/velocity/scvelo/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger \ No newline at end of file diff --git a/target/nextflow/velocity/velocyto/.config.vsh.yaml b/target/nextflow/velocity/velocyto/.config.vsh.yaml new file mode 100644 index 00000000000..aa9b3619f52 --- /dev/null +++ b/target/nextflow/velocity/velocyto/.config.vsh.yaml @@ -0,0 +1,225 @@ +functionality: + name: "velocyto" + namespace: "velocity" + version: "0.12.4" + authors: + - name: "Robrecht Cannoodt" + 
roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "robrecht@data-intuitive.com" + github: "rcannood" + orcid: "0000-0003-3641-729X" + linkedin: "robrechtcannoodt" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Science Engineer" + - name: "Open Problems" + href: "https://openproblems.bio" + role: "Core Member" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to BAM file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--transcriptome" + alternatives: + - "-t" + description: "Path to GTF file" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "file" + name: "--barcode" + alternatives: + - "-b" + description: "Valid barcodes file, to filter the bam. If --bcfile is not specified\ + \ all the cell barcodes will be included.\nCell barcodes should be specified\ + \ in the bcfile as the 'CB' tag for each read\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "boolean_true" + name: "--without_umi" + description: "foo" + info: null + direction: "input" + dest: "par" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Velocyto loom file" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ":" + dest: "par" + - type: "string" + name: "--logic" + alternatives: + - "-l" + description: "The logic to use for the filtering." 
+ info: null + default: + - "Default" + required: false + choices: + - "Default" + - "Permissive10X" + - "Intermediate10X" + - "ValidatedIntrons10X" + - "Stricter10X" + - "ObservedSpanning10X" + - "Discordant10X" + - "SmartSeq2" + direction: "input" + multiple: false + multiple_sep: ":" + dest: "par" + resources: + - type: "bash_script" + path: "script.sh" + is_executable: true + description: "Runs the velocity analysis on a BAM file, outputting a loom file." + test_resources: + - type: "python_script" + path: "test.py" + is_executable: true + - type: "file" + path: "resources_test/cellranger_tiny_fastq" + - type: "file" + path: "resources_test/rna_velocity" + - type: "file" + path: "resources_test/reference_gencodev41_chr1" + info: null + status: "enabled" + requirements: + commands: + - "ps" + set_wd_to_resources_dir: false +platforms: +- type: "docker" + id: "docker" + image: "python:3.9-slim" + target_organization: "openpipelines-bio" + target_registry: "ghcr.io" + target_tag: "0.12.0" + namespace_separator: "_" + resolve_volume: "Automatic" + chown: true + setup_strategy: "ifneedbepullelsecachedbuild" + target_image_source: "https://github.com/openpipelines-bio/openpipeline" + setup: + - type: "apt" + packages: + - "procps" + - "build-essential" + - "file" + interactive: false + - type: "python" + user: false + pip: + - "numpy" + - "Cython" + upgrade: true + - type: "python" + user: false + pip: + - "velocyto" + upgrade: true + - type: "apt" + packages: + - "samtools" + interactive: false + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.5.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "highmem" + - "lowcpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1.GB" + mem2gb: "memory = 2.GB" + mem4gb: "memory = 4.GB" + mem8gb: "memory = 8.GB" + 
mem16gb: "memory = 16.GB" + mem32gb: "memory = 32.GB" + mem64gb: "memory = 64.GB" + mem128gb: "memory = 128.GB" + mem256gb: "memory = 256.GB" + mem512gb: "memory = 512.GB" + mem1tb: "memory = 1.TB" + mem2tb: "memory = 2.TB" + mem4tb: "memory = 4.TB" + mem8tb: "memory = 8.TB" + mem16tb: "memory = 16.TB" + mem32tb: "memory = 32.TB" + mem64tb: "memory = 64.TB" + mem128tb: "memory = 128.TB" + mem256tb: "memory = 256.TB" + mem512tb: "memory = 512.TB" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +info: + config: "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml" + platform: "nextflow" + output: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/velocyto" + executable: "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/velocyto/velocyto" + viash_version: "0.7.5" + git_commit: "a075b9f384e200b357c4c85801062a980ddb3383" + git_remote: "https://github.com/openpipelines-bio/openpipeline" + git_tag: "0.12.3-3-ga075b9f384" diff --git a/target/nextflow/velocity/velocyto/main.nf b/target/nextflow/velocity/velocyto/main.nf new file mode 100644 index 00000000000..8d114d7bb1b --- /dev/null +++ b/target/nextflow/velocity/velocyto/main.nf @@ -0,0 +1,2650 @@ +// velocyto 0.12.4 +// +// This wrapper script is auto-generated by viash 0.7.5 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Robrecht Cannoodt (maintainer) + +nextflow.enable.dsl=2 + +// Required imports +import groovy.json.JsonSlurper + +// initialise slurper +def jsonSlurper = new JsonSlurper() + +// DEFINE CUSTOM CODE + +// functionality metadata +thisConfig = processConfig(jsonSlurper.parseText('''{ + "functionality" : { + "name" : "velocyto", + "namespace" : "velocity", + "version" : "0.12.4", + "authors" : [ + { + "name" : "Robrecht Cannoodt", + "roles" : [ + "maintainer" + ], + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "robrecht@data-intuitive.com", + "github" : "rcannood", + "orcid" : "0000-0003-3641-729X", + "linkedin" : "robrechtcannoodt" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Science Engineer" + }, + { + "name" : "Open Problems", + "href" : "https://openproblems.bio", + "role" : "Core Member" + } + ] + } + } + ], + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to BAM file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--transcriptome", + "alternatives" : [ + "-t" + ], + "description" : "Path to GTF file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--barcode", + "alternatives" : [ + "-b" + ], + "description" : "Valid barcodes file, to filter the bam. 
If --bcfile is not specified all the cell barcodes will be included.\nCell barcodes should be specified in the bcfile as the 'CB' tag for each read\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "boolean_true", + "name" : "--without_umi", + "description" : "foo", + "direction" : "input", + "dest" : "par" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Velocyto loom file", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + }, + { + "type" : "string", + "name" : "--logic", + "alternatives" : [ + "-l" + ], + "description" : "The logic to use for the filtering.", + "default" : [ + "Default" + ], + "required" : false, + "choices" : [ + "Default", + "Permissive10X", + "Intermediate10X", + "ValidatedIntrons10X", + "Stricter10X", + "ObservedSpanning10X", + "Discordant10X", + "SmartSeq2" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ":", + "dest" : "par" + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/" + } + ], + "description" : "Runs the velocity analysis on a BAM file, outputting a loom file.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true, + "parent" : "file:/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/" + }, + { + "type" : "file", + "path" : "resources_test/cellranger_tiny_fastq", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : "resources_test/rna_velocity", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + }, + { + "type" : "file", + "path" : 
"resources_test/reference_gencodev41_chr1", + "parent" : "file:///home/runner/work/openpipeline/openpipeline/" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "set_wd_to_resources_dir" : false + }, + "platforms" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.9-slim", + "target_organization" : "openpipelines-bio", + "target_registry" : "ghcr.io", + "target_tag" : "0.12.0", + "namespace_separator" : "_", + "resolve_volume" : "Automatic", + "chown" : true, + "setup_strategy" : "ifneedbepullelsecachedbuild", + "target_image_source" : "https://github.com/openpipelines-bio/openpipeline", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "build-essential", + "file" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "pip" : [ + "numpy", + "Cython" + ], + "upgrade" : true + }, + { + "type" : "python", + "user" : false, + "pip" : [ + "velocyto" + ], + "upgrade" : true + }, + { + "type" : "apt", + "packages" : [ + "samtools" + ], + "interactive" : false + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.5.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "highmem", + "lowcpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1.GB", + "mem2gb" : "memory = 2.GB", + "mem4gb" : "memory = 4.GB", + "mem8gb" : "memory = 8.GB", + "mem16gb" : "memory = 16.GB", + "mem32gb" : "memory = 32.GB", + "mem64gb" : "memory = 64.GB", + "mem128gb" : "memory = 128.GB", + "mem256gb" : "memory = 256.GB", + "mem512gb" : "memory = 512.GB", + "mem1tb" : "memory = 1.TB", + "mem2tb" : "memory = 2.TB", + "mem4tb" : "memory = 4.TB", + "mem8tb" : "memory = 8.TB", + "mem16tb" : "memory = 
16.TB", + "mem32tb" : "memory = 32.TB", + "mem64tb" : "memory = 64.TB", + "mem128tb" : "memory = 128.TB", + "mem256tb" : "memory = 256.TB", + "mem512tb" : "memory = 512.TB", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "info" : { + "config" : "/home/runner/work/openpipeline/openpipeline/src/velocity/velocyto/config.vsh.yaml", + "platform" : "nextflow", + "output" : "/home/runner/work/openpipeline/openpipeline/target/nextflow/velocity/velocyto", + "viash_version" : "0.7.5", + "git_commit" : "a075b9f384e200b357c4c85801062a980ddb3383", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline", + "git_tag" : "0.12.3-3-ga075b9f384" + } +}''')) + +thisScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +set -eo pipefail + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome='&'#" ; else echo "# par_transcriptome="; fi ) +$( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "${VIASH_PAR_BARCODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_barcode='&'#" ; else echo "# par_barcode="; fi ) +$( if [ ! -z ${VIASH_PAR_WITHOUT_UMI+x} ]; then echo "${VIASH_PAR_WITHOUT_UMI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_without_umi='&'#" ; else echo "# par_without_umi="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! 
-z ${VIASH_PAR_LOGIC+x} ]; then echo "${VIASH_PAR_LOGIC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logic='&'#" ; else echo "# par_logic="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) + +## VIASH END + +extra_params=( ) + +if [ ! -z "\\$par_barcode" ]; then + extra_params+=( "--bcfile=\\$par_barcode" ) +fi +if [ "\\$par_without_umi" == "true" ]; then + extra_params+=( "--without-umi" ) +fi +if [ ! -z "\\$meta_cpus" ]; then + extra_params+=( "--samtools-threads" "\\$meta_cpus" ) +fi +if [ ! -z "\\$meta_memory_mb" ]; then + extra_params+=( "--samtools-memory" "\\$meta_memory_mb" ) +fi + +output_dir=\\`dirname "\\$par_output"\\` +sample_id=\\`basename "\\$par_output" .loom\\` + +if (file \\`readlink -f "\\$par_transcriptome"\\` | grep -q compressed ) ; then + # create temporary directory + tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") + function clean_up { + rm -rf "\\$tmpdir" + } + trap clean_up EXIT + + zcat "\\$par_transcriptome" > "\\$tmpdir/genes.gtf" + par_transcriptome="\\$tmpdir/genes.gtf" +fi + +velocyto run \\\\ + "\\$par_input" \\\\ + "\\$par_transcriptome" \\\\ + "\\${extra_params[@]}" \\\\ + --outputfolder "\\$output_dir" \\\\ + --sampleid "\\$sample_id" +VIASHMAIN +bash "$tempscript" +''' + +thisDefaultProcessArgs = [ + // key to be used to trace the process and determine output names + key: thisConfig.functionality.name, + // fixed arguments to be passed to script + args: [:], + // default directives + directives: jsonSlurper.parseText('''{ + "container" : { + "registry" : "ghcr.io", + "image" : "openpipelines-bio/velocity_velocyto", + "tag" : "0.12.0" + }, + "label" : [ + "highmem", + "lowcpu" + ], + "tag" : "$id" +}'''), + // auto settings + auto: jsonSlurper.parseText('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of 
a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. 
+ // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// END CUSTOM CODE + +///////////////////////////////////// +// Viash Workflow helper functions // +///////////////////////////////////// + +import java.util.regex.Pattern +import java.io.BufferedReader +import java.io.FileReader +import java.nio.file.Paths +import java.nio.file.Files +import groovy.json.JsonSlurper +import groovy.text.SimpleTemplateEngine +import org.yaml.snakeyaml.Yaml + +// param helpers // +def paramExists(name) { + return params.containsKey(name) && params[name] != "" +} + +def assertParamExists(name, description) { + if (!paramExists(name)) { + exit 1, "ERROR: Please provide a --${name} parameter ${description}" + } +} + +// helper functions for reading params from file // +def getChild(parent, child) { + if (child.contains("://") || Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = Pattern.compile('''"(.*)"''') + + def br = Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? 
m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +def readJsonBlob(str) { + def jsonSlurper = new JsonSlurper() + jsonSlurper.parseText(str) +} + +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def jsonSlurper = new JsonSlurper() + jsonSlurper.parse(inputFile) +} + +def readYamlBlob(str) { + def yamlSlurper = new Yaml() + yamlSlurper.load(str) +} + +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path) : file_path + def yamlSlurper = new Yaml() + yamlSlurper.load(inputFile) +} + +// helper functions for reading a viash config in groovy // + +// based on how Functionality.scala is implemented +def processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ":" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + if (arg.type == "file" && arg.direction == "output") { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// based on how Functionality.scala is implemented +def processArgumentGroup(argumentGroups, name, arguments) { + def argNamesInGroups = argumentGroups.collectMany{it.arguments.findAll{it instanceof String}}.toSet() + + // Check if 'arguments' is in 'argumentGroups'. + def argumentsNotInGroup = arguments.findAll{arg -> !(argNamesInGroups.contains(arg.plainName))} + + // Check whether an argument group of 'name' exists. + def existing = argumentGroups.find{gr -> name == gr.name} + + // if there are no arguments missing from the argument group, just return the existing group (if any) + if (argumentsNotInGroup.isEmpty()) { + return existing == null ? [] : [existing] + + // if there are missing arguments and there is an existing group, add the missing arguments to it + } else if (existing != null) { + def newEx = existing.clone() + newEx.arguments.addAll(argumentsNotInGroup.findAll{it !instanceof String}) + return [newEx] + + // else create a new group + } else { + def newEx = [name: name, arguments: argumentsNotInGroup.findAll{it !instanceof String}] + return [newEx] + } +} + +// based on how Functionality.scala is implemented +def processConfig(config) { + // TODO: assert .functionality etc. 
+ if (config.functionality.inputs) { + System.err.println("Warning: .functionality.inputs is deprecated. Please use .functionality.arguments instead.") + } + if (config.functionality.outputs) { + System.err.println("Warning: .functionality.outputs is deprecated. Please use .functionality.arguments instead.") + } + + // set defaults for inputs + config.functionality.inputs = + (config.functionality.inputs != null ? config.functionality.inputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "input" + processArgument(arg) + } + // set defaults for outputs + config.functionality.outputs = + (config.functionality.outputs != null ? config.functionality.outputs : []).collect{arg -> + arg.type = arg.type != null ? arg.type : "file" + arg.direction = "output" + processArgument(arg) + } + // set defaults for arguments + config.functionality.arguments = + (config.functionality.arguments != null ? config.functionality.arguments : []).collect{arg -> + processArgument(arg) + } + // set defaults for argument_group arguments + config.functionality.argument_groups = + (config.functionality.argument_groups != null ? config.functionality.argument_groups : []).collect{grp -> + grp.arguments = (grp.arguments != null ? grp.arguments : []).collect{arg -> + arg instanceof String ? 
arg.replaceAll("^-*", "") : processArgument(arg) + } + grp + } + + // create combined arguments list + config.functionality.allArguments = + config.functionality.inputs + + config.functionality.outputs + + config.functionality.arguments + + config.functionality.argument_groups.collectMany{ group -> + group.arguments.findAll{ it !instanceof String } + } + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.functionality.argument_groups + def inputGroup = processArgumentGroup(argGroups, "Inputs", config.functionality.inputs) + def outputGroup = processArgumentGroup(argGroups, "Outputs", config.functionality.outputs) + def defaultGroup = processArgumentGroup(argGroups, "Arguments", config.functionality.arguments) + def groupsFiltered = argGroups.findAll(gr -> !(["Inputs", "Outputs", "Arguments"].contains(gr.name))) + config.functionality.allArgumentGroups = inputGroup + outputGroup + defaultGroup + groupsFiltered + + config +} + +def readConfig(file) { + def config = readYaml(file != null ? file : "$projectDir/config.vsh.yaml") + processConfig(config) +} + +// recursively merge two maps +def mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +def addGlobalParams(config) { + def localConfig = [ + "functionality" : [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ], + ] + ] + ] + ] + ] + + return processConfig(mergeMap(config, localConfig)) +} + +// helper functions for generating help // + +// based on io.viash.helpers.Format.wordWrap +def formatWordWrap(str, maxLength) { + def words = str.split("\\s").toList() + + def word = null + def line = "" + def lines = [] + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + lines.add(line) + line = word + } + if (words.isEmpty()) { + lines.add(line) + } + } + return lines +} + +// based on Format.paragraphWrap +def paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? 
param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def generateHelp(config) { + def fun = config.functionality + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? 
fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +def helpMessage(config) { + if (paramExists("help")) { + def mergedConfig = addGlobalParams(config) + def helpStr = generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +def _guessParamListFormat(params) { + if (!params.containsKey("param_list") || params.param_list == null) { + "none" + } else { + def param_list = params.param_list + + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } + } +} + +viashChannelDeprecationWarningPrinted = false + +def paramsToList(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToList has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + // fetch default params from functionality + def defaultArgs = config.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = config.functionality.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + + // check multi input params + // objects should be closures and not functions, thanks to FunctionDef + def multiParamFormat = _guessParamListFormat(params) + + def multiOptionFunctions = [ + "csv": {[it, readCsv(it)]}, + "json": {[it, readJson(it)]}, + "yaml": {[it, readYaml(it)]}, + "yaml_blob": {[null, readYamlBlob(it)]}, + "asis": {[null, it]}, + "none": {[null, [[:]]]} + ] + assert multiOptionFunctions.containsKey(multiParamFormat): + "Format of provided --param_list not recognised.\n" + + "You can use '--param_list_format' to manually specify the format.\n" + + "Found: '$multiParamFormat'. Expected: one of 'csv', 'json', 'yaml', 'yaml_blob', 'asis' or 'none'" + + // fetch multi param inputs + def multiOptionFun = multiOptionFunctions.get(multiParamFormat) + // todo: add try catch + def multiOptionOut = multiOptionFun(params.containsKey("param_list") ? 
params.param_list : "") + def paramList = multiOptionOut[1] + def multiFile = multiOptionOut[0] + + // data checks + assert paramList instanceof List: "--param_list should contain a list of maps" + for (value in paramList) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // combine parameters + def processedParams = paramList.collect{ multiParam -> + // combine params + def combinedArgs = defaultArgs + paramArgs + multiParam + + if (workflow.stubRun) { + // if stub run, explicitly add an id if missing + combinedArgs = [id: "stub"] + combinedArgs + } else { + // else check whether required arguments exist + config.functionality.allArguments + .findAll { it.required } + .forEach { par -> + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + + // process arguments + def inputs = config.functionality.allArguments + .findAll{ par -> combinedArgs.containsKey(par.plainName) } + .collectEntries { par -> + // split on 'multiple_sep' + if (par.multiple) { + parData = combinedArgs[par.plainName] + if (parData instanceof List) { + parData = parData.collect{it instanceof String ? it.split(par.multiple_sep) : it } + } else if (parData instanceof String) { + parData = parData.split(par.multiple_sep) + } else if (parData == null) { + parData = [] + } else { + parData = [ parData ] + } + } else { + parData = [ combinedArgs[par.plainName] ] + } + + // flatten + parData = parData.flatten() + + // cast types + if (par.type == "file" && ((par.direction != null ? 
par.direction : "input") == "input")) { + parData = parData.collect{path -> + if (path !instanceof String) { + path + } else if (multiFile) { + file(getChild(multiFile, path)) + } else { + file(path) + } + }.flatten() + } else if (par.type == "integer") { + parData = parData.collect{it as Integer} + } else if (par.type == "double") { + parData = parData.collect{it as Double} + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + parData = parData.collect{it as Boolean} + } + // simplify list to value if need be + if (!par.multiple) { + assert parData.size() == 1 : + "Error: argument ${par.plainName} has too many values.\n" + + " Expected amount: 1. Found: ${parData.size()}" + parData = parData[0] + } + + // return pair + [ par.plainName, parData ] + } + // remove parameters which were explicitly set to null + .findAll{ par -> par != null } + } + + + // check processed params + processedParams.forEach { args -> + assert args.containsKey("id"): "Each argument set should have an 'id'. Argument set: $args" + } + def ppIds = processedParams.collect{it.id} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" + + processedParams +} + +def paramsToChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: paramsToChannel has deprecated in Viash 0.7.0. " + + "Please use a combination of channelFromParams and preprocessInputs.") + } + Channel.fromList(paramsToList(params, config)) +} + +def viashChannel(params, config) { + if (!viashChannelDeprecationWarningPrinted) { + viashChannelDeprecationWarningPrinted = true + System.err.println("Warning: viashChannel has deprecated in Viash 0.7.0. 
" + + "Please use a combination of channelFromParams and preprocessInputs.") + } + paramsToChannel(params, config) + | map{tup -> [tup.id, tup]} +} + +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.functionality.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. 
+ */
+private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
+  def ppIds = parameterSets.collect{it[0]}
+  // unique(false) returns a de-duplicated copy. The plain unique() call mutates ppIds,
+  // so the failure message below would no longer show the duplicated ids.
+  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
+}
+
+/**
+ * Resolve the file paths in the parameters relative to given path
+ *
+ * @param paramList A Map containing parameters to process.
+ *                  This function assumes that files are still of type String.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ * @param relativeTo path of a file to resolve the parameters values to.
+ *
+ * @return A map of parameters where the location of the input file parameters has been
+ *         resolved relatively to the provided path. Values that are not Strings and
+ *         parameters that are not input files are returned unchanged.
+ */
+private Map _resolvePathsRelativeTo(Map paramList, Map config, String relativeTo) {
+  paramList.collectEntries { parName, parValue ->
+    // 'def' keeps the lookup local to this closure instead of writing to the script binding
+    def argSettings = config.functionality.allArguments.find{it.plainName == parName}
+    if (argSettings && argSettings.type == "file" && argSettings.direction == "input") {
+      if (parValue instanceof Collection) {
+        parValue = parValue.collect({path ->
+          path !instanceof String ? path : file(getChild(relativeTo, path))
+        })
+      } else {
+        // a value that is not a String is passed through untouched
+        parValue = parValue !instanceof String ? parValue : file(getChild(relativeTo, parValue))
+      }
+    }
+    [parName, parValue]
+  }
+}
+
+/**
+ * Parse the 'param_list' nextflow parameter based on settings defined in a viash config
+ * and return the parameter sets it describes.
+ *
+ * @param params Input parameters from nextflow.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameter sets that were parsed from the 'param_list' argument value.
+ */
+private List<Tuple2<String, Map>> _parseParamListArguments(Map params, Map config){
+  // first try to guess the format (if not set in params)
+  def paramListFormat = _guessParamListFormat(params)
+
+  // get the correct parser function for the detected params_list format
+  // every parser returns a pair: [ location of the param_list file (or null), parsed content ]
+  def paramListParsers = [
+    "csv": {[it, readCsv(it)]},
+    "json": {[it, readJson(it)]},
+    "yaml": {[it, readYaml(it)]},
+    "yaml_blob": {[null, readYamlBlob(it)]},
+    "asis": {[null, it]},
+    "none": {[null, [[:]]]}
+  ]
+  assert paramListParsers.containsKey(paramListFormat):
+    "Format of provided --param_list not recognised.\n" +
+    "You can use '--param_list_format' to manually specify the format.\n" +
+    "Found: '$paramListFormat'. Expected: one of 'csv', 'json', "+
+    "'yaml', 'yaml_blob', 'asis' or 'none'"
+  def paramListParser = paramListParsers.get(paramListFormat)
+
+  // fetch multi param inputs
+  def paramListOut = paramListParser(params.containsKey("param_list") ? params.param_list : "")
+  // paramListFile is null if the value passed to param_list was not a file (e.g a blob)
+  // If the value was indeed a file, paramListFile contains the location of that file (used later).
+  def paramListFile = paramListOut[0]
+  def paramSets = paramListOut[1] // these are the actual parameters from reading the blob/file
+
+  // data checks
+  assert paramSets instanceof List: "--param_list should contain a list of maps"
+  for (value in paramSets) {
+    assert value instanceof Map: "--param_list should contain a list of maps"
+  }
+
+  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
+  paramSets = paramSets.collect({ paramValues ->
+    [paramValues.get("id", null), paramValues.findAll{it.key != 'id'}]
+  })
+  // Split parameters with 'multiple: true'
+  paramSets = paramSets.collect({ id, paramValues ->
+    def splitParamValues = _splitParams(paramValues, config)
+    [id, splitParamValues]
+  })
+
+  // The paths of input files inside a param_list file may have been specified relatively to the
+  // location of the param_list file. These paths must be made absolute.
+  if (paramListFile){
+    paramSets = paramSets.collect({ id, paramValues ->
+      def relativeParamValues = _resolvePathsRelativeTo(paramValues, config, paramListFile)
+      [id, relativeParamValues]
+    })
+  }
+
+  return paramSets
+}
+
+/**
+ * Cast parameters to the correct type as defined in the Viash config
+ *
+ * @param parValues A Map of input arguments.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input arguments that have been cast to the type from the viash config.
+ */
+
+private Map _castParamTypes(Map parValues, Map config) {
+  // Cast the input to the correct type according to viash config
+  def castParValues = parValues.collectEntries({ parName, parValue ->
+    // 'def' keeps these lookups local to the closure instead of writing them to the
+    // shared script binding on every call
+    def paramSettings = config.functionality.allArguments.find({it.plainName == parName})
+    // dont parse parameters like publish_dir ( in which case paramSettings = null)
+    def parType = paramSettings ? paramSettings.get("type", null) : null
+    // work on a list so that single values and multiple values share the casting code below
+    if (parValue !instanceof Collection) {
+      parValue = [parValue]
+    }
+    if (parType == "file" && ((paramSettings.direction != null ? 
paramSettings.direction : "input") == "input")) {
+      // only String values are converted; anything else is passed through as is
+      parValue = parValue.collect{ path ->
+        if (path !instanceof String) {
+          path
+        } else {
+          file(path)
+        }
+      }
+    } else if (parType == "integer") {
+      parValue = parValue.collect{it as Integer}
+    } else if (parType == "double") {
+      parValue = parValue.collect{it as Double}
+    } else if (parType == "boolean" ||
+               parType == "boolean_true" ||
+               parType == "boolean_false") {
+      // NOTE(review): 'as Boolean' presumably follows Groovy truth, in which case the
+      // String "false" would become true -- confirm how String values reach this point.
+      parValue = parValue.collect{it as Boolean}
+    }
+
+    // simplify list to value if need be
+    if (paramSettings && !paramSettings.multiple) {
+      assert parValue.size() == 1 :
+      "Error: argument ${parName} has too many values.\n" +
+      " Expected amount: 1. Found: ${parValue.size()}"
+      parValue = parValue[0]
+    }
+    [parName, parValue]
+  })
+  return castParValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a single parameter set.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *
+ * @param paramValues A Map of parameter to be processed. All parameters must
+ *                    also be specified in the Viash config ('publishDir' and
+ *                    'publish_dir' are accepted as well).
+ * @param config: A Map of the Viash configuration. This Map can be generated from
+ *                the config file using the readConfig() function.
+ * @return The input parameters that have been processed.
+ */
+Map applyConfigToOneParameterSet(Map paramValues, Map config){
+  def splitParamValues = _splitParams(paramValues, config)
+  def castParamValues = _castParamTypes(splitParamValues, config)
+
+  // Check if any unexpected arguments were passed
+  def knownParams = config.functionality.allArguments.collect({it.plainName}) + ["publishDir", "publish_dir"]
+  castParamValues.each({parName, parValue ->
+    assert parName in knownParams: "Unknown parameter. 
Parameter $parName should be in $knownParams"
+  })
+  return castParamValues
+}
+
+/**
+ * Apply the argument settings specified in a Viash config to a list of parameter sets.
+ *    - Split the parameter values according to their separator if
+ *      the parameter accepts multiple values
+ *    - Cast the parameters to their correct types.
+ *    - Assertions:
+ *        ~ Check if any unknown parameters are found
+ *        ~ Check if the ID of the parameter set is unique across all sets.
+ *
+ * @param parameterSets A list of parameter sets. The first element of every set is its id,
+ *                      the second element is the Map of parameters; any further elements
+ *                      are appended to the result unchanged.
+ * @param config A Map of the Viash configuration.
+ *
+ * @return The input parameters that have been processed.
+ */
+
+List applyConfig(List parameterSets, Map config){
+  def processedparameterSets = parameterSets.collect({ parameterSet ->
+    def id = parameterSet[0]
+    def paramValues = parameterSet[1]
+    // everything after the id and the parameters is passed through untouched
+    def passthrough = parameterSet.drop(2)
+    def processedSet = applyConfigToOneParameterSet(paramValues, config)
+    [id, processedSet] + passthrough
+  })
+
+  _checkUniqueIds(processedparameterSets)
+  return processedparameterSets
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config.
+ * Return a list of parameter sets, each parameter set corresponding to
+ * an event in a nextflow channel. The output from this function can be used
+ * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
+ * events.
+ *
+ * This function performs:
+ *   - A filtering of the params which can be found in the config file.
+ *   - Process the params_list argument which allows a user to initialise
+ *     a Vdsl3 channel with multiple parameter sets. Possible formats are
+ *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
+ *     which correspond to the different arguments of this pipeline. A json or a yaml
+ *     file should be a list of maps, each of which has keys corresponding to the
+ *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
+ *     When passing a csv, json or yaml, relative path names are relativized to the
+ *     location of the parameter file.
+ *   - Combine the parameter sets into a vdsl3 Channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A list of parameters with the first element of the event being
+ *         the event ID and the second element containing a map of the parsed parameters.
+ */
+
+private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
+  /* parse regular parameters (not in param_list)  */
+  /*************************************************/
+  def globalParams = config.functionality.allArguments
+    .findAll { params.containsKey(it.plainName) }
+    .collectEntries { [ it.plainName, params[it.plainName] ] }
+  def globalID = params.get("id", null)
+  def globalParamsValues = applyConfigToOneParameterSet(globalParams.findAll{it.key != 'id'}, config)
+
+  /* process params_list arguments */
+  /*********************************/
+  def paramSets = _parseParamListArguments(params, config)
+  def parameterSetsWithConfigApplied = applyConfig(paramSets, config)
+
+  /* combine arguments into channel */
+  /**********************************/
+  def processedParams = parameterSetsWithConfigApplied.indexed().collect{ index, paramSet ->
+    def id = paramSet[0]
+    def parValues = paramSet[1]
+    // the id of the parameter set takes precedence over the global --id
+    id = [id, globalID].find({it != null}) // first non-null element
+
+    if (workflow.stubRun) {
+      // if stub run, explicitly add an id if missing
+      id = id ? id : "stub" + index
+    }
+    assert id != null: "Each parameter set should have at least an ID."
+    // Add regular parameters together with parameters passed with 'param_list'
+    // (on a name clash the value from 'param_list' wins)
+    def combinedArgsValues = globalParamsValues + parValues
+
+    // Remove parameters which are null, if the default is also null
+    combinedArgsValues = combinedArgsValues.collectEntries{paramName, paramValue ->
+      // 'def' keeps the lookup local to this closure. The safe navigation covers
+      // parameters that are allowed but not described in the config (e.g. publish_dir).
+      def parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
+      if ( paramValue != null || parameterSettings?.get("default", null) != null ) {
+        [paramName, paramValue]
+      }
+    }
+    [id, combinedArgsValues]
+  }
+
+  // Check if ids (first element of each list) is unique
+  _checkUniqueIds(processedParams)
+  return processedParams
+}
+
+/**
+ * Parse nextflow parameters based on settings defined in a viash config
+ * and return a nextflow channel.
+ *
+ * @param params Input parameters. Can optionally contain a 'param_list' key that
+ *               provides a list of arguments that can be split up into multiple events
+ *               in the output channel possible formats of param_lists are: a csv file,
+ *               json file, a yaml file or a yaml blob. Each parameters set (event) must
+ *               have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *               using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple that
+ *         first contains the ID of the event and as second element holds a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  // 'def' keeps the list local instead of storing it in the script binding
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+/**
+ * Process a list of Vdsl3 formatted parameters and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * @param params A list of parameter sets as Tuples. The first element of the tuples
+ *               must be a unique id of the parameter set, and the second element
+ *               must contain the parameters themselves. Optional extra elements
+ *               of the tuples will be passed to the output as is.
+ * @param config A Map of the Viash configuration. This Map can be generated from
+ *               the config file using the readConfig() function.
+ *
+ * @return A list of processed parameters sets as tuples.
+ */
+
+private List _preprocessInputsList(List params, Map config) {
+  // Get different parameter types (used throughout this function)
+  // only arguments that declare a 'default' key contribute a default value
+  def defaultArgs = config.functionality.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  // Apply config to default parameters
+  def parsedDefaultValues = applyConfigToOneParameterSet(defaultArgs, config)
+
+  // Apply config to input parameters
+  def parsedInputParamSets = applyConfig(params, config)
+
+  // Merge two parameter sets together
+  def parsedArgs = parsedInputParamSets.collect({ parsedInputParamSet ->
+    def id = parsedInputParamSet[0]
+    def parValues = parsedInputParamSet[1]
+    def passthrough = parsedInputParamSet.drop(2)
+    // values that were passed explicitly override the defaults
+    def parValuesWithDefault = parsedDefaultValues + parValues
+    [id, parValuesWithDefault] + passthrough
+  })
+  _checkUniqueIds(parsedArgs)
+
+  return parsedArgs
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *    - Gather default parameters from the Viash config and make
+ *      sure that they are correctly formatted (see applyConfig method).
+ *    - Format the input parameters (also using the applyConfig method).
+ *    - Apply the default parameter to the input parameters.
+ *    - Do some assertions:
+ *        ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *             to a parsed config (see readConfig()). Optionally, a
+ *             'key' key can be provided which can be used to create a unique
+ *             name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ *         and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  // NOTE(review): wfKey and config are assigned without 'def'; presumably this is
+  // needed so that the workflow block below can see them -- confirm before changing.
+  wfKey = args.key != null ? args.key : "preprocessInputs"
+  config = args.config
+  workflow preprocessInputsInstance {
+    take:
+    input_ch
+
+    main:
+    assert config instanceof Map :
+      "Error in preprocessInputs: config must be a map. " +
+      "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+
+    // gather all events into one list so that they are processed (and their ids
+    // checked) together, then emit the processed events one by one again
+    output_ch = input_ch
+      | toSortedList
+      | map { paramList -> _preprocessInputsList(paramList, config) }
+      | flatMap
+    emit:
+    output_ch
+  }
+
+  return preprocessInputsInstance.cloneWithName(wfKey)
+}
+
+////////////////////////////
+// VDSL3 helper functions //
+////////////////////////////
+
+import nextflow.Nextflow
+import nextflow.script.IncludeDef
+import nextflow.script.ScriptBinding
+import nextflow.script.ScriptMeta
+import nextflow.script.ScriptParser
+
+// retrieve resourcesDir here to make sure the correct path is found
+resourcesDir = ScriptMeta.current().getScriptPath().getParent()
+
+// Assert that 'map' is a Map, that all of its keys are listed in 'expectedKeys' and
+// that every key in 'requiredKeys' is present; 'mapName' is only used in the messages.
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? 
mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // the message must use the closure parameter: no variable named 'key' exists in this scope
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives
+// Validate the process directives in 'drctv' and normalise a few of them
+// (e.g. lists are joined into strings, a container map is turned into an image name).
+def processDirectives(Map drctv) {
+  // remove null values
+  drctv = drctv.findAll{k, v -> v != null}
+
+  /* DIRECTIVE accelerator
+    accepted examples:
+    - [ limit: 4, type: "nvidia-tesla-k80" ]
+  */
+  if (drctv.containsKey("accelerator")) {
+    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
+  }
+
+  /* DIRECTIVE afterScript
+    accepted examples:
+    - "source /cluster/bin/cleanup"
+  */
+  if (drctv.containsKey("afterScript")) {
+    assert drctv["afterScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE beforeScript
+    accepted examples:
+    - "source /cluster/bin/setup"
+  */
+  if (drctv.containsKey("beforeScript")) {
+    assert drctv["beforeScript"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cache
+    accepted examples:
+    - true
+    - false
+    - "deep"
+    - "lenient"
+  */
+  if (drctv.containsKey("cache")) {
+    assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean
+    if (drctv["cache"] instanceof CharSequence) {
+      assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache"
+    }
+  }
+
+  /* DIRECTIVE conda
+    accepted examples:
+    - "bwa=0.7.15"
+    - "bwa=0.7.15 fastqc=0.11.5"
+    - ["bwa=0.7.15", "fastqc=0.11.5"]
+  */
+  if (drctv.containsKey("conda")) {
+    if (drctv["conda"] instanceof List) {
+      drctv["conda"] = drctv["conda"].join(" ")
+    }
+    assert drctv["conda"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE container
+    accepted examples:
+    - "foo/bar:tag"
+    - [ registry: "reg", image: "im", tag: "ta" ]
+      is transformed to "reg/im:ta"
+    - [ image: "im" ]
+      is transformed to "im:latest"
+  */
+  if (drctv.containsKey("container")) {
+    assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence
+    if (drctv["container"] instanceof Map) {
+      def m = 
drctv["container"]
+      assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container")
+      // registry prefix, in order of precedence: the OVERRIDE_CONTAINER_REGISTRY
+      // environment variable, params.override_container_registry, the registry
+      // from the directive itself, or no prefix at all
+      def part1 =
+        System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" :
+        params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove?
+        m.registry ? m.registry + "/" :
+        ""
+      def part2 = m.image
+      // a missing tag falls back to "latest"
+      def part3 = m.tag ? ":" + m.tag : ":latest"
+      drctv["container"] = part1 + part2 + part3
+    }
+  }
+
+  /* DIRECTIVE containerOptions
+    accepted examples:
+    - "--foo bar"
+    - ["--foo bar", "-f b"]
+      is transformed to "--foo bar -f b"
+  */
+  if (drctv.containsKey("containerOptions")) {
+    if (drctv["containerOptions"] instanceof List) {
+      drctv["containerOptions"] = drctv["containerOptions"].join(" ")
+    }
+    assert drctv["containerOptions"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE cpus
+    accepted examples:
+    - 1
+    - 10
+  */
+  if (drctv.containsKey("cpus")) {
+    assert drctv["cpus"] instanceof Integer
+  }
+
+  /* DIRECTIVE disk
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("disk")) {
+    assert drctv["disk"] instanceof CharSequence
+    // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE echo
+    accepted examples:
+    - true
+    - false
+  */
+  if (drctv.containsKey("echo")) {
+    assert drctv["echo"] instanceof Boolean
+  }
+
+  /* DIRECTIVE errorStrategy
+    accepted examples:
+    - "terminate"
+    - "finish"
+    - "ignore"
+    - "retry"
+  */
+  if (drctv.containsKey("errorStrategy")) {
+    assert drctv["errorStrategy"] instanceof CharSequence
+    assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy"
+  }
+
+  /* DIRECTIVE executor
+    accepted examples:
+    - "local"
+    - "sge"
+    (see the list in the assertion below for all accepted values)
+  */
+  if (drctv.containsKey("executor")) {
+    assert drctv["executor"] instanceof CharSequence
+    assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor"
+  }
+
+  /* DIRECTIVE machineType
+    accepted examples:
+    - "n1-highmem-8"
+  */
+  if (drctv.containsKey("machineType")) {
+    assert drctv["machineType"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE maxErrors
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxErrors")) {
+    assert drctv["maxErrors"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxForks
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxForks")) {
+    assert drctv["maxForks"] instanceof Integer
+  }
+
+  /* DIRECTIVE maxRetries
+    accepted examples:
+    - 1
+    - 3
+  */
+  if (drctv.containsKey("maxRetries")) {
+    assert drctv["maxRetries"] instanceof Integer
+  }
+
+  /* DIRECTIVE memory
+    accepted examples:
+    - "1 GB"
+    - "2TB"
+    - "3.2KB"
+    - "10.B"
+  */
+  if (drctv.containsKey("memory")) {
+    assert drctv["memory"] instanceof CharSequence
+    // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? 
*[KMGTPEZY]?B")
+    // ^ does not allow closures
+  }
+
+  /* DIRECTIVE module
+    accepted examples:
+    - "ncbi-blast/2.2.27"
+    - "ncbi-blast/2.2.27:t_coffee/10.0"
+    - ["ncbi-blast/2.2.27", "t_coffee/10.0"]
+      is transformed to "ncbi-blast/2.2.27:t_coffee/10.0"
+  */
+  if (drctv.containsKey("module")) {
+    if (drctv["module"] instanceof List) {
+      drctv["module"] = drctv["module"].join(":")
+    }
+    assert drctv["module"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE penv
+    accepted examples:
+    - "smp"
+  */
+  if (drctv.containsKey("penv")) {
+    assert drctv["penv"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE pod
+    accepted examples:
+    - [ label: "key", value: "val" ]
+    - [ annotation: "key", value: "val" ]
+    - [ env: "key", value: "val" ]
+    - [ [label: "l", value: "v"], [env: "e", value: "v"]]
+    a single map is wrapped into a list
+  */
+  if (drctv.containsKey("pod")) {
+    if (drctv["pod"] instanceof Map) {
+      drctv["pod"] = [ drctv["pod"] ]
+    }
+    assert drctv["pod"] instanceof List
+    drctv["pod"].forEach { pod ->
+      assert pod instanceof Map
+      // TODO: should more checks be added?
+      // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod
+      // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...?
+    }
+  }
+
+  /* DIRECTIVE publishDir
+    accepted examples:
+    - []
+    - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ]
+    - "/path/to/dir"
+      is transformed to [[ path: "/path/to/dir" ]]
+    - [ path: "/path/to/dir", mode: "copy" ]
+      is transformed to [[ path: "/path/to/dir", mode: "copy" ]]
+  */
+  // TODO: should we also look at params["publishDir"]?
+  if (drctv.containsKey("publishDir")) {
+    def pblsh = drctv["publishDir"]
+
+    // check different options
+    assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence
+
+    // turn into list if not already so
+    // 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work because '!' binds
+    // more tightly than 'instanceof', i.e. it tests '(!pblsh) instanceof List'.
+    pblsh = pblsh instanceof List ? 
pblsh : [ pblsh ]
+
+    // check elements of publishDir
+    pblsh = pblsh.collect{ elem ->
+      // turn into map if not already so
+      elem = elem instanceof CharSequence ? [ path: elem ] : elem
+
+      // check types and keys
+      assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}"
+      assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir")
+
+      // check elements in map
+      assert elem.containsKey("path")
+      assert elem["path"] instanceof CharSequence
+      if (elem.containsKey("mode")) {
+        assert elem["mode"] instanceof CharSequence
+        assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ]
+      }
+      if (elem.containsKey("overwrite")) {
+        assert elem["overwrite"] instanceof Boolean
+      }
+      if (elem.containsKey("pattern")) {
+        assert elem["pattern"] instanceof CharSequence
+      }
+      // saveAs has to be passed as a String that contains the source code of the closure
+      if (elem.containsKey("saveAs")) {
+        assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'"
+      }
+      if (elem.containsKey("enabled")) {
+        assert elem["enabled"] instanceof Boolean
+      }
+
+      // return final result
+      elem
+    }
+    // store final directive
+    drctv["publishDir"] = pblsh
+  }
+
+  /* DIRECTIVE queue
+    accepted examples:
+    - "long"
+    - "short,long"
+    - ["short", "long"]
+      is transformed to "short,long"
+  */
+  if (drctv.containsKey("queue")) {
+    if (drctv["queue"] instanceof List) {
+      drctv["queue"] = drctv["queue"].join(",")
+    }
+    assert drctv["queue"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE label
+    accepted examples:
+    - "big_mem"
+    - "big_cpu"
+    - ["big_mem", "big_cpu"]
+    a single label is wrapped into a list
+  */
+  if (drctv.containsKey("label")) {
+    if (drctv["label"] instanceof CharSequence) {
+      drctv["label"] = [ drctv["label"] ]
+    }
+    assert drctv["label"] instanceof List
+    drctv["label"].forEach { label ->
+      assert label instanceof CharSequence
+      // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?")
+      // ^ does not allow closures
+    }
+  }
+
+  /* DIRECTIVE scratch
+    accepted examples:
+    - true
+    - "/path/to/scratch"
+    - '$MY_PATH_TO_SCRATCH'
+    - "ram-disk"
+    note that the value false does not pass the check below
+  */
+  if (drctv.containsKey("scratch")) {
+    assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE storeDir
+    accepted examples:
+    - "/path/to/storeDir"
+  */
+  if (drctv.containsKey("storeDir")) {
+    assert drctv["storeDir"] instanceof CharSequence
+  }
+
+  /* DIRECTIVE stageInMode
+    accepted examples:
+    - "copy"
+    - "link"
+    - "symlink"
+    - "rellink"
+  */
+  if (drctv.containsKey("stageInMode")) {
+    assert drctv["stageInMode"] instanceof CharSequence
+    assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"]
+  }
+
+  /* DIRECTIVE stageOutMode
+    accepted examples:
+    - "copy"
+    - "move"
+    - "rsync"
+  */
+  if (drctv.containsKey("stageOutMode")) {
+    assert drctv["stageOutMode"] instanceof CharSequence
+    assert drctv["stageOutMode"] in ["copy", "move", "rsync"]
+  }
+
+  /* DIRECTIVE tag
+    accepted examples:
+    - "foo"
+    - '$id'
+  */
+  if (drctv.containsKey("tag")) {
+    assert drctv["tag"] instanceof CharSequence
+  }
+
+  /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// TODO: unit test processAuto
+// Validate the 'auto' settings: every expected key must be present and hold a Boolean.
+// Returns a map that is restricted to the expected keys.
+def processAuto(Map auto) {
+  // remove null values
+  auto = auto.findAll{k, v -> v != null}
+
+  // 'def' keeps the list local instead of storing it in the script binding
+  def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"]
+
+  // check whether expected keys are all booleans (for now)
+  for (key in expectedKeys) {
+    assert auto.containsKey(key) : "Missing required key '$key' in auto map"
+    assert auto[key] instanceof Boolean : "Expected auto argument '$key' to be a Boolean. Found: class ${auto[key].getClass()}"
+  }
+
+  return auto.subMap(expectedKeys)
+}
+
+// Merge the arguments passed to a module with its default process arguments and
+// validate / normalise the result.
+def processProcessArgs(Map args) {
+  // override defaults with args
+  def processArgs = thisDefaultProcessArgs + args
+
+  // check whether 'key' exists
+  assert processArgs.containsKey("key") : "Error in module '${thisConfig.functionality.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (processArgs["key"] instanceof Closure) {
+    processArgs["key"] = processArgs["key"](thisConfig.functionality.name)
+  }
+  def key = processArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check whether directives exists and apply defaults
+  assert processArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert processArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${processArgs['directives'].getClass()}"
+  // directives passed by the caller override the default directives
+  processArgs["directives"] = processDirectives(thisDefaultProcessArgs.directives + processArgs["directives"])
+
+  // check whether auto exists and apply defaults
+  assert processArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
+  assert processArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${processArgs['auto'].getClass()}"
+  processArgs["auto"] = processAuto(thisDefaultProcessArgs.auto + processArgs["auto"])
+
+  // auto define publish, if so desired
+  // (only when no publishDir directive was set explicitly)
+  if (processArgs.auto.publish == true && (processArgs.directives.publishDir != null ? processArgs.directives.publishDir : [:]).isEmpty()) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
+    //   " Example: params.publish_dir = \"./output/\""
+    // params.publish_dir takes precedence over params.publishDir
+    def publishDir =
+      params.containsKey("publish_dir") ? params.publish_dir :
+      params.containsKey("publishDir") ? params.publishDir :
+      null
+
+    if (publishDir != null) {
+      processArgs.directives.publishDir = [[
+        path: publishDir,
+        saveAs: "{ it.startsWith('.') ? 
null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (processArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${processArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    // an explicit transcripts directory takes precedence over a "_transcripts"
+    // sub-directory of the publish directory
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      // only hidden files are published here, with the first character stripped from their name
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // start from the publishDir entries that are already defined, if any
+      def publishDirs = processArgs.directives.publishDir != null ? processArgs.directives.publishDir : []
+      processArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    processArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  // these optional arguments must be closures when they are set
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter"]) {
+    if (processArgs.containsKey(nam) && processArgs[nam]) {
+      assert processArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. 
Found: class ${processArgs[nam].getClass()}"
+    }
+  }
+
+  // check fromState
+  assert processArgs.containsKey("fromState") : "Error in module '$key': fromState is a required argument"
+  def fromState = processArgs["fromState"]
+  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
+    "Error in module '$key': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
+  // NOTE(review): an empty List or Map is presumably falsy here and would then be
+  // stored unconverted -- confirm whether that can happen.
+  if (fromState) {
+    // if fromState is a List, convert to map
+    if (fromState instanceof List) {
+      // check whether fromstate is a list[string]
+      assert fromState.every{it instanceof CharSequence} : "Error in module '$key': fromState is a List, but not all elements are Strings"
+      // every name is mapped onto itself
+      fromState = fromState.collectEntries{[it, it]}
+    }
+
+    // if fromState is a map, convert to closure
+    if (fromState instanceof Map) {
+      // check whether fromstate is a map[string, string]
+      assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all values are Strings"
+      assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key': fromState is a Map, but not all keys are Strings"
+      def fromStateMap = fromState.clone()
+      // turn the map into a closure to be used later on
+      // the closure reads the state from the second element of its argument and
+      // returns a new map [new key: value stored in the state under the original key]
+      fromState = { it ->
+        def state = it[1]
+        assert state instanceof Map : "Error in module '$key': the state is not a Map"
+        def data = fromStateMap.collectEntries{newkey, origkey ->
+          // check whether all values of fromState are in state
+          assert state.containsKey(origkey) : "Error in module '$key': fromState key '$origkey' not found in current state"
+          [newkey, state[origkey]]
+        }
+        data
+      }
+    }
+
+    processArgs["fromState"] = fromState
+  }
+
+  // check toState
+  def toState = processArgs["toState"]
+
+  // by default, the second element of the tuple is returned
+  if (toState == null) {
+    toState = { tup -> tup[1] }
+  }
+
+  // toState should be a closure, map[string, string], or list[string]
+  assert toState instanceof 
Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key': the output is not a Map" + assert state instanceof Map : "Error in module '$key': the state is not a Map" + def extraEntries = toStateMap.collectEntries{newkey, origkey -> + // check whether all values of toState are in output + assert output.containsKey(origkey) : "Error in module '$key': toState key '$origkey' not found in current output" + [newkey, output[origkey]] + } + state + extraEntries + } + } + + processArgs["toState"] = toState + + // return output + return processArgs +} + +def processFactory(Map processArgs) { + // autodetect process key + def wfKey = processArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def meta = ScriptMeta.current() + def existing = meta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = 
newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = processArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ')' } + .join() + + def outputPaths = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' 
+ par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (processArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + viash_par_contents = !par.required && !par.multiple ? "viash_par_${par.plainName}[0]" : "viash_par_${par.plainName}.join(\"${par.multiple_sep}\")" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = thisScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = processArgs.auto.publish || processArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir) + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') } + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir.toRealPath().toAbsolutePath()}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_FUNCTIONALITY_NAME="${thisConfig.functionality.name}" + |export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_FUNCTIONALITY_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+1023) / 1024 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+1023) / 1024 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+1023) / 1024 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (processArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // create runtime process + def ownerParams = new ScriptBinding.ParamsMap() + def binding = new ScriptBinding().setParams(ownerParams) + def module = new IncludeDef.Module(name: procKey) + def scriptParser = new ScriptParser(session) + .setModule(true) + .setBinding(binding) + scriptParser.scriptPath = ScriptMeta.current().getScriptPath() + def moduleScript = scriptParser.runScript(procStr) + .getScript() + + // register module in meta + meta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return meta.getProcess(procKey) +} + +def debug(processArgs, debugKey) { + if (processArgs.debug) { + view { "process '${processArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +def workflowFactory(Map args) { + def processArgs = processProcessArgs(args) + def key = processArgs["key"] + def meta = ScriptMeta.current() + + def workflowKey = key + + def processObj = null + + workflow workflowInstance { + 
take: + input_ + + main: + if (processObj == null) { + processObj = processFactory(processArgs) + } + + mid1_ = input_ + | debug(processArgs, "input") + | map { tuple -> + tuple = tuple.clone() + + if (processArgs.map) { + tuple = processArgs.map(tuple) + } + if (processArgs.mapId) { + tuple[0] = processArgs.mapId(tuple[0]) + } + if (processArgs.mapData) { + tuple[1] = processArgs.mapData(tuple[1]) + } + if (processArgs.mapPassthrough) { + tuple = tuple.take(2) + processArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + assert tuple[0] instanceof CharSequence : + "Error in module '${key}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (processArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. 
Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (processArgs.renameKeys) { + assert processArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${processArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + processArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. 
tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(processArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + if (processArgs.filter) { + mid2_ = mid1_ + | filter{processArgs.filter(it)} + } else { + mid2_ = mid1_ + } + + if (processArgs.fromState) { + mid3_ = mid2_ + | map{ + def new_data = processArgs["fromState"](it.take(2)) + [it[0], new_data] + } + } else { + mid3_ = mid2_ + } + + out0_ = mid3_ + | debug(processArgs, "processed") + | map { tuple -> + def id = tuple[0] + def data = tuple[1] + + // fetch default params from functionality + def defaultArgs = thisConfig.functionality.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = thisConfig.functionality.allArguments + .findAll { par -> + def argKey = key + "__" + par.plainName + params.containsKey(argKey) && params[argKey] != "viash_no_value" + } + .collectEntries { [ it.plainName, params[key + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = thisConfig.functionality.allArguments + .findAll { data.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + processArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs.removeAll{it.value == null} + + if (workflow.stubRun) { + // add id if missing + combinedArgs = [id: 'stub'] + combinedArgs + } else { + // check whether required arguments exist + thisConfig.functionality.allArguments + .forEach { par -> + if (par.required) { + assert combinedArgs.containsKey(par.plainName): "Argument ${par.plainName} is required but does not have a value" + } + } + } + + // TODO: check whether parameters have the right type + + // process input files separately + def inputPaths = thisConfig.functionality.allArguments + .findAll { it.type 
== "file" && it.direction == "input" } + .collect { par -> + def val = combinedArgs.containsKey(par.plainName) ? combinedArgs[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = thisConfig.functionality.allArguments + .findAll { (it.type != "file" || it.direction != "input") && combinedArgs.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = combinedArgs[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val.replaceAll('\\$id', id).replaceAll('\\$key', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, resourcesDir ] + } + | processObj + | map { output -> + def outputFiles = thisConfig.functionality.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + if (processArgs.auto.simplifyOutput && outputFiles.size() == 1) { + outputFiles = outputFiles.values()[0] + } + + [ output[0], outputFiles ] + } + + // join the output [id, output] with the previous state [id, state, ...] + out1_ = out0_.join(mid2_, failOnDuplicate: true) + // input tuple format: [id, output, prev_state, ...] + // output tuple format: [id, new_state, ...] + | map{ + def new_state = processArgs["toState"](it) + [it[0], new_state] + it.drop(3) + } + | debug(processArgs, "output") + + + emit: + out1_ + } + + def wf = workflowInstance.cloneWithName(workflowKey) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs) + } + // add config to module for later introspection + wf.metaClass.config = thisConfig + + return wf +} + +// initialise default workflow +myWfInstance = workflowFactory([:]) + +// add workflow to environment +ScriptMeta.current().addDefinition(myWfInstance) + +// anonymous workflow for running this module as a standalone +workflow { + def mergedConfig = thisConfig + def mergedParams = [:] + params + + // add id argument if it's not already in the config + if (mergedConfig.functionality.arguments.every{it.plainName != "id"}) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + mergedConfig.functionality.arguments.add(0, idArg) + mergedConfig = processConfig(mergedConfig) + } + if (!mergedParams.containsKey("id")) { + mergedParams.id = "run" + } + + helpMessage(mergedConfig) + + channelFromParams(mergedParams, mergedConfig) + | preprocessInputs("config": mergedConfig) + | view { "input: $it" } + | myWfInstance.run( + auto: [ publish: true ] + ) + | view { "output: $it" } +} \ No newline at end of file diff --git a/target/nextflow/velocity/velocyto/nextflow.config b/target/nextflow/velocity/velocyto/nextflow.config new file mode 100644 index 00000000000..7ef917db6d8 
--- /dev/null +++ b/target/nextflow/velocity/velocyto/nextflow.config @@ -0,0 +1,108 @@ +manifest { + name = 'velocyto' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = '0.12.4' + description = 'Runs the velocity analysis on a BAM file, outputting a loom file.' + author = 'Robrecht Cannoodt' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1.GB } + withLabel: mem2gb { memory = 2.GB } + withLabel: mem4gb { memory = 4.GB } + withLabel: mem8gb { memory = 8.GB } + withLabel: mem16gb { memory = 16.GB } + withLabel: mem32gb { memory = 32.GB } + withLabel: mem64gb { memory = 64.GB } + withLabel: mem128gb { memory = 128.GB } + withLabel: mem256gb { memory = 256.GB } + withLabel: 
mem512gb { memory = 512.GB } + withLabel: mem1tb { memory = 1.TB } + withLabel: mem2tb { memory = 2.TB } + withLabel: mem4tb { memory = 4.TB } + withLabel: mem8tb { memory = 8.TB } + withLabel: mem16tb { memory = 16.TB } + withLabel: mem32tb { memory = 32.TB } + withLabel: mem64tb { memory = 64.TB } + withLabel: mem128tb { memory = 128.TB } + withLabel: mem256tb { memory = 256.TB } + withLabel: mem512tb { memory = 512.TB } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/nextflow/velocity/velocyto/nextflow_params.yaml b/target/nextflow/velocity/velocyto/nextflow_params.yaml new file mode 100644 index 00000000000..e4bbff323ad --- /dev/null +++ b/target/nextflow/velocity/velocyto/nextflow_params.yaml @@ -0,0 +1,11 @@ +# Arguments +input: # please fill in - example: "path/to/file" +transcriptome: # please fill in - example: "path/to/file" +# barcode: "path/to/file" +without_umi: false +# output: "$id.$key.output.output" +logic: "Default" + +# Nextflow input-output arguments +publish_dir: # please fill in - example: "output/" +# param_list: "my_params.yaml" diff --git a/target/nextflow/velocity/velocyto/nextflow_schema.json b/target/nextflow/velocity/velocyto/nextflow_schema.json new file mode 100644 index 00000000000..ea06a74c7c3 --- /dev/null +++ b/target/nextflow/velocity/velocyto/nextflow_schema.json @@ -0,0 +1,125 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "velocyto", +"description": "Runs the velocity analysis on a BAM file, outputting a loom file.", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + 
"type": + "string", + "description": "Type: `file`, required. Path to BAM file", + "help_text": "Type: `file`, required. Path to BAM file" + + } + + + , + "transcriptome": { + "type": + "string", + "description": "Type: `file`, required. Path to GTF file", + "help_text": "Type: `file`, required. Path to GTF file" + + } + + + , + "barcode": { + "type": + "string", + "description": "Type: `file`. Valid barcodes file, to filter the bam", + "help_text": "Type: `file`. Valid barcodes file, to filter the bam. If --bcfile is not specified all the cell barcodes will be included.\nCell barcodes should be specified in the bcfile as the \u0027CB\u0027 tag for each read\n" + + } + + + , + "without_umi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. foo", + "help_text": "Type: `boolean_true`, default: `false`. foo" + , + "default": "False" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Velocyto loom file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Velocyto loom file" + , + "default": "$id.$key.output.output" + } + + + , + "logic": { + "type": + "string", + "description": "Type: `string`, default: `Default`, choices: ``Default`, `Permissive10X`, `Intermediate10X`, `ValidatedIntrons10X`, `Stricter10X`, `ObservedSpanning10X`, `Discordant10X`, `SmartSeq2``. The logic to use for the filtering", + "help_text": "Type: `string`, default: `Default`, choices: ``Default`, `Permissive10X`, `Intermediate10X`, `ValidatedIntrons10X`, `Stricter10X`, `ObservedSpanning10X`, `Discordant10X`, `SmartSeq2``. 
The logic to use for the filtering.", + "enum": ["Default", "Permissive10X", "Intermediate10X", "ValidatedIntrons10X", "Stricter10X", "ObservedSpanning10X", "Discordant10X", "SmartSeq2"] + + , + "default": "Default" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} From 062ffd3f7cd5bc96864b3ea801b46bd22329d803 Mon Sep 17 00:00:00 2001 From: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:31:26 +0100 Subject: [PATCH 5/5] Revert "update CI" This reverts commit a075b9f384e200b357c4c85801062a980ddb3383. 
--- .github/workflows/create-documentation-pr.yml | 14 +- .github/workflows/integration-test.yml | 163 +++++++--- .github/workflows/main-build.yml | 296 ++---------------- .github/workflows/release-build-viash-hub.yml | 46 ++- .github/workflows/release-build.yml | 43 ++- .github/workflows/viash-test.yml | 15 +- 6 files changed, 210 insertions(+), 367 deletions(-) diff --git a/.github/workflows/create-documentation-pr.yml b/.github/workflows/create-documentation-pr.yml index 272ee8fc000..f3ef7785861 100644 --- a/.github/workflows/create-documentation-pr.yml +++ b/.github/workflows/create-documentation-pr.yml @@ -22,20 +22,20 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - id: ns_list_components - uses: viash-io/viash-actions/ns-list@v5 + uses: viash-io/viash-actions/ns-list@v4 with: platform: docker + src: src format: json - query_namespace: ^(?!workflows) - id: ns_list_workflows - uses: viash-io/viash-actions/ns-list@v5 + uses: viash-io/viash-actions/ns-list@v4 with: + src: workflows format: json - query_namespace: ^workflows - id: set_matrix run: | @@ -71,7 +71,7 @@ jobs: path: website token: ${{ secrets.GTHB_PAT }} - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - name: Get tag to use id: get_tag @@ -80,7 +80,7 @@ jobs: TAG_OR_BRANCH_NAME=${INPUT_TAG:-"${{ github.ref_name }}"} echo "tag=$TAG_OR_BRANCH_NAME" >> $GITHUB_OUTPUT - - uses: viash-io/viash-actions/pro/generate-documentation-qmd@v5 + - uses: viash-io/viash-actions/pro/generate-documentation-qmd@v4 with: project_directory: openpipelines src: ./ diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index f19e50cd831..5258d7a6686 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -1,88 +1,169 @@ name: integration test -concurrency: - group: ${{ github.workflow }} - cancel-in-progress: false on: 
workflow_dispatch: - inputs: - push_containers: - type: boolean - required: false - default: true - description: Build docker images and push them to the registry schedule: - cron: '33 2 * * *' jobs: - # Build and create containers - build: - uses: ./.github/workflows/main-build.yml - with: - push_containers: ${{ github.event_name == 'schedule' || inputs.push_containers }} - version: 'integration_build' - target_tag: 'integration_build' - deploy_to_viash_hub: false - deploy_branch: 'integration_build' - secrets: inherit - - # Synchronize S3 Bucket and create cache for per-component runs - sync_s3: + # phase 1 + list: env: s3_bucket: s3://openpipelines-data/ runs-on: ubuntu-latest outputs: + component_matrix: ${{ steps.set_matrix.outputs.components }} + workflow_matrix: ${{ steps.set_matrix.outputs.workflows }} cache_key: ${{ steps.cache.outputs.cache_key }} steps: - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 - id: cache + - uses: actions/checkout@v4 + + - uses: viash-io/viash-actions/setup@v4 + + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v4 + id: cache with: s3_bucket: $s3_bucket dest_path: resources_test cache_key_prefix: resources_test__ - # phase 3 - integration_test: + - name: Remove target folder from .gitignore + run: | + # allow publishing the target folder + sed -i '/^\/target\/$/d' .gitignore + + - uses: viash-io/viash-actions/ns-build@v4 + with: + config_mod: .functionality.version := 'integration_build' + parallel: true + + - name: Deploy to target branch + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: . 
+ publish_branch: integration_build + exclude_assets: '' + + - id: ns_list_components + uses: viash-io/viash-actions/ns-list@v4 + with: + platform: docker + src: src + format: json + + - id: ns_list_workflows + uses: viash-io/viash-actions/ns-list@v4 + with: + src: workflows + format: json + + - id: set_matrix + run: | + echo "components=$(jq -c '[ .[] | + { + "name": (.functionality.namespace + (.platforms | map(select(.type == "docker"))[0].namespace_separator) + .functionality.name), + "config": .info.config, + "dir": .info.config | capture("^(?.*\/)").dir + } + ]' ${{ steps.ns_list_components.outputs.output_file }} )" >> $GITHUB_OUTPUT + + echo "workflows=$(jq -c '[ .[] | . as $config | (.functionality.test_resources // [])[] | select(.type == "nextflow_script", .entrypoint) | + { + "name": ($config.functionality.namespace + "/" + $config.functionality.name), + "main_script": (($config.info.config | capture("^(?.*\/)").dir) + "/" + .path), + "entry": .entrypoint, + "config": $config.info.config + } + ] | unique' ${{ steps.ns_list_workflows.outputs.output_file }} )" >> $GITHUB_OUTPUT + + # phase 2 + build: + needs: list + runs-on: ubuntu-latest - needs: [ build, sync_s3 ] - if: "${{ needs.build.outputs.workflow_matrix != '[]' }}" strategy: fail-fast: false matrix: - component: ${{ fromJson(needs.build.outputs.workflow_matrix) }} + component: ${{ fromJson(needs.list.outputs.component_matrix) }} steps: # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
- uses: data-intuitive/reclaim-the-bytes@v2 - - name: Keep symlinks as-is - run: | - git config --global core.symlinks true - - uses: actions/checkout@v4 + + - uses: viash-io/viash-actions/setup@v4 + + - name: Build container + uses: viash-io/viash-actions/ns-build@v4 + with: + config_mod: .functionality.version := 'integration_build' + setup: build + src: ${{ matrix.component.dir }} + + - name: Login to container registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ secrets.GTHB_USER }} + password: ${{ secrets.GTHB_PAT }} + + - name: Push container + uses: viash-io/viash-actions/ns-build@v4 with: - ref: 'integration_build' + config_mod: .functionality.version := 'integration_build' + platform: docker + src: ${{ matrix.component.dir }} + setup: push + + ################################### + # phase 3 + integration_test: + needs: [ build, list ] + if: "${{ needs.list.outputs.workflow_matrix != '[]' }}" + + runs-on: ubuntu-latest - - uses: viash-io/viash-actions/setup@v5 + strategy: + fail-fast: false + matrix: + component: ${{ fromJson(needs.list.outputs.workflow_matrix) }} - - uses: nf-core/setup-nextflow@v1.5.0 + steps: + # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' 
+ - uses: data-intuitive/reclaim-the-bytes@v2 + + - uses: actions/checkout@v4 + + - uses: viash-io/viash-actions/setup@v4 + + - uses: nf-core/setup-nextflow@v1.3.0 + + # build target dir + # use containers from integration_build branch, hopefully these are available + - name: Build target dir + uses: viash-io/viash-actions/ns-build@v4 + with: + config_mod: ".functionality.version := 'integration_build'" + parallel: true # use cache - name: Cache resources data - uses: actions/cache@v4 + uses: actions/cache@v3 timeout-minutes: 5 with: path: resources_test - key: ${{ needs.sync_s3.outputs.cache_key }} + key: ${{ needs.list.outputs.cache_key }} fail-on-cache-miss: true - name: Remove unused test resources to save space shell: bash run: | - readarray -t resources < <(viash config view --format json "${{ matrix.component.config }}" -c 'del(.functionality.dependencies)' | jq -r -c '(.info.config | capture("^(?.*\/)").dir) as $dir | .functionality.test_resources | map(select(.type == "file")) | map($dir + .path) | unique | .[]') + readarray -t resources < <(viash config view --format json "${{ matrix.component.config }}" | jq -r -c '(.info.config | capture("^(?.*\/)").dir) as $dir | .functionality.test_resources | map(select(.type == "file")) | map($dir + .path) | unique | .[]') to_not_remove=() for resource in "${resources[@]}"; do if [[ $resource == *"resources_test"* ]]; then @@ -97,9 +178,7 @@ jobs: unset 'to_not_remove[${#to_not_remove[@]}-1]' to_not_remove+=( "(" "${to_not_remove[@]}" ")" "-prune" "-o") fi - echo "Not removing ${to_not_remove[@]}" find ./resources_test/ "${to_not_remove[@]}" -type f -exec rm {} + - tree ./resources_test/ - name: Run integration test timeout-minutes: 60 @@ -110,4 +189,4 @@ jobs: -main-script "${{ matrix.component.main_script }}" \ -entry "${{ matrix.component.entry }}" \ -profile docker,mount_temp,no_publish \ - -c src/workflows/utils/labels_ci.config + -c workflows/utils/labels_ci.config diff --git a/.github/workflows/main-build.yml 
b/.github/workflows/main-build.yml index d679dab8651..536e8743ed6 100644 --- a/.github/workflows/main-build.yml +++ b/.github/workflows/main-build.yml @@ -1,201 +1,70 @@ -name: Build +name: main build concurrency: - group: ${{ github.workflow }}-${{ github.event.inputs.deploy_branch && format('{0}_build', github.ref_name) || github.event.inputs.deploy_branch }} + group: main_build cancel-in-progress: true on: - workflow_dispatch: - inputs: - push_containers: - type: boolean - required: false - default: false - description: Build docker images and push them to the registry - version: - type: string - required: false - description: | - Version to tag the build components with (e.i functionality.version). - Defaults to name of the branch that triggered the workflow, suffixed by "_build". - target_tag: - type: string - required: false - default: main_build - description: | - Version tag of containers to use. Is `main_build` by default. - Can be used in combination with 'push_containers' to re-use existing docker images - or set the tag for new builds. - deploy_to_viash_hub: - type: boolean - required: false - default: false - description: Also build packages and docker images for viash-hub.com and push them. - - # when used as a subworkflow - workflow_call: - inputs: - push_containers: - type: boolean - required: false - default: false - description: push the containers to the registry - version: - type: string - required: false - description: | - Version to tag the build components with (e.i functionality.version). - Defaults to name of the branch that triggered the workflow, suffixed by "_build". - target_tag: - type: string - required: false - default: main_build - description: Version tag of existing containers to use. Is `main_build` by default. - deploy_branch: - type: string - required: false - description: | - Branch to deploy the build to. Defaults to name of the branch - that triggered the workflow, suffixed by "_build". 
- deploy_to_viash_hub: - type: boolean - required: false - default: false - description: Also build packages and docker images for viash-hub.com and push them. - outputs: - component_matrix: - description: "A JSON object that can be used to populate a github actions matrix for component jobs." - value: ${{ jobs.build_and_deploy_target_folder.outputs.component_matrix }} - workflow_matrix: - description: "A JSON object that can be used to populate a github actions matrix for workflow jobs." - value: ${{ jobs.build_and_deploy_target_folder.outputs.workflow_matrix}} - secrets: - VIASHHUB_USER: - required: true - VIASHHUB_PAT: - required: true - GTHB_USER: - required: true - GTHB_PAT: - required: true push: branches: [ 'main' ] - jobs: # phase 1 - build_and_deploy_target_folder: - name: "Build and push target folder" + list: runs-on: ubuntu-latest outputs: - component_matrix: ${{ steps.set_matrix.outputs.components }} - workflow_matrix: ${{ steps.set_matrix.outputs.workflows }} - - env: - DEPLOY_BRANCH: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} + component_matrix: ${{ steps.set_matrix.outputs.matrix }} + cache_key: ${{ steps.cache.outputs.cache_key }} steps: - - name: Keep symlinks as-is - run: | - git config --global core.symlinks true - - uses: actions/checkout@v4 - if: ${{ inputs.deploy_to_viash_hub == 'true' }} - with: - fetch-depth: 0 - - - name: Push ref to Viash-hub - if: ${{ inputs.deploy_to_viash_hub == 'true' }} - run: | - git remote add viash-hub https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git - git push -f -u viash-hub ${{ github.ref_name }} - - - name: Branch to checkout (use existing target branch if it exists) - id: get_checkout_branch - run: | - if ! 
git ls-remote --heads --exit-code https://github.com/openpipelines-bio/openpipeline.git "$DEPLOY_BRANCH" > /dev/null; then - echo "Remote branch does not exist, fetching current branch and building on top of it" - echo "checkout_branch=${{ github.ref_name }}" >> "$GITHUB_OUTPUT" - else - echo "Remote branch exists, checking out existing branch" - echo "checkout_branch=$DEPLOY_BRANCH" >> "$GITHUB_OUTPUT" - fi - - - uses: actions/checkout@v4 - with: - ref: ${{ steps.get_checkout_branch.outputs.checkout_branch }} - fetch-depth: 0 - - name: Fetch changes from ${{github.ref_name}} - run: | - git fetch origin ${{github.ref_name}} - git checkout -f --no-overlay origin/${{github.ref_name}} -- '.' - - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - name: Remove target folder from .gitignore run: | # allow publishing the target folder sed -i '/^\/target\/$/d' .gitignore - - uses: viash-io/viash-actions/ns-build@v5 + - uses: viash-io/viash-actions/ns-build@v4 with: - config_mod: | - .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" - .platforms[.type == 'docker'].target_tag := '${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }}' + config_mod: .functionality.version := 'main_build' parallel: true - query: ^(?!workflows) - - - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" - parallel: true - query: ^workflows - + - name: Build nextflow schemas - uses: viash-io/viash-actions/pro/build-nextflow-schemas@v5 + uses: viash-io/viash-actions/pro/build-nextflow-schemas@v4 with: + workflows: workflows components: src - workflows: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Build parameter files - uses: viash-io/viash-actions/pro/build-nextflow-params@v5 + uses: viash-io/viash-actions/pro/build-nextflow-params@v4 with: - workflows: src + workflows: 
workflows components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Deploy to target branch - uses: stefanzweifel/git-auto-commit-action@v5 + uses: peaceiris/actions-gh-pages@v3 with: - create_branch: true - commit_message: "deploy: ${{github.sha}}" - skip_dirty_check: true - branch: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} - - name: "List components" - id: ns_list - uses: viash-io/viash-actions/ns-list@v5 - with: - platform: docker - src: src - format: json - query_namespace: ^(?!workflows) + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: . + publish_branch: main_build + exclude_assets: '' - - name: "List workflows" - id: ns_list_workflows - uses: viash-io/viash-actions/ns-list@v5 + - id: ns_list + uses: viash-io/viash-actions/ns-list@v4 with: + platform: docker src: src format: json - query_namespace: ^workflows - - name: "Parse JSON output from 'viash ns list' as input for matrix." - id: set_matrix + - id: set_matrix run: | - echo "components=$(jq -c '[ .[] | + echo "matrix=$(jq -c '[ .[] | { "name": (.functionality.namespace + "/" + .functionality.name), "config": .info.config, @@ -203,82 +72,16 @@ jobs: } ]' ${{ steps.ns_list.outputs.output_file }} )" >> $GITHUB_OUTPUT - echo "workflows=$(jq -c '[ .[] | . 
as $config | (.functionality.test_resources // [])[] | select(.type == "nextflow_script", .entrypoint) | - { - "name": ($config.functionality.namespace + "/" + $config.functionality.name), - "main_script": (($config.info.config | capture("^(?.*\/)").dir) + "/" + .path), - "entry": .entrypoint, - "config": $config.info.config - } - ] | unique' ${{ steps.ns_list_workflows.outputs.output_file }} )" >> $GITHUB_OUTPUT - - - uses: actions/checkout@v4 - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - with: - ref: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} - fetch-depth: 0 - clean: true - - - name: Set origin to viash-hub and commit on top of it. - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - # This is needed because git-auto-commit-action uses origin by default - run: | - git remote add viash-hub https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git - if git ls-remote --heads --exit-code https://viash-hub.com/openpipelines-bio/openpipeline.git ${{ github.ref_name }}_build > /dev/null; then - git fetch viash-hub ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} - git reset --hard viash-hub/${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} - fi - git checkout -f --no-overlay origin/${{github.ref_name}} -- '.' 
- git remote set-url origin https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git - git remote rm viash-hub - - - name: Remove target folder from .gitignore - run: | - # allow publishing the target folder - sed -i '/^\/target\/$/d' .gitignore - - - uses: viash-io/viash-actions/ns-build@v5 - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - with: - config_mod: | - .functionality.version := " ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }}" - .platforms[.type == 'docker'].target_tag := '${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }}' - .platforms[.type == 'docker'].target_organization := 'openpipelines-bio/openpipeline' - .platforms[.type == 'docker'].target_registry := 'viash-hub.com:5050' - .platforms[.type == 'docker'].target_image_source := 'https://viash-hub.com/openpipelines-bio/openpipeline' - parallel: true - query: ^(?!workflows) - - - uses: viash-io/viash-actions/ns-build@v5 - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - with: - config_mod: | - .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" - parallel: true - query: ^workflows - - - name: Deploy to target branch - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - uses: stefanzweifel/git-auto-commit-action@v5 - with: - create_branch: true - commit_message: "deploy: ${{github.sha}}" - skip_dirty_check: true - branch: ${{ !inputs.deploy_branch && format('{0}_build', github.ref_name) || inputs.deploy_branch }} - skip_checkout: true - # phase 2 - build_and_deploy_docker_containers: - name: "Build and Deploy Docker Images" - needs: build_and_deploy_target_folder - if: ${{github.event_name == 'push' || inputs.push_containers }} + build: + needs: list runs-on: ubuntu-latest strategy: fail-fast: false matrix: - component: ${{ 
fromJson(needs.build_and_deploy_target_folder.outputs.component_matrix) }} + component: ${{ fromJson(needs.list.outputs.component_matrix) }} steps: # Remove unnecessary files to free up space. Otherwise, we get 'no space left on device.' @@ -286,14 +89,12 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - name: Build container - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: - config_mod: | - .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" - .platforms[.type == 'docker'].target_tag := '${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }}' + config_mod: .functionality.version := 'main_build' platform: docker src: ${{ matrix.component.dir }} setup: build @@ -306,42 +107,9 @@ jobs: password: ${{ secrets.GTHB_PAT }} - name: Push container - uses: viash-io/viash-actions/ns-build@v5 - with: - config_mod: .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" - platform: docker - src: ${{ matrix.component.dir }} - setup: push - - - name: Login to Viash-Hub container registry - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - uses: docker/login-action@v3 - with: - registry: viash-hub.com:5050 - username: ${{ secrets.VIASHHUB_USER }} - password: ${{ secrets.VIASHHUB_PAT }} - - - name: Update Docker settings - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - run: | - sudo sed -i 's/ }/, \"max-concurrent-uploads\": 2 }/' /etc/docker/daemon.json - sudo systemctl restart docker - - - name: "Re-tag containers for viash-hub" - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - run: | - viash ns exec -s ${{ matrix.component.dir }} --apply_platform -p docker \ - 'docker tag ghcr.io/openpipelines-bio/{namespace}_{functionality-name}:${{ github.event_name == 'push' && 'main_build' || inputs.target_tag }} 
viash-hub.com:5050/openpipelines-bio/openpipeline/{namespace}_{functionality-name}:${{ github.ref_name }}_build' - - - name: Push container to Viash-Hub - if: ${{ github.event_name == 'push' || inputs.deploy_to_viash_hub }} - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: - config_mod: | - .functionality.version := "${{ inputs.version || format('{0}_build', github.ref_name) }}" - .platforms[.type == 'docker'].target_registry := 'viash-hub.com:5050' - .platforms[.type == 'docker'].target_organization := 'openpipelines-bio/openpipeline' - .platforms[.type == 'docker'].target_image_source := 'https://viash-hub.com/openpipelines-bio/openpipeline' + config_mod: .functionality.version := 'main_build' platform: docker src: ${{ matrix.component.dir }} - setup: push + setup: push \ No newline at end of file diff --git a/.github/workflows/release-build-viash-hub.yml b/.github/workflows/release-build-viash-hub.yml index 76025f11315..382909746c9 100644 --- a/.github/workflows/release-build-viash-hub.yml +++ b/.github/workflows/release-build-viash-hub.yml @@ -27,9 +27,9 @@ jobs: # git remote add viash-hub https://x-access-token:${{ secrets.VIASHHUB_PAT }}@viash-hub.com/openpipelines-bio/openpipeline.git # git push -f viash-hub main - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v4 id: cache with: s3_bucket: $s3_bucket @@ -41,7 +41,7 @@ jobs: # allow publishing the target folder sed -i '/^\/target\/$/d' .gitignore - - uses: viash-io/viash-actions/ns-build@v5 + - uses: viash-io/viash-actions/ns-build@v4 with: config_mod: | .functionality.version := '${{ github.event.inputs.version_tag }}' @@ -49,27 +49,25 @@ jobs: .platforms[.type == 'docker'].target_organization := 'openpipelines-bio/openpipeline' .platforms[.type == 'docker'].target_image_source := 
'https://viash-hub.com/openpipelines-bio/openpipeline' parallel: true - query_namespace: ^(?!workflows) - - name: Build nextflow schemas - uses: viash-io/viash-actions/pro/build-nextflow-schemas@v5 + uses: viash-io/viash-actions/pro/build-nextflow-schemas@v4 with: - workflows: src + workflows: workflows components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Build parameter files - uses: viash-io/viash-actions/pro/build-nextflow-params@v5 + uses: viash-io/viash-actions/pro/build-nextflow-params@v4 with: - workflows: src + workflows: workflows components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Deploy build artifacts to Viash-Hub - uses: viash-io/viash-actions/viash-hub/deploy@v5 + uses: viash-io/viash-actions/viash-hub/deploy@v4 with: github_token: ${{ github.token }} viash_hub_token: ${{ secrets.VIASHHUB_PAT }} @@ -80,17 +78,17 @@ jobs: commit_message: "Deploy for release ${{ github.event.inputs.version_tag }} from ${{ github.sha }}" - id: ns_list_components - uses: viash-io/viash-actions/ns-list@v5 + uses: viash-io/viash-actions/ns-list@v4 with: platform: docker + src: src format: json - query_namespace: ^(?!workflows) - id: ns_list_workflows - uses: viash-io/viash-actions/ns-list@v5 + uses: viash-io/viash-actions/ns-list@v4 with: + src: workflows format: json - query_namespace: ^workflows - id: set_matrix run: | @@ -128,10 +126,10 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - name: Build container - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: config_mod: | .functionality.version := '${{ github.event.inputs.version_tag }}' @@ -151,7 +149,7 @@ jobs: password: ${{ secrets.VIASHHUB_PAT }} - name: Push container - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: config_mod: | .functionality.version := '${{ 
github.event.inputs.version_tag }}' @@ -181,14 +179,14 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - - uses: nf-core/setup-nextflow@v1.5.0 + - uses: nf-core/setup-nextflow@v1.3.0 # build target dir # use containers from release branch, hopefully these are available - name: Build target dir - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: config_mod: | .functionality.version := '${{ github.event.inputs.version_tag }}' @@ -200,7 +198,7 @@ jobs: # use cache - name: Cache resources data - uses: actions/cache@v4 + uses: actions/cache@v3 timeout-minutes: 5 with: path: resources_test @@ -236,7 +234,7 @@ jobs: -main-script "${{ matrix.component.main_script }}" \ -entry ${{ matrix.component.entry }} \ -profile docker,mount_temp,no_publish \ - -c src/workflows/utils/labels_ci.config + -c workflows/utils/labels_ci.config ###################################3 # phase 4 @@ -256,12 +254,12 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 # use cache - name: Cache resources data id: restore_cache - uses: actions/cache/restore@v4 + uses: actions/cache/restore@v3 env: SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5 with: diff --git a/.github/workflows/release-build.yml b/.github/workflows/release-build.yml index 555cfac9795..a92d51c990f 100644 --- a/.github/workflows/release-build.yml +++ b/.github/workflows/release-build.yml @@ -22,9 +22,9 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v4 id: cache with: s3_bucket: $s3_bucket @@ -36,24 +36,23 @@ jobs: # allow publishing the target folder sed -i '/^\/target\/$/d' .gitignore - - uses: viash-io/viash-actions/ns-build@v5 + - uses: 
viash-io/viash-actions/ns-build@v4 with: config_mod: ".functionality.version := '${{ github.event.inputs.version_tag }}'" parallel: true - query_namespace: ^(?!workflows) - name: Build nextflow schemas - uses: viash-io/viash-actions/pro/build-nextflow-schemas@v5 + uses: viash-io/viash-actions/pro/build-nextflow-schemas@v4 with: - workflows: src + workflows: workflows components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' - name: Build parameter files - uses: viash-io/viash-actions/pro/build-nextflow-params@v5 + uses: viash-io/viash-actions/pro/build-nextflow-params@v4 with: - workflows: src + workflows: workflows components: src viash_pro_token: ${{ secrets.GTHB_PAT }} tools_version: 'main_build' @@ -69,17 +68,17 @@ jobs: - id: ns_list_components - uses: viash-io/viash-actions/ns-list@v5 + uses: viash-io/viash-actions/ns-list@v4 with: platform: docker + src: src format: json - query_namespace: ^(?!workflows) - id: ns_list_workflows - uses: viash-io/viash-actions/ns-list@v5 + uses: viash-io/viash-actions/ns-list@v4 with: + src: workflows format: json - query_namespace: ^workflows - id: set_matrix run: | @@ -117,10 +116,10 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - name: Build container - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: config_mod: .functionality.version := '${{ github.event.inputs.version_tag }}' platform: docker @@ -136,7 +135,7 @@ jobs: password: ${{ secrets.GTHB_PAT }} - name: Push container - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: config_mod: .functionality.version := '${{ github.event.inputs.version_tag }}' platform: docker @@ -162,14 +161,14 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - - uses: nf-core/setup-nextflow@v1.5.0 + - uses: nf-core/setup-nextflow@v1.3.0 # 
build target dir # use containers from release branch, hopefully these are available - name: Build target dir - uses: viash-io/viash-actions/ns-build@v5 + uses: viash-io/viash-actions/ns-build@v4 with: config_mod: ".functionality.version := '${{ github.event.inputs.version_tag }}'" parallel: true @@ -177,7 +176,7 @@ jobs: # use cache - name: Cache resources data - uses: actions/cache@v4 + uses: actions/cache@v3 timeout-minutes: 5 with: path: resources_test @@ -213,7 +212,7 @@ jobs: -main-script "${{ matrix.component.main_script }}" \ -entry ${{ matrix.component.entry }} \ -profile docker,mount_temp,no_publish \ - -c src/workflows/utils/labels_ci.config + -c workflows/utils/labels_ci.config ###################################3 # phase 4 @@ -233,12 +232,12 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 # use cache - name: Cache resources data id: restore_cache - uses: actions/cache/restore@v4 + uses: actions/cache/restore@v3 env: SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5 with: diff --git a/.github/workflows/viash-test.yml b/.github/workflows/viash-test.yml index 6aba6802743..369a51db013 100644 --- a/.github/workflows/viash-test.yml +++ b/.github/workflows/viash-test.yml @@ -41,13 +41,13 @@ jobs: with: fetch-depth: 0 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 - name: Check if all config can be parsed if there is no unicode support run: | LANG=C viash ns list > /dev/null - - uses: viash-io/viash-actions/project/sync-and-cache-s3@v5 + - uses: viash-io/viash-actions/project/sync-and-cache-s3@v4 id: cache with: s3_bucket: $s3_bucket @@ -56,20 +56,19 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v42 + uses: tj-actions/changed-files@v39 with: separator: ";" diff_relative: true - id: ns_list - uses: viash-io/viash-actions/ns-list@v5 + uses: viash-io/viash-actions/ns-list@v4 with: platform: docker format: json - query_namespace: 
^(?!workflows) - id: ns_list_filtered - uses: viash-io/viash-actions/project/detect-changed-components@v5 + uses: viash-io/viash-actions/project/detect-changed-components@v4 with: input_file: "${{ steps.ns_list.outputs.output_file }}" @@ -100,12 +99,12 @@ jobs: - uses: actions/checkout@v4 - - uses: viash-io/viash-actions/setup@v5 + - uses: viash-io/viash-actions/setup@v4 # use cache - name: Cache resources data id: restore_cache - uses: actions/cache/restore@v4 + uses: actions/cache/restore@v3 env: SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5 with: